main:新增并发控制功能

变更内容:
- 增加 `max_concurrent_jobs` 配置项,支持设置最大并发任务数。
- 为 `JobManager` 添加信号量控制实现任务并发限制。
- 新增获取任务并发状态的接口 `/jobs/concurrency/status`。
- 编写并发控制功能相关的测试用例。
This commit is contained in:
2026-02-02 17:11:52 +08:00
parent 57b276d038
commit 87ed8c071c
5 changed files with 265 additions and 54 deletions

View File

@@ -27,6 +27,8 @@ class JobManager:
self._redis_client: Optional[aioredis.Redis] = None
self._algorithm_registry: Dict[str, Type[BaseAlgorithm]] = {}
self._http_client: Optional[httpx.AsyncClient] = None
self._semaphore: Optional[asyncio.Semaphore] = None
self._max_concurrent_jobs: int = 0
async def initialize(self) -> None:
"""初始化 Redis 连接和 HTTP 客户端"""
@@ -51,6 +53,11 @@ class JobManager:
# 初始化 HTTP 客户端
self._http_client = httpx.AsyncClient(timeout=settings.webhook_timeout)
# 初始化并发控制信号量
self._max_concurrent_jobs = settings.max_concurrent_jobs
self._semaphore = asyncio.Semaphore(self._max_concurrent_jobs)
logger.info(f"任务并发限制已设置: {self._max_concurrent_jobs}")
# 注册算法
self._register_algorithms()
@@ -203,6 +210,10 @@ class JobManager:
logger.error(f"Redis 不可用,无法执行任务: {job_id}")
return
if not self._semaphore:
logger.error(f"并发控制未初始化,无法执行任务: {job_id}")
return
key = f"job:{job_id}"
job_data = await self._redis_client.hgetall(key)
@@ -219,74 +230,76 @@ class JobManager:
except json.JSONDecodeError:
params = {}
# 更新状态为 running
started_at = self._get_timestamp()
await self._redis_client.hset(key, mapping={"status": "running", "started_at": started_at})
# 使用信号量控制并发
async with self._semaphore:
# 更新状态为 running
started_at = self._get_timestamp()
await self._redis_client.hset(key, mapping={"status": "running", "started_at": started_at})
logger.info(f"开始执行任务: job_id={job_id}, algorithm={algorithm_name}")
logger.info(f"开始执行任务: job_id={job_id}, algorithm={algorithm_name}")
import time
import time
start_time = time.time()
status = "completed"
result_data = None
error_msg = None
metadata = None
start_time = time.time()
status = "completed"
result_data = None
error_msg = None
metadata = None
try:
# 获取算法类并执行
algorithm_cls = self._algorithm_registry.get(algorithm_name)
if not algorithm_cls:
raise ValueError(f"算法 '{algorithm_name}' 不存在")
try:
# 获取算法类并执行
algorithm_cls = self._algorithm_registry.get(algorithm_name)
if not algorithm_cls:
raise ValueError(f"算法 '{algorithm_name}' 不存在")
algorithm = algorithm_cls()
algorithm = algorithm_cls()
# 根据算法类型传递参数
if algorithm_name == "PrimeChecker":
execution_result = algorithm.execute(params.get("number", 0))
else:
# 通用参数传递
execution_result = algorithm.execute(**params)
# 根据算法类型传递参数
if algorithm_name == "PrimeChecker":
execution_result = algorithm.execute(params.get("number", 0))
else:
# 通用参数传递
execution_result = algorithm.execute(**params)
if execution_result.get("success"):
result_data = execution_result.get("result", {})
metadata = execution_result.get("metadata", {})
else:
if execution_result.get("success"):
result_data = execution_result.get("result", {})
metadata = execution_result.get("metadata", {})
else:
status = "failed"
error_msg = execution_result.get("error", "算法执行失败")
metadata = execution_result.get("metadata", {})
except Exception as e:
status = "failed"
error_msg = execution_result.get("error", "算法执行失败")
metadata = execution_result.get("metadata", {})
error_msg = str(e)
logger.error(f"任务执行失败: job_id={job_id}, error={e}", exc_info=True)
except Exception as e:
status = "failed"
error_msg = str(e)
logger.error(f"任务执行失败: job_id={job_id}, error={e}", exc_info=True)
# 计算执行时间
elapsed_time = time.time() - start_time
completed_at = self._get_timestamp()
# 计算执行时间
elapsed_time = time.time() - start_time
completed_at = self._get_timestamp()
# 更新任务状态
update_data = {
"status": status,
"completed_at": completed_at,
"result": json.dumps(result_data) if result_data else "",
"error": error_msg or "",
"metadata": json.dumps(metadata) if metadata else "",
}
await self._redis_client.hset(key, mapping=update_data)
# 更新任务状态
update_data = {
"status": status,
"completed_at": completed_at,
"result": json.dumps(result_data) if result_data else "",
"error": error_msg or "",
"metadata": json.dumps(metadata) if metadata else "",
}
await self._redis_client.hset(key, mapping=update_data)
# 设置 TTL
await self._redis_client.expire(key, settings.job_result_ttl)
# 设置 TTL
await self._redis_client.expire(key, settings.job_result_ttl)
# 记录指标
incr("jobs_completed_total", {"algorithm": algorithm_name, "status": status})
observe("job_execution_duration_seconds", {"algorithm": algorithm_name}, elapsed_time)
# 记录指标
incr("jobs_completed_total", {"algorithm": algorithm_name, "status": status})
observe("job_execution_duration_seconds", {"algorithm": algorithm_name}, elapsed_time)
logger.info(f"任务执行完成: job_id={job_id}, status={status}, elapsed={elapsed_time:.3f}s")
logger.info(f"任务执行完成: job_id={job_id}, status={status}, elapsed={elapsed_time:.3f}s")
# 发送 Webhook 回调
if webhook_url:
await self._send_webhook(job_id, webhook_url)
# 发送 Webhook 回调
if webhook_url:
await self._send_webhook(job_id, webhook_url)
async def _send_webhook(self, job_id: str, webhook_url: str) -> None:
"""发送 Webhook 回调(带重试)
@@ -359,6 +372,32 @@ class JobManager:
"""检查任务管理器是否可用"""
return self._redis_client is not None
def get_concurrency_status(self) -> Dict[str, int]:
    """Return a snapshot of the job-concurrency state.

    Returns:
        Dict[str, int]: a dict with the keys
            - max_concurrent: configured maximum number of concurrent jobs
            - available_slots: semaphore slots currently free
            - running_jobs: jobs currently holding a slot
    """
    # Before initialize() has run there is no semaphore yet; report an
    # all-zero status instead of raising.
    if not self._semaphore:
        return {
            "max_concurrent": 0,
            "available_slots": 0,
            "running_jobs": 0,
        }
    max_concurrent = self._max_concurrent_jobs
    # NOTE(review): asyncio.Semaphore exposes no public counter, so this
    # reads the private `_value` attribute — a CPython implementation
    # detail; re-verify on interpreter upgrades.
    available_slots = self._semaphore._value
    # Clamp so a stray extra release() (which pushes _value above the
    # initial count) can never be reported as negative usage.
    running_jobs = max(0, max_concurrent - available_slots)
    return {
        "max_concurrent": max_concurrent,
        "available_slots": available_slots,
        "running_jobs": running_jobs,
    }
# 全局单例
_job_manager: Optional[JobManager] = None