main:新增并发控制功能
变更内容: - 增加 `max_concurrent_jobs` 配置项,支持设置最大并发任务数。 - 为 `JobManager` 添加信号量控制实现任务并发限制。 - 新增获取任务并发状态的接口 `/jobs/concurrency/status`。 - 编写并发控制功能相关的测试用例。
This commit is contained in:
@@ -27,6 +27,8 @@ class JobManager:
|
||||
self._redis_client: Optional[aioredis.Redis] = None
|
||||
self._algorithm_registry: Dict[str, Type[BaseAlgorithm]] = {}
|
||||
self._http_client: Optional[httpx.AsyncClient] = None
|
||||
self._semaphore: Optional[asyncio.Semaphore] = None
|
||||
self._max_concurrent_jobs: int = 0
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""初始化 Redis 连接和 HTTP 客户端"""
|
||||
@@ -51,6 +53,11 @@ class JobManager:
|
||||
# 初始化 HTTP 客户端
|
||||
self._http_client = httpx.AsyncClient(timeout=settings.webhook_timeout)
|
||||
|
||||
# 初始化并发控制信号量
|
||||
self._max_concurrent_jobs = settings.max_concurrent_jobs
|
||||
self._semaphore = asyncio.Semaphore(self._max_concurrent_jobs)
|
||||
logger.info(f"任务并发限制已设置: {self._max_concurrent_jobs}")
|
||||
|
||||
# 注册算法
|
||||
self._register_algorithms()
|
||||
|
||||
@@ -203,6 +210,10 @@ class JobManager:
|
||||
logger.error(f"Redis 不可用,无法执行任务: {job_id}")
|
||||
return
|
||||
|
||||
if not self._semaphore:
|
||||
logger.error(f"并发控制未初始化,无法执行任务: {job_id}")
|
||||
return
|
||||
|
||||
key = f"job:{job_id}"
|
||||
job_data = await self._redis_client.hgetall(key)
|
||||
|
||||
@@ -219,74 +230,76 @@ class JobManager:
|
||||
except json.JSONDecodeError:
|
||||
params = {}
|
||||
|
||||
# 更新状态为 running
|
||||
started_at = self._get_timestamp()
|
||||
await self._redis_client.hset(key, mapping={"status": "running", "started_at": started_at})
|
||||
# 使用信号量控制并发
|
||||
async with self._semaphore:
|
||||
# 更新状态为 running
|
||||
started_at = self._get_timestamp()
|
||||
await self._redis_client.hset(key, mapping={"status": "running", "started_at": started_at})
|
||||
|
||||
logger.info(f"开始执行任务: job_id={job_id}, algorithm={algorithm_name}")
|
||||
logger.info(f"开始执行任务: job_id={job_id}, algorithm={algorithm_name}")
|
||||
|
||||
import time
|
||||
import time
|
||||
|
||||
start_time = time.time()
|
||||
status = "completed"
|
||||
result_data = None
|
||||
error_msg = None
|
||||
metadata = None
|
||||
start_time = time.time()
|
||||
status = "completed"
|
||||
result_data = None
|
||||
error_msg = None
|
||||
metadata = None
|
||||
|
||||
try:
|
||||
# 获取算法类并执行
|
||||
algorithm_cls = self._algorithm_registry.get(algorithm_name)
|
||||
if not algorithm_cls:
|
||||
raise ValueError(f"算法 '{algorithm_name}' 不存在")
|
||||
try:
|
||||
# 获取算法类并执行
|
||||
algorithm_cls = self._algorithm_registry.get(algorithm_name)
|
||||
if not algorithm_cls:
|
||||
raise ValueError(f"算法 '{algorithm_name}' 不存在")
|
||||
|
||||
algorithm = algorithm_cls()
|
||||
algorithm = algorithm_cls()
|
||||
|
||||
# 根据算法类型传递参数
|
||||
if algorithm_name == "PrimeChecker":
|
||||
execution_result = algorithm.execute(params.get("number", 0))
|
||||
else:
|
||||
# 通用参数传递
|
||||
execution_result = algorithm.execute(**params)
|
||||
# 根据算法类型传递参数
|
||||
if algorithm_name == "PrimeChecker":
|
||||
execution_result = algorithm.execute(params.get("number", 0))
|
||||
else:
|
||||
# 通用参数传递
|
||||
execution_result = algorithm.execute(**params)
|
||||
|
||||
if execution_result.get("success"):
|
||||
result_data = execution_result.get("result", {})
|
||||
metadata = execution_result.get("metadata", {})
|
||||
else:
|
||||
if execution_result.get("success"):
|
||||
result_data = execution_result.get("result", {})
|
||||
metadata = execution_result.get("metadata", {})
|
||||
else:
|
||||
status = "failed"
|
||||
error_msg = execution_result.get("error", "算法执行失败")
|
||||
metadata = execution_result.get("metadata", {})
|
||||
|
||||
except Exception as e:
|
||||
status = "failed"
|
||||
error_msg = execution_result.get("error", "算法执行失败")
|
||||
metadata = execution_result.get("metadata", {})
|
||||
error_msg = str(e)
|
||||
logger.error(f"任务执行失败: job_id={job_id}, error={e}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
status = "failed"
|
||||
error_msg = str(e)
|
||||
logger.error(f"任务执行失败: job_id={job_id}, error={e}", exc_info=True)
|
||||
# 计算执行时间
|
||||
elapsed_time = time.time() - start_time
|
||||
completed_at = self._get_timestamp()
|
||||
|
||||
# 计算执行时间
|
||||
elapsed_time = time.time() - start_time
|
||||
completed_at = self._get_timestamp()
|
||||
# 更新任务状态
|
||||
update_data = {
|
||||
"status": status,
|
||||
"completed_at": completed_at,
|
||||
"result": json.dumps(result_data) if result_data else "",
|
||||
"error": error_msg or "",
|
||||
"metadata": json.dumps(metadata) if metadata else "",
|
||||
}
|
||||
await self._redis_client.hset(key, mapping=update_data)
|
||||
|
||||
# 更新任务状态
|
||||
update_data = {
|
||||
"status": status,
|
||||
"completed_at": completed_at,
|
||||
"result": json.dumps(result_data) if result_data else "",
|
||||
"error": error_msg or "",
|
||||
"metadata": json.dumps(metadata) if metadata else "",
|
||||
}
|
||||
await self._redis_client.hset(key, mapping=update_data)
|
||||
# 设置 TTL
|
||||
await self._redis_client.expire(key, settings.job_result_ttl)
|
||||
|
||||
# 设置 TTL
|
||||
await self._redis_client.expire(key, settings.job_result_ttl)
|
||||
# 记录指标
|
||||
incr("jobs_completed_total", {"algorithm": algorithm_name, "status": status})
|
||||
observe("job_execution_duration_seconds", {"algorithm": algorithm_name}, elapsed_time)
|
||||
|
||||
# 记录指标
|
||||
incr("jobs_completed_total", {"algorithm": algorithm_name, "status": status})
|
||||
observe("job_execution_duration_seconds", {"algorithm": algorithm_name}, elapsed_time)
|
||||
logger.info(f"任务执行完成: job_id={job_id}, status={status}, elapsed={elapsed_time:.3f}s")
|
||||
|
||||
logger.info(f"任务执行完成: job_id={job_id}, status={status}, elapsed={elapsed_time:.3f}s")
|
||||
|
||||
# 发送 Webhook 回调
|
||||
if webhook_url:
|
||||
await self._send_webhook(job_id, webhook_url)
|
||||
# 发送 Webhook 回调
|
||||
if webhook_url:
|
||||
await self._send_webhook(job_id, webhook_url)
|
||||
|
||||
async def _send_webhook(self, job_id: str, webhook_url: str) -> None:
|
||||
"""发送 Webhook 回调(带重试)
|
||||
@@ -359,6 +372,32 @@ class JobManager:
|
||||
"""检查任务管理器是否可用"""
|
||||
return self._redis_client is not None
|
||||
|
||||
def get_concurrency_status(self) -> Dict[str, int]:
    """Report the current job-concurrency state.

    Returns:
        Dict[str, int]: a dict with the keys
            - max_concurrent: configured maximum number of concurrent jobs
            - available_slots: semaphore slots currently free
            - running_jobs: jobs currently holding a slot
    """
    # Before initialize() has run there is no semaphore yet; report all
    # zeros instead of raising so status callers stay safe.
    if not self._semaphore:
        return {
            "max_concurrent": 0,
            "available_slots": 0,
            "running_jobs": 0,
        }

    max_concurrent = self._max_concurrent_jobs
    # NOTE(review): asyncio.Semaphore exposes no public counter; reading
    # the private `_value` is a common workaround but may break across
    # Python versions — consider tracking the running count explicitly.
    available_slots = self._semaphore._value
    running_jobs = max_concurrent - available_slots

    return {
        "max_concurrent": max_concurrent,
        "available_slots": available_slots,
        "running_jobs": running_jobs,
    }
|
||||
|
||||
|
||||
# 全局单例
|
||||
_job_manager: Optional[JobManager] = None
|
||||
|
||||
Reference in New Issue
Block a user