main:支持 Worker 模式运行并优化任务管理
变更内容: - 在 `Dockerfile` 和 `docker-compose.yml` 中添加 Worker 模式支持,包含运行模式 `RUN_MODE` 的配置。 - 更新 API 路由,改为将任务入队处理,并由 Worker 执行。 - 在 JobManager 中新增任务队列及分布式锁功能,支持任务的入队、出队、执行控制以及重试机制。 - 添加全局并发控制逻辑,避免任务超额运行。 - 扩展单元测试,覆盖任务队列、锁机制和并发控制的各类场景。 - 在 Serverless 配置中分别为 API 和 Worker 添加独立服务定义。 提升任务调度灵活性,增强系统可靠性与扩展性。
This commit is contained in:
@@ -372,6 +372,195 @@ class JobManager:
|
||||
"""检查任务管理器是否可用"""
|
||||
return self._redis_client is not None
|
||||
|
||||
async def enqueue_job(self, job_id: str) -> bool:
|
||||
"""将任务加入队列
|
||||
|
||||
Args:
|
||||
job_id: 任务 ID
|
||||
|
||||
Returns:
|
||||
bool: 是否成功入队
|
||||
"""
|
||||
if not self._redis_client:
|
||||
logger.error(f"Redis 不可用,无法入队任务: {job_id}")
|
||||
return False
|
||||
|
||||
try:
|
||||
await self._redis_client.lpush(settings.job_queue_key, job_id)
|
||||
logger.info(f"任务已入队: job_id={job_id}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"任务入队失败: job_id={job_id}, error={e}")
|
||||
return False
|
||||
|
||||
async def dequeue_job(self, timeout: int = 5) -> Optional[str]:
|
||||
"""从队列获取任务(阻塞式)
|
||||
|
||||
Args:
|
||||
timeout: 阻塞超时时间(秒)
|
||||
|
||||
Returns:
|
||||
Optional[str]: 任务 ID,超时返回 None
|
||||
"""
|
||||
if not self._redis_client:
|
||||
return None
|
||||
|
||||
try:
|
||||
result = await self._redis_client.brpop(settings.job_queue_key, timeout=timeout)
|
||||
if result:
|
||||
# brpop 返回 (key, value) 元组
|
||||
return result[1]
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"任务出队失败: error={e}")
|
||||
return None
|
||||
|
||||
async def acquire_job_lock(self, job_id: str) -> bool:
|
||||
"""获取任务执行锁(分布式锁)
|
||||
|
||||
Args:
|
||||
job_id: 任务 ID
|
||||
|
||||
Returns:
|
||||
bool: 是否成功获取锁
|
||||
"""
|
||||
if not self._redis_client:
|
||||
return False
|
||||
|
||||
lock_key = f"job:lock:{job_id}"
|
||||
try:
|
||||
acquired = await self._redis_client.set(
|
||||
lock_key, "locked", nx=True, ex=settings.job_lock_ttl
|
||||
)
|
||||
if acquired:
|
||||
logger.debug(f"获取任务锁成功: job_id={job_id}")
|
||||
return acquired is not None
|
||||
except Exception as e:
|
||||
logger.error(f"获取任务锁失败: job_id={job_id}, error={e}")
|
||||
return False
|
||||
|
||||
async def release_job_lock(self, job_id: str) -> bool:
|
||||
"""释放任务执行锁
|
||||
|
||||
Args:
|
||||
job_id: 任务 ID
|
||||
|
||||
Returns:
|
||||
bool: 是否成功释放锁
|
||||
"""
|
||||
if not self._redis_client:
|
||||
return False
|
||||
|
||||
lock_key = f"job:lock:{job_id}"
|
||||
try:
|
||||
await self._redis_client.delete(lock_key)
|
||||
logger.debug(f"释放任务锁成功: job_id={job_id}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"释放任务锁失败: job_id={job_id}, error={e}")
|
||||
return False
|
||||
|
||||
async def increment_concurrency(self) -> int:
|
||||
"""增加全局并发计数
|
||||
|
||||
Returns:
|
||||
int: 增加后的并发数
|
||||
"""
|
||||
if not self._redis_client:
|
||||
return 0
|
||||
|
||||
try:
|
||||
count = await self._redis_client.incr(settings.job_concurrency_key)
|
||||
return count
|
||||
except Exception as e:
|
||||
logger.error(f"增加并发计数失败: error={e}")
|
||||
return 0
|
||||
|
||||
async def decrement_concurrency(self) -> int:
|
||||
"""减少全局并发计数
|
||||
|
||||
Returns:
|
||||
int: 减少后的并发数
|
||||
"""
|
||||
if not self._redis_client:
|
||||
return 0
|
||||
|
||||
try:
|
||||
count = await self._redis_client.decr(settings.job_concurrency_key)
|
||||
# 防止计数变为负数
|
||||
if count < 0:
|
||||
await self._redis_client.set(settings.job_concurrency_key, 0)
|
||||
return 0
|
||||
return count
|
||||
except Exception as e:
|
||||
logger.error(f"减少并发计数失败: error={e}")
|
||||
return 0
|
||||
|
||||
async def get_global_concurrency(self) -> int:
|
||||
"""获取当前全局并发数
|
||||
|
||||
Returns:
|
||||
int: 当前并发数
|
||||
"""
|
||||
if not self._redis_client:
|
||||
return 0
|
||||
|
||||
try:
|
||||
count = await self._redis_client.get(settings.job_concurrency_key)
|
||||
return int(count) if count else 0
|
||||
except Exception as e:
|
||||
logger.error(f"获取并发计数失败: error={e}")
|
||||
return 0
|
||||
|
||||
async def can_execute(self) -> bool:
|
||||
"""检查是否可以执行新任务(全局并发控制)
|
||||
|
||||
Returns:
|
||||
bool: 是否可以执行
|
||||
"""
|
||||
current = await self.get_global_concurrency()
|
||||
return current < settings.max_concurrent_jobs
|
||||
|
||||
async def get_job_retry_count(self, job_id: str) -> int:
|
||||
"""获取任务重试次数
|
||||
|
||||
Args:
|
||||
job_id: 任务 ID
|
||||
|
||||
Returns:
|
||||
int: 重试次数
|
||||
"""
|
||||
if not self._redis_client:
|
||||
return 0
|
||||
|
||||
key = f"job:{job_id}"
|
||||
try:
|
||||
retry_count = await self._redis_client.hget(key, "retry_count")
|
||||
return int(retry_count) if retry_count else 0
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
async def increment_job_retry(self, job_id: str) -> int:
|
||||
"""增加任务重试次数
|
||||
|
||||
Args:
|
||||
job_id: 任务 ID
|
||||
|
||||
Returns:
|
||||
int: 增加后的重试次数
|
||||
"""
|
||||
if not self._redis_client:
|
||||
return 0
|
||||
|
||||
key = f"job:{job_id}"
|
||||
try:
|
||||
await self._redis_client.hincrby(key, "retry_count", 1)
|
||||
retry_count = await self._redis_client.hget(key, "retry_count")
|
||||
return int(retry_count) if retry_count else 1
|
||||
except Exception as e:
|
||||
logger.error(f"增加重试次数失败: job_id={job_id}, error={e}")
|
||||
return 0
|
||||
|
||||
def get_concurrency_status(self) -> Dict[str, int]:
|
||||
"""获取并发状态
|
||||
|
||||
|
||||
Reference in New Issue
Block a user