main:优化任务管理及队列监控性能

变更内容:
- 优化任务出队逻辑,采用 BLMOVE 提升队列操作的原子性和可靠性。
- 在 JobManager 中新增任务锁续租、超时任务回收、ACK/NACK 状态管理功能。
- 实现任务队列和死信队列监控指标收集,为系统性能分析提供数据支持。
- 扩展 Worker 模块,增加锁续租逻辑及任务回收调度。
- 更新测试用例,覆盖任务管理和队列指标的新增逻辑。
- 补充 metrics.yaml 文件,添加队列相关的监控指标定义。
- 更新依赖,补充 Redis 支持及相关库版本规范。
This commit is contained in:
2026-02-03 18:18:02 +08:00
parent c7626723a3
commit 7c8b96927d
8 changed files with 1318 additions and 46 deletions

View File

@@ -536,17 +536,19 @@ class TestJobQueue:
@pytest.mark.asyncio
async def test_dequeue_job(self):
"""测试任务出队"""
"""测试任务出队(使用 BLMOVE"""
manager = JobManager()
mock_redis = AsyncMock()
mock_redis.brpop = AsyncMock(return_value=("job:queue", "test-job-id"))
mock_redis.blmove = AsyncMock(return_value="test-job-id")
mock_redis.zadd = AsyncMock()
manager._redis_client = mock_redis
result = await manager.dequeue_job(timeout=5)
assert result == "test-job-id"
mock_redis.brpop.assert_called_once()
mock_redis.blmove.assert_called_once()
mock_redis.zadd.assert_called_once()
@pytest.mark.asyncio
async def test_dequeue_job_timeout(self):
@@ -554,7 +556,7 @@ class TestJobQueue:
manager = JobManager()
mock_redis = AsyncMock()
mock_redis.brpop = AsyncMock(return_value=None)
mock_redis.blmove = AsyncMock(return_value=None)
manager._redis_client = mock_redis
result = await manager.dequeue_job(timeout=1)
@@ -585,7 +587,8 @@ class TestDistributedLock:
result = await manager.acquire_job_lock("test-job-id")
assert result is True
assert result is not None # 返回 token
assert len(result) == 32 # 16 字节的十六进制字符串
mock_redis.set.assert_called_once()
call_args = mock_redis.set.call_args
assert call_args[0][0] == "job:lock:test-job-id"
@@ -603,7 +606,7 @@ class TestDistributedLock:
result = await manager.acquire_job_lock("test-job-id")
assert result is False
assert result is None
@pytest.mark.asyncio
async def test_release_job_lock(self):
@@ -611,20 +614,20 @@ class TestDistributedLock:
manager = JobManager()
mock_redis = AsyncMock()
mock_redis.delete = AsyncMock(return_value=1)
mock_redis.eval = AsyncMock(return_value=1)
manager._redis_client = mock_redis
result = await manager.release_job_lock("test-job-id")
result = await manager.release_job_lock("test-job-id", "valid-token")
assert result is True
mock_redis.delete.assert_called_once_with("job:lock:test-job-id")
mock_redis.eval.assert_called_once()
@pytest.mark.asyncio
async def test_release_job_lock_without_redis(self):
"""测试 Redis 不可用时释放锁"""
manager = JobManager()
result = await manager.release_job_lock("test-job-id")
result = await manager.release_job_lock("test-job-id", "token")
assert result is False
@@ -778,3 +781,390 @@ class TestJobRetry:
assert result == 3
mock_redis.hincrby.assert_called_once_with("job:test-job-id", "retry_count", 1)
class TestTransferDequeue:
    """Transfer-style dequeue behaviour of ``JobManager.dequeue_job``."""

    @staticmethod
    def _manager_with(redis_stub):
        """Return a ``JobManager`` whose Redis client is ``redis_stub``."""
        job_manager = JobManager()
        job_manager._redis_client = redis_stub
        return job_manager

    @pytest.mark.asyncio
    async def test_dequeue_job_with_blmove(self):
        """The id returned by BLMOVE is handed back and ZADD is called once."""
        redis_stub = AsyncMock()
        # Attributes of an AsyncMock are awaitable mocks already, so only the
        # return value needs configuring; ``zadd`` is created on first access.
        redis_stub.blmove.return_value = "test-job-id"

        job_id = await self._manager_with(redis_stub).dequeue_job(timeout=5)

        assert job_id == "test-job-id"
        redis_stub.blmove.assert_called_once()
        redis_stub.zadd.assert_called_once()

    @pytest.mark.asyncio
    async def test_dequeue_job_timeout(self):
        """A BLMOVE that yields ``None`` produces ``None`` and no ZADD call."""
        redis_stub = AsyncMock()
        redis_stub.blmove.return_value = None

        job_id = await self._manager_with(redis_stub).dequeue_job(timeout=1)

        assert job_id is None
        redis_stub.zadd.assert_not_called()
class TestTokenBasedLock:
    """Acquire/release tests for the token-based job lock."""

    @staticmethod
    def _manager_with(redis_stub):
        """Return a ``JobManager`` whose Redis client is ``redis_stub``."""
        job_manager = JobManager()
        job_manager._redis_client = redis_stub
        return job_manager

    @pytest.mark.asyncio
    async def test_acquire_job_lock_returns_token(self):
        """Acquiring a free lock returns a token and issues one SET with nx."""
        redis_stub = AsyncMock()
        redis_stub.set.return_value = True

        token = await self._manager_with(redis_stub).acquire_job_lock("test-job-id")

        assert token is not None
        assert len(token) == 32  # hex string for 16 bytes
        redis_stub.set.assert_called_once()
        set_call = redis_stub.set.call_args
        assert set_call.args[0] == "job:lock:test-job-id"
        assert set_call.kwargs["nx"] is True

    @pytest.mark.asyncio
    async def test_acquire_job_lock_already_locked(self):
        """When SET returns ``None`` no token is handed out."""
        redis_stub = AsyncMock()
        redis_stub.set.return_value = None

        token = await self._manager_with(redis_stub).acquire_job_lock("test-job-id")

        assert token is None

    @pytest.mark.asyncio
    async def test_release_job_lock_with_token(self):
        """Releasing with a token goes through one EVAL call and reports True."""
        redis_stub = AsyncMock()
        redis_stub.eval.return_value = 1

        released = await self._manager_with(redis_stub).release_job_lock(
            "test-job-id", "valid-token"
        )

        assert released is True
        redis_stub.eval.assert_called_once()

    @pytest.mark.asyncio
    async def test_release_job_lock_invalid_token(self):
        """An EVAL result of 0 makes the release report False."""
        redis_stub = AsyncMock()
        redis_stub.eval.return_value = 0

        released = await self._manager_with(redis_stub).release_job_lock(
            "test-job-id", "invalid-token"
        )

        assert released is False

    @pytest.mark.asyncio
    async def test_release_job_lock_without_token(self):
        """Releasing without a token (backward compatible) deletes the lock key."""
        redis_stub = AsyncMock()

        released = await self._manager_with(redis_stub).release_job_lock("test-job-id")

        assert released is True
        redis_stub.delete.assert_called_once_with("job:lock:test-job-id")
class TestAckNack:
    """Tests for the ACK/NACK handling of ``JobManager``."""

    @staticmethod
    def _make_pipeline(*commands):
        """Build a pipeline mock usable in ``async with redis.pipeline() as pipe``.

        Args:
            *commands: Names of pipeline commands to expose as synchronous
                ``MagicMock`` attributes (e.g. ``"lrem"``, ``"zrem"``).

        Returns:
            The configured pipeline mock; entering it yields the mock itself.
        """
        pipe = AsyncMock()
        for command in commands:
            setattr(pipe, command, MagicMock())
        pipe.execute = AsyncMock()
        pipe.__aenter__ = AsyncMock(return_value=pipe)
        # A bare AsyncMock() resolves to a truthy mock object, and a truthy
        # __aexit__ result makes ``async with`` swallow any exception raised in
        # its body. Return False so failures inside the pipeline block surface.
        pipe.__aexit__ = AsyncMock(return_value=False)
        return pipe

    @pytest.mark.asyncio
    async def test_ack_job(self):
        """Acknowledging a job reports True and queues one LREM and one ZREM."""
        manager = JobManager()
        mock_pipe = self._make_pipeline("lrem", "zrem")

        mock_redis = AsyncMock()
        mock_redis.pipeline = MagicMock(return_value=mock_pipe)
        manager._redis_client = mock_redis

        result = await manager.ack_job("test-job-id")

        assert result is True
        mock_pipe.lrem.assert_called_once()
        mock_pipe.zrem.assert_called_once()

    @pytest.mark.asyncio
    async def test_nack_job_requeue(self):
        """Rejecting a job with requeue=True pushes exactly once via LPUSH."""
        manager = JobManager()
        mock_pipe = self._make_pipeline("lrem", "zrem", "lpush")

        mock_redis = AsyncMock()
        mock_redis.pipeline = MagicMock(return_value=mock_pipe)
        mock_redis.hget = AsyncMock(return_value="0")  # retry_count = 0
        manager._redis_client = mock_redis

        result = await manager.nack_job("test-job-id", requeue=True)

        assert result is True
        assert mock_pipe.lpush.call_count == 1

    @pytest.mark.asyncio
    async def test_nack_job_to_dlq(self):
        """Rejecting a job whose retry count exceeds the maximum reports True.

        NOTE(review): only the return value is checked here; the dead-letter
        push itself is not asserted. Consider adding that once the Redis
        command used by ``nack_job`` is confirmed.
        """
        manager = JobManager()
        mock_pipe = self._make_pipeline("lrem", "zrem", "lpush")

        mock_redis = AsyncMock()
        mock_redis.pipeline = MagicMock(return_value=mock_pipe)
        mock_redis.hget = AsyncMock(return_value="5")  # retry_count > max_retries
        manager._redis_client = mock_redis

        with patch("functional_scaffold.core.job_manager.settings") as mock_settings:
            mock_settings.job_max_retries = 3
            mock_settings.job_processing_key = "job:processing"
            mock_settings.job_processing_ts_key = "job:processing:ts"
            mock_settings.job_dlq_key = "job:dlq"
            mock_settings.job_queue_key = "job:queue"

            result = await manager.nack_job("test-job-id", requeue=True)

        assert result is True
class TestLockRenewal:
    """Tests for ``JobManager.renew_job_lock``."""

    @staticmethod
    def _manager_with(redis_stub):
        """Return a ``JobManager`` whose Redis client is ``redis_stub``."""
        job_manager = JobManager()
        job_manager._redis_client = redis_stub
        return job_manager

    @pytest.mark.asyncio
    async def test_renew_job_lock_success(self):
        """An EVAL result of 1 makes the renewal report True after one call."""
        redis_stub = AsyncMock()
        redis_stub.eval.return_value = 1

        renewed = await self._manager_with(redis_stub).renew_job_lock(
            "test-job-id", "valid-token"
        )

        assert renewed is True
        redis_stub.eval.assert_called_once()

    @pytest.mark.asyncio
    async def test_renew_job_lock_invalid_token(self):
        """Renewal fails (token mismatch) when EVAL returns 0."""
        redis_stub = AsyncMock()
        redis_stub.eval.return_value = 0

        renewed = await self._manager_with(redis_stub).renew_job_lock(
            "test-job-id", "invalid-token"
        )

        assert renewed is False

    @pytest.mark.asyncio
    async def test_renew_job_lock_without_redis(self):
        """Renewal reports False when no Redis client was attached."""
        job_manager = JobManager()

        renewed = await job_manager.renew_job_lock("test-job-id", "token")

        assert renewed is False
class TestStaleJobRecovery:
    """Tests for ``JobManager.recover_stale_jobs``."""

    @staticmethod
    def _make_pipeline():
        """Build a pipeline mock usable in ``async with redis.pipeline() as pipe``.

        ``lrem``, ``zrem`` and ``lpush`` are synchronous mocks; ``execute`` is
        awaitable. Entering the pipeline yields the mock itself.
        """
        pipe = AsyncMock()
        pipe.lrem = MagicMock()
        pipe.zrem = MagicMock()
        pipe.lpush = MagicMock()
        pipe.execute = AsyncMock()
        pipe.__aenter__ = AsyncMock(return_value=pipe)
        # A bare AsyncMock() resolves to a truthy mock object, and a truthy
        # __aexit__ result makes ``async with`` swallow any exception raised in
        # its body. Return False so failures inside the pipeline block surface.
        pipe.__aexit__ = AsyncMock(return_value=False)
        return pipe

    @staticmethod
    def _configure_settings(mock_settings):
        """Populate the patched ``settings`` object with the values both recovery tests use."""
        mock_settings.job_execution_timeout = 300
        mock_settings.job_lock_buffer = 60
        mock_settings.job_max_retries = 3
        mock_settings.job_processing_key = "job:processing"
        mock_settings.job_processing_ts_key = "job:processing:ts"
        mock_settings.job_dlq_key = "job:dlq"
        mock_settings.job_queue_key = "job:queue"

    @pytest.mark.asyncio
    async def test_recover_stale_jobs_empty(self):
        """With no stale entries returned by ZRANGEBYSCORE the count is 0."""
        manager = JobManager()
        mock_redis = AsyncMock()
        mock_redis.zrangebyscore = AsyncMock(return_value=[])
        manager._redis_client = mock_redis

        result = await manager.recover_stale_jobs()

        assert result == 0

    @pytest.mark.asyncio
    async def test_recover_stale_jobs_requeue(self):
        """Two stale jobs below the retry limit are both counted as recovered."""
        manager = JobManager()
        mock_pipe = self._make_pipeline()

        mock_redis = AsyncMock()
        mock_redis.zrangebyscore = AsyncMock(return_value=["stale-job-1", "stale-job-2"])
        mock_redis.hincrby = AsyncMock()
        mock_redis.hget = AsyncMock(return_value="1")  # retry_count = 1
        mock_redis.pipeline = MagicMock(return_value=mock_pipe)
        manager._redis_client = mock_redis

        with patch("functional_scaffold.core.job_manager.settings") as mock_settings:
            self._configure_settings(mock_settings)
            result = await manager.recover_stale_jobs()

        assert result == 2

    @pytest.mark.asyncio
    async def test_recover_stale_jobs_to_dlq(self):
        """A stale job whose retry count exceeds the maximum is still counted."""
        manager = JobManager()
        mock_pipe = self._make_pipeline()

        mock_redis = AsyncMock()
        mock_redis.zrangebyscore = AsyncMock(return_value=["stale-job-1"])
        mock_redis.hincrby = AsyncMock()
        mock_redis.hget = AsyncMock(return_value="5")  # retry_count > max_retries
        mock_redis.pipeline = MagicMock(return_value=mock_pipe)
        manager._redis_client = mock_redis

        with patch("functional_scaffold.core.job_manager.settings") as mock_settings:
            self._configure_settings(mock_settings)
            result = await manager.recover_stale_jobs()

        assert result == 1

    @pytest.mark.asyncio
    async def test_recover_stale_jobs_without_redis(self):
        """Recovery reports 0 when no Redis client was attached."""
        manager = JobManager()

        result = await manager.recover_stale_jobs()

        assert result == 0
class TestQueueMetrics:
    """Tests for ``JobManager.collect_queue_metrics``."""

    @staticmethod
    def _make_pipeline(execute_result):
        """Build a pipeline mock whose ``execute()`` resolves to ``execute_result``.

        ``llen`` and ``zrange`` are synchronous mocks. Entering the pipeline
        with ``async with`` yields the mock itself.
        """
        pipe = AsyncMock()
        pipe.llen = MagicMock()
        pipe.zrange = MagicMock()
        pipe.execute = AsyncMock(return_value=execute_result)
        pipe.__aenter__ = AsyncMock(return_value=pipe)
        # A bare AsyncMock() resolves to a truthy mock object, and a truthy
        # __aexit__ result makes ``async with`` swallow any exception raised in
        # its body. Return False so failures inside the pipeline block surface.
        pipe.__aexit__ = AsyncMock(return_value=False)
        return pipe

    @pytest.mark.asyncio
    async def test_collect_queue_metrics(self):
        """Pipeline results are mapped onto the four reported metrics."""
        manager = JobManager()
        mock_pipe = self._make_pipeline([5, 2, 1, [("job-1", 1000.0)]])

        mock_redis = AsyncMock()
        mock_redis.pipeline = MagicMock(return_value=mock_pipe)
        manager._redis_client = mock_redis

        # NOTE(review): patching ``job_manager.set`` presumably targets a
        # metrics helper imported into that module; patch() raises
        # AttributeError if the attribute does not exist -- confirm.
        with patch("functional_scaffold.core.job_manager.time") as mock_time, patch(
            "functional_scaffold.core.job_manager.set"
        ):
            # 60 seconds after the 1000.0 recorded for job-1.
            mock_time.time.return_value = 1060.0
            result = await manager.collect_queue_metrics()

        assert result["queue_length"] == 5
        assert result["processing_length"] == 2
        assert result["dlq_length"] == 1
        assert result["oldest_waiting_seconds"] == 60.0

    @pytest.mark.asyncio
    async def test_collect_queue_metrics_empty(self):
        """Empty queues yield zero for every metric."""
        manager = JobManager()
        mock_pipe = self._make_pipeline([0, 0, 0, []])

        mock_redis = AsyncMock()
        mock_redis.pipeline = MagicMock(return_value=mock_pipe)
        manager._redis_client = mock_redis

        with patch("functional_scaffold.core.job_manager.set"):
            result = await manager.collect_queue_metrics()

        assert result["queue_length"] == 0
        assert result["processing_length"] == 0
        assert result["dlq_length"] == 0
        assert result["oldest_waiting_seconds"] == 0

    @pytest.mark.asyncio
    async def test_collect_queue_metrics_without_redis(self):
        """Every metric is zero when no Redis client was attached."""
        manager = JobManager()

        result = await manager.collect_queue_metrics()

        assert result["queue_length"] == 0
        assert result["processing_length"] == 0
        assert result["dlq_length"] == 0
        assert result["oldest_waiting_seconds"] == 0