main:删除多余文档并清理项目目录

变更内容:
- 移除冗余文档,包括 Grafana 指南、指标对比、修复总结、OpenAPI 规范等。
- 精简项目文档结构,优化 README 文件内容。
- 提升文档层次清晰度,集中核心指南。
This commit is contained in:
2026-02-02 14:59:34 +08:00
parent 3254fdc3f0
commit 5feb795d12
23 changed files with 3763 additions and 960 deletions

View File

@@ -2,6 +2,7 @@
from typing import Dict, Any, List
from .base import BaseAlgorithm
from ..core.metrics_unified import incr
class PrimeChecker(BaseAlgorithm):
@@ -30,10 +31,12 @@ class PrimeChecker(BaseAlgorithm):
ValueError: 如果输入不是整数
"""
if not isinstance(number, int):
incr('prime_check',{"status":"invalid_input"})
raise ValueError(f"Input must be an integer, got {type(number).__name__}")
# 小于2的数不是质数
if number < 2:
incr('prime_check', {"status": "number_little_two"})
return {
"number": number,
"is_prime": False,
@@ -47,7 +50,7 @@ class PrimeChecker(BaseAlgorithm):
# 如果不是质数,计算因数
factors = [] if is_prime else self._get_factors(number)
incr('prime_check', {"status": "success"})
return {
"number": number,
"is_prime": is_prime,

View File

@@ -1,5 +1,6 @@
"""API 数据模型"""
from enum import Enum
from pydantic import BaseModel, Field, ConfigDict
from typing import Any, Dict, Optional
@@ -7,13 +8,7 @@ from typing import Any, Dict, Optional
class InvokeRequest(BaseModel):
"""同步调用请求"""
model_config = ConfigDict(
json_schema_extra={
"example": {
"number": 17
}
}
)
model_config = ConfigDict(json_schema_extra={"example": {"number": 17}})
number: int = Field(..., description="待判断的整数")
@@ -30,13 +25,13 @@ class InvokeResponse(BaseModel):
"number": 17,
"is_prime": True,
"factors": [],
"algorithm": "trial_division"
"algorithm": "trial_division",
},
"metadata": {
"algorithm": "PrimeChecker",
"version": "1.0.0",
"elapsed_time": 0.001
}
"elapsed_time": 0.001,
},
}
}
)
@@ -71,7 +66,7 @@ class ErrorResponse(BaseModel):
"error": "VALIDATION_ERROR",
"message": "number must be an integer",
"details": {"field": "number", "value": "abc"},
"request_id": "550e8400-e29b-41d4-a716-446655440000"
"request_id": "550e8400-e29b-41d4-a716-446655440000",
}
}
)
@@ -80,3 +75,80 @@ class ErrorResponse(BaseModel):
message: str = Field(..., description="错误消息")
details: Optional[Dict[str, Any]] = Field(None, description="错误详情")
request_id: Optional[str] = Field(None, description="请求ID")
class JobStatus(str, Enum):
    """Lifecycle states of an asynchronous job.

    Inherits from ``str`` so members compare equal to their plain string
    values and serialize transparently in JSON/pydantic payloads.
    """

    PENDING = "pending"        # created, waiting to be executed
    RUNNING = "running"        # currently executing
    COMPLETED = "completed"    # finished successfully
    FAILED = "failed"          # execution raised or reported failure
class JobRequest(BaseModel):
    """Request body for creating an asynchronous job."""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "algorithm": "PrimeChecker",
                "params": {"number": 17},
                "webhook": "https://example.com/callback",
            }
        }
    )

    # Name of a registered algorithm class, e.g. "PrimeChecker".
    algorithm: str = Field(..., description="算法名称")
    # Parameters forwarded to the algorithm's execute() call.
    params: Dict[str, Any] = Field(..., description="算法参数")
    # Optional URL that receives a POST callback when the job finishes.
    webhook: Optional[str] = Field(None, description="回调 URL")
class JobCreateResponse(BaseModel):
    """Response returned immediately after a job is created (202 Accepted)."""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "job_id": "a1b2c3d4e5f6",
                "status": "pending",
                "message": "任务已创建",
                "created_at": "2026-02-02T10:00:00Z",
            }
        }
    )

    # 12-hex-character identifier used to poll job status later.
    job_id: str = Field(..., description="任务唯一标识")
    # Initial state; always "pending" at creation time.
    status: JobStatus = Field(..., description="任务状态")
    # Human-readable status message.
    message: str = Field(..., description="状态消息")
    # ISO 8601 creation timestamp (string, not datetime).
    created_at: str = Field(..., description="创建时间ISO 8601")
class JobStatusResponse(BaseModel):
    """Response for the job-status query endpoint.

    ``result``/``metadata`` are populated only for completed jobs and
    ``error`` only for failed ones; the others stay ``None``.
    """

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "job_id": "a1b2c3d4e5f6",
                "status": "completed",
                "algorithm": "PrimeChecker",
                "created_at": "2026-02-02T10:00:00Z",
                "started_at": "2026-02-02T10:00:01Z",
                "completed_at": "2026-02-02T10:00:02Z",
                "result": {"number": 17, "is_prime": True},
                "error": None,
                "metadata": {"elapsed_time": 0.001},
            }
        }
    )

    # Unique job identifier (same value used in the Redis key).
    job_id: str = Field(..., description="任务唯一标识")
    # Current lifecycle state of the job.
    status: JobStatus = Field(..., description="任务状态")
    # Name of the algorithm this job runs.
    algorithm: str = Field(..., description="算法名称")
    # Timestamps are ISO 8601 strings; started/completed are None until reached.
    created_at: str = Field(..., description="创建时间ISO 8601")
    started_at: Optional[str] = Field(None, description="开始执行时间ISO 8601")
    completed_at: Optional[str] = Field(None, description="完成时间ISO 8601")
    result: Optional[Dict[str, Any]] = Field(None, description="执行结果(仅完成时返回)")
    error: Optional[str] = Field(None, description="错误信息(仅失败时返回)")
    metadata: Optional[Dict[str, Any]] = Field(None, description="元数据信息")

View File

@@ -1,7 +1,7 @@
"""API 路由"""
import asyncio
from fastapi import APIRouter, HTTPException, Depends, status
from fastapi.responses import JSONResponse
import time
import logging
@@ -11,10 +11,15 @@ from .models import (
HealthResponse,
ReadinessResponse,
ErrorResponse,
JobRequest,
JobCreateResponse,
JobStatusResponse,
JobStatus,
)
from .dependencies import get_request_id
from ..algorithms.prime_checker import PrimeChecker
from ..core.errors import FunctionalScaffoldError, ValidationError, AlgorithmError
from ..core.errors import ValidationError, AlgorithmError
from ..core.job_manager import get_job_manager
logger = logging.getLogger(__name__)
@@ -134,17 +139,156 @@ async def readiness_check():
@router.post(
    "/jobs",
    response_model=JobCreateResponse,
    status_code=status.HTTP_202_ACCEPTED,
    summary="创建异步任务",
    description="创建异步任务,立即返回任务 ID任务在后台执行",
    responses={
        202: {"description": "任务已创建", "model": JobCreateResponse},
        400: {"description": "请求参数错误", "model": ErrorResponse},
        404: {"description": "算法不存在", "model": ErrorResponse},
        503: {"description": "服务不可用", "model": ErrorResponse},
    },
)
async def create_job(
    request: JobRequest,
    request_id: str = Depends(get_request_id),
):
    """Create an asynchronous job and return its ID immediately.

    The job itself is executed in the background.

    - **algorithm**: registered algorithm name (e.g. ``PrimeChecker``)
    - **params**: parameters forwarded to the algorithm
    - **webhook**: optional callback URL invoked when the job finishes

    Raises:
        HTTPException: 503 when the job store is unavailable, 404 for an
            unknown algorithm, 500 for unexpected failures.
    """
    try:
        job_manager = await get_job_manager()

        # Fail fast when Redis (the job store) is down.
        if not job_manager.is_available():
            raise HTTPException(
                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                detail={
                    "error": "SERVICE_UNAVAILABLE",
                    "message": "任务服务暂不可用,请稍后重试",
                    "request_id": request_id,
                },
            )

        # Validate that the requested algorithm is registered.
        available_algorithms = job_manager.get_available_algorithms()
        if request.algorithm not in available_algorithms:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail={
                    "error": "ALGORITHM_NOT_FOUND",
                    "message": f"算法 '{request.algorithm}' 不存在",
                    "details": {"available_algorithms": available_algorithms},
                    "request_id": request_id,
                },
            )

        # Persist the job record first so its ID can be returned right away.
        job_id = await job_manager.create_job(
            algorithm=request.algorithm,
            params=request.params,
            webhook=request.webhook,
            request_id=request_id,
        )

        # Read the stored record back for the authoritative created_at value.
        job_data = await job_manager.get_job(job_id)

        # Fire-and-forget background execution.
        # NOTE(review): the Task reference is not retained; per asyncio docs
        # an unreferenced task may be garbage-collected — confirm acceptable.
        asyncio.create_task(job_manager.execute_job(job_id))

        logger.info(f"异步任务已创建: job_id={job_id}, request_id={request_id}")

        return JobCreateResponse(
            job_id=job_id,
            status=JobStatus.PENDING,
            message="任务已创建",
            created_at=job_data["created_at"],
        )
    except HTTPException:
        # Pass through deliberate HTTP errors unchanged.
        raise
    except Exception as e:
        logger.error(f"创建任务失败: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={
                "error": "INTERNAL_ERROR",
                "message": str(e),
                "request_id": request_id,
            },
        )
@router.get(
    "/jobs/{job_id}",
    response_model=JobStatusResponse,
    summary="查询任务状态",
    description="查询异步任务的执行状态和结果",
    responses={
        200: {"description": "成功", "model": JobStatusResponse},
        404: {"description": "任务不存在或已过期", "model": ErrorResponse},
        503: {"description": "服务不可用", "model": ErrorResponse},
    },
)
async def get_job_status(
    job_id: str,
    request_id: str = Depends(get_request_id),
):
    """Return the execution state (and, when finished, the result) of a job.

    - **job_id**: unique job identifier returned by the create endpoint

    Raises:
        HTTPException: 503 when the job store is unavailable, 404 when the
            job is unknown or its record expired, 500 for unexpected failures.
    """
    try:
        job_manager = await get_job_manager()

        # Fail fast when Redis (the job store) is down.
        if not job_manager.is_available():
            raise HTTPException(
                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                detail={
                    "error": "SERVICE_UNAVAILABLE",
                    "message": "任务服务暂不可用,请稍后重试",
                    # Include request_id, consistent with the create endpoint
                    # and the ErrorResponse schema.
                    "request_id": request_id,
                },
            )

        # Missing record means the job never existed or its TTL expired.
        job_data = await job_manager.get_job(job_id)
        if not job_data:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail={
                    "error": "JOB_NOT_FOUND",
                    "message": f"任务 '{job_id}' 不存在或已过期",
                    "request_id": request_id,
                },
            )

        return JobStatusResponse(
            job_id=job_data["job_id"],
            status=JobStatus(job_data["status"]),
            algorithm=job_data["algorithm"],
            created_at=job_data["created_at"],
            started_at=job_data["started_at"],
            completed_at=job_data["completed_at"],
            result=job_data["result"],
            error=job_data["error"],
            metadata=job_data["metadata"],
        )
    except HTTPException:
        # Pass through deliberate HTTP errors unchanged.
        raise
    except Exception as e:
        logger.error(f"查询任务状态失败: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={
                "error": "INTERNAL_ERROR",
                "message": str(e),
                "request_id": request_id,
            },
        )

View File

@@ -49,6 +49,11 @@ class Settings(BaseSettings):
metrics_config_path: str = "config/metrics.yaml"
metrics_instance_id: Optional[str] = None # 默认使用 hostname
# 异步任务配置
job_result_ttl: int = 1800 # 结果缓存时间(秒),默认 30 分钟
webhook_max_retries: int = 3 # Webhook 最大重试次数
webhook_timeout: int = 10 # Webhook 超时时间(秒)
# 全局配置实例
settings = Settings()

View File

@@ -0,0 +1,381 @@
"""异步任务管理模块
基于 Redis 的异步任务管理,支持任务创建、执行、状态查询和 Webhook 回调。
"""
import asyncio
import json
import logging
import secrets
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Type
import httpx
import redis.asyncio as aioredis
from ..algorithms.base import BaseAlgorithm
from ..config import settings
from ..core.metrics_unified import incr, observe
logger = logging.getLogger(__name__)
class JobManager:
    """Redis-backed asynchronous job manager.

    Owns the Redis connection (job store), an httpx client (webhook
    delivery), and a registry of algorithm classes. Jobs are stored as
    Redis hashes under ``job:{job_id}`` with all values as strings;
    ``result``/``metadata`` are JSON-encoded within the hash.
    """

    def __init__(self) -> None:
        # Both clients are created in initialize(); None means "not connected".
        self._redis_client: Optional[aioredis.Redis] = None
        # Maps algorithm class name -> class, filled by _register_algorithms().
        self._algorithm_registry: Dict[str, Type[BaseAlgorithm]] = {}
        self._http_client: Optional[httpx.AsyncClient] = None

    async def initialize(self) -> None:
        """Set up the Redis connection, the HTTP client, and the algorithm registry.

        A Redis failure is logged and leaves the manager degraded
        (``is_available()`` returns False) rather than raising.
        """
        # Open the async Redis connection.
        try:
            self._redis_client = aioredis.Redis(
                host=settings.redis_host,
                port=settings.redis_port,
                db=settings.redis_db,
                password=settings.redis_password if settings.redis_password else None,
                decode_responses=True,
                socket_connect_timeout=5,
                socket_timeout=5,
            )
            # Probe the connection so failures surface here, not on first use.
            await self._redis_client.ping()
            logger.info(f"任务管理器 Redis 连接成功: {settings.redis_host}:{settings.redis_port}")
        except Exception as e:
            logger.error(f"任务管理器 Redis 连接失败: {e}")
            self._redis_client = None
        # HTTP client used for webhook callbacks; timeout comes from settings.
        self._http_client = httpx.AsyncClient(timeout=settings.webhook_timeout)
        # Discover and register algorithm classes.
        self._register_algorithms()

    async def shutdown(self) -> None:
        """Close the Redis connection and the HTTP client."""
        if self._redis_client:
            # NOTE(review): newer redis-py asyncio releases deprecate close()
            # in favor of aclose() — confirm against the pinned version.
            await self._redis_client.close()
            logger.info("任务管理器 Redis 连接已关闭")
        if self._http_client:
            await self._http_client.aclose()
            logger.info("任务管理器 HTTP 客户端已关闭")

    def _register_algorithms(self) -> None:
        """Register every concrete BaseAlgorithm subclass exported by ``..algorithms``."""
        from ..algorithms import __all__ as algorithm_names
        from .. import algorithms as algorithms_module

        for name in algorithm_names:
            cls = getattr(algorithms_module, name, None)
            # Only real subclasses count; skip the abstract base itself.
            if cls and isinstance(cls, type) and issubclass(cls, BaseAlgorithm):
                if cls is not BaseAlgorithm:
                    self._algorithm_registry[name] = cls
                    logger.debug(f"已注册算法: {name}")
        logger.info(f"已注册 {len(self._algorithm_registry)} 个算法")

    def get_available_algorithms(self) -> List[str]:
        """Return the names of all registered algorithms."""
        return list(self._algorithm_registry.keys())

    def _generate_job_id(self) -> str:
        """Generate a 12-character hex job ID (48 random bits, via ``secrets``)."""
        return secrets.token_hex(6)

    def _get_timestamp(self) -> str:
        """Return the current UTC time as an ISO 8601 string."""
        return datetime.now(timezone.utc).isoformat()

    async def create_job(
        self,
        algorithm: str,
        params: Dict[str, Any],
        webhook: Optional[str] = None,
        request_id: Optional[str] = None,
    ) -> str:
        """Create a new job record in Redis and return its job_id.

        Args:
            algorithm: Registered algorithm name.
            params: Algorithm parameters (JSON-encoded into the hash).
            webhook: Optional callback URL.
            request_id: Optional originating request ID.

        Returns:
            str: The new job ID.

        Raises:
            RuntimeError: If Redis is unavailable.
            ValueError: If the algorithm is not registered.
        """
        if not self._redis_client:
            raise RuntimeError("Redis 不可用,无法创建任务")
        if algorithm not in self._algorithm_registry:
            raise ValueError(f"算法 '{algorithm}' 不存在")

        job_id = self._generate_job_id()
        created_at = self._get_timestamp()

        # All-string hash; empty string doubles as "not set" for optional fields.
        job_data = {
            "status": "pending",
            "algorithm": algorithm,
            "params": json.dumps(params),
            "webhook": webhook or "",
            "request_id": request_id or "",
            "created_at": created_at,
            "started_at": "",
            "completed_at": "",
            "result": "",
            "error": "",
            "metadata": "",
        }

        # Persist under job:{id}; no TTL yet — that is set on completion.
        key = f"job:{job_id}"
        await self._redis_client.hset(key, mapping=job_data)

        # Metrics: count created jobs per algorithm.
        incr("jobs_created_total", {"algorithm": algorithm})
        logger.info(f"任务已创建: job_id={job_id}, algorithm={algorithm}")
        return job_id

    async def get_job(self, job_id: str) -> Optional[Dict[str, Any]]:
        """Fetch and decode a job record.

        Args:
            job_id: Job ID.

        Returns:
            Decoded job dict, or None when Redis is down or the job is
            missing/expired.
        """
        if not self._redis_client:
            return None
        key = f"job:{job_id}"
        job_data = await self._redis_client.hgetall(key)
        if not job_data:
            return None

        # Convert the flat string hash back into typed fields; empty
        # strings become None for the optional ones.
        result = {
            "job_id": job_id,
            "status": job_data.get("status", ""),
            "algorithm": job_data.get("algorithm", ""),
            "created_at": job_data.get("created_at", ""),
            "started_at": job_data.get("started_at") or None,
            "completed_at": job_data.get("completed_at") or None,
            "result": None,
            "error": job_data.get("error") or None,
            "metadata": None,
        }
        # Decode JSON-encoded result; corrupt JSON degrades to None.
        if job_data.get("result"):
            try:
                result["result"] = json.loads(job_data["result"])
            except json.JSONDecodeError:
                result["result"] = None
        # Decode JSON-encoded metadata; corrupt JSON degrades to None.
        if job_data.get("metadata"):
            try:
                result["metadata"] = json.loads(job_data["metadata"])
            except json.JSONDecodeError:
                result["metadata"] = None
        return result

    async def execute_job(self, job_id: str) -> None:
        """Run a job to completion (intended to be scheduled as a background task).

        Transitions the record pending -> running -> completed/failed,
        sets the result TTL, records metrics, and triggers the webhook
        callback when one is configured.

        Args:
            job_id: Job ID.
        """
        if not self._redis_client:
            logger.error(f"Redis 不可用,无法执行任务: {job_id}")
            return
        key = f"job:{job_id}"
        job_data = await self._redis_client.hgetall(key)
        if not job_data:
            logger.error(f"任务不存在: {job_id}")
            return

        algorithm_name = job_data.get("algorithm", "")
        webhook_url = job_data.get("webhook", "")

        # Decode stored params; corrupt JSON degrades to empty kwargs.
        try:
            params = json.loads(job_data.get("params", "{}"))
        except json.JSONDecodeError:
            params = {}

        # Mark the job as running before starting work.
        started_at = self._get_timestamp()
        await self._redis_client.hset(key, mapping={"status": "running", "started_at": started_at})
        logger.info(f"开始执行任务: job_id={job_id}, algorithm={algorithm_name}")

        import time

        start_time = time.time()
        status = "completed"
        result_data = None
        error_msg = None
        metadata = None
        try:
            algorithm_cls = self._algorithm_registry.get(algorithm_name)
            if not algorithm_cls:
                raise ValueError(f"算法 '{algorithm_name}' 不存在")
            algorithm = algorithm_cls()
            # NOTE(review): PrimeChecker gets a special positional call while
            # everything else receives **params — consider a uniform calling
            # convention on BaseAlgorithm instead of name-based dispatch.
            if algorithm_name == "PrimeChecker":
                execution_result = algorithm.execute(params.get("number", 0))
            else:
                # Generic dispatch: params are passed as keyword arguments.
                execution_result = algorithm.execute(**params)
            # execute() is expected to return a dict with "success",
            # "result"/"error", and "metadata" keys.
            if execution_result.get("success"):
                result_data = execution_result.get("result", {})
                metadata = execution_result.get("metadata", {})
            else:
                status = "failed"
                error_msg = execution_result.get("error", "算法执行失败")
                metadata = execution_result.get("metadata", {})
        except Exception as e:
            status = "failed"
            error_msg = str(e)
            logger.error(f"任务执行失败: job_id={job_id}, error={e}", exc_info=True)

        # Record outcome and timing.
        elapsed_time = time.time() - start_time
        completed_at = self._get_timestamp()

        # NOTE(review): falsy checks mean an empty-dict result/metadata is
        # stored as "" and later read back as None — confirm intended.
        update_data = {
            "status": status,
            "completed_at": completed_at,
            "result": json.dumps(result_data) if result_data else "",
            "error": error_msg or "",
            "metadata": json.dumps(metadata) if metadata else "",
        }
        await self._redis_client.hset(key, mapping=update_data)
        # The record now expires after the configured result TTL.
        await self._redis_client.expire(key, settings.job_result_ttl)

        # Metrics: completion count and execution duration per algorithm.
        incr("jobs_completed_total", {"algorithm": algorithm_name, "status": status})
        observe("job_execution_duration_seconds", {"algorithm": algorithm_name}, elapsed_time)
        logger.info(f"任务执行完成: job_id={job_id}, status={status}, elapsed={elapsed_time:.3f}s")

        # Deliver the webhook callback, if one was registered.
        if webhook_url:
            await self._send_webhook(job_id, webhook_url)

    async def _send_webhook(self, job_id: str, webhook_url: str) -> None:
        """POST the job outcome to the webhook URL, retrying on failure.

        Retries up to ``settings.webhook_max_retries`` times with fixed
        backoff delays of 1s/5s/15s between attempts.

        Args:
            job_id: Job ID.
            webhook_url: Callback URL.
        """
        if not self._http_client:
            logger.warning("HTTP 客户端不可用,无法发送 Webhook")
            return

        # Re-read the decoded job record to build the payload.
        job_data = await self.get_job(job_id)
        if not job_data:
            logger.error(f"无法获取任务数据用于 Webhook: {job_id}")
            return

        payload = {
            "job_id": job_data["job_id"],
            "status": job_data["status"],
            "algorithm": job_data["algorithm"],
            "result": job_data["result"],
            "error": job_data["error"],
            "metadata": job_data["metadata"],
            "completed_at": job_data["completed_at"],
        }

        # Backoff schedule; attempts beyond its length reuse the last delay.
        retry_delays = [1, 5, 15]
        max_retries = settings.webhook_max_retries
        for attempt in range(max_retries):
            try:
                response = await self._http_client.post(
                    webhook_url,
                    json=payload,
                    headers={"Content-Type": "application/json"},
                )
                # Any non-4xx/5xx status counts as delivered.
                if response.status_code < 400:
                    incr("webhook_deliveries_total", {"status": "success"})
                    logger.info(
                        f"Webhook 发送成功: job_id={job_id}, url={webhook_url}, "
                        f"status_code={response.status_code}"
                    )
                    return
                else:
                    logger.warning(
                        f"Webhook 响应错误: job_id={job_id}, status_code={response.status_code}"
                    )
            except Exception as e:
                logger.warning(
                    f"Webhook 发送失败 (尝试 {attempt + 1}/{max_retries}): "
                    f"job_id={job_id}, error={e}"
                )
            # Sleep before the next attempt (skipped after the last one).
            if attempt < max_retries - 1:
                delay = retry_delays[min(attempt, len(retry_delays) - 1)]
                await asyncio.sleep(delay)

        # Every attempt failed; record the final failure.
        incr("webhook_deliveries_total", {"status": "failed"})
        logger.error(f"Webhook 发送最终失败: job_id={job_id}, url={webhook_url}")

    def is_available(self) -> bool:
        """Return True when the Redis connection was established."""
        return self._redis_client is not None
# Process-wide singleton instance, created lazily.
_job_manager: Optional[JobManager] = None


async def get_job_manager() -> JobManager:
    """Return the global JobManager, creating and initializing it on first use."""
    global _job_manager
    if _job_manager is not None:
        return _job_manager
    _job_manager = JobManager()
    await _job_manager.initialize()
    return _job_manager
async def shutdown_job_manager() -> None:
    """Close the global JobManager (if any) and clear the singleton."""
    global _job_manager
    if _job_manager is None:
        return
    await _job_manager.shutdown()
    _job_manager = None

View File

@@ -17,6 +17,7 @@ from .core.metrics_unified import (
gauge_decr,
export,
)
from .core.job_manager import get_job_manager, shutdown_job_manager
# 设置日志
setup_logging(level=settings.log_level, format_type=settings.log_format)
@@ -132,6 +133,16 @@ async def startup_event():
else:
logger.warning("Redis 不可用,指标将不会被收集")
# 初始化任务管理器
try:
job_manager = await get_job_manager()
if job_manager.is_available():
logger.info("异步任务管理器已启用")
else:
logger.warning("Redis 不可用,异步任务功能将不可用")
except Exception as e:
logger.warning(f"任务管理器初始化失败: {e}")
# 关闭事件
@app.on_event("shutdown")
@@ -139,6 +150,13 @@ async def shutdown_event():
"""应用关闭时执行"""
logger.info(f"Shutting down {settings.app_name}")
# 关闭任务管理器
try:
await shutdown_job_manager()
logger.info("任务管理器已关闭")
except Exception as e:
logger.warning(f"任务管理器关闭失败: {e}")
if __name__ == "__main__":
import uvicorn