main:添加核心文件并初始化项目
新增内容: - 创建基础项目结构。 - 添加 `.gitignore` 和 `.dockerignore` 文件。 - 编写 `pyproject.toml` 和依赖文件。 - 添加算法模块及示例算法。 - 实现核心功能模块(日志、错误处理、指标)。 - 添加开发和运行所需的相关脚本文件及文档。
This commit is contained in:
3
src/functional_scaffold/__init__.py
Normal file
3
src/functional_scaffold/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""FunctionalScaffold - 算法工程化 Serverless 脚手架"""
|
||||
|
||||
__version__ = "1.0.0"
|
||||
6
src/functional_scaffold/algorithms/__init__.py
Normal file
6
src/functional_scaffold/algorithms/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""算法模块"""
|
||||
|
||||
from .base import BaseAlgorithm
|
||||
from .prime_checker import PrimeChecker
|
||||
|
||||
__all__ = ["BaseAlgorithm", "PrimeChecker"]
|
||||
77
src/functional_scaffold/algorithms/base.py
Normal file
77
src/functional_scaffold/algorithms/base.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""算法基类"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict
|
||||
import time
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseAlgorithm(ABC):
    """Base class that every algorithm must inherit from.

    Subclasses implement :meth:`process`; callers invoke :meth:`execute`
    to get uniform logging, metrics and error handling around it.
    """

    def __init__(self):
        # Name and version are attached to every result's metadata and
        # used as metric labels.
        self.name = self.__class__.__name__
        self.version = "1.0.0"

    @abstractmethod
    def process(self, *args, **kwargs) -> Dict[str, Any]:
        """Algorithm-specific logic; subclasses must implement this.

        Returns:
            Dict[str, Any]: The algorithm's result payload.
        """

    def execute(self, *args, **kwargs) -> Dict[str, Any]:
        """Run the algorithm with logging, metrics and error handling.

        Exceptions raised by ``process`` are caught and reported in the
        returned dict rather than propagated.

        Returns:
            Dict[str, Any]: On success ``{"success": True, "result": ...,
            "metadata": {...}}``; on failure ``{"success": False,
            "error": ..., "metadata": {...}}``.
        """
        # Imported lazily to avoid a circular import with the core package.
        from ..core.metrics import algorithm_counter, algorithm_latency

        start_time = time.time()
        status = "success"

        try:
            logger.info(f"Starting algorithm: {self.name}")
            result = self.process(*args, **kwargs)
            elapsed_time = time.time() - start_time

            logger.info(
                f"Algorithm {self.name} completed successfully in {elapsed_time:.3f}s"
            )

            return {
                "success": True,
                "result": result,
                "metadata": self._build_metadata(elapsed_time),
            }

        except Exception as e:
            status = "error"
            elapsed_time = time.time() - start_time
            logger.error(f"Algorithm {self.name} failed: {str(e)}", exc_info=True)

            return {
                "success": False,
                "error": str(e),
                "metadata": self._build_metadata(elapsed_time),
            }
        finally:
            # Record execution metrics whatever the outcome. The duration is
            # re-measured here so it is always defined, even if a
            # BaseException (not caught above) unwinds through this frame.
            elapsed_time = time.time() - start_time
            algorithm_counter.labels(algorithm=self.name, status=status).inc()
            algorithm_latency.labels(algorithm=self.name).observe(elapsed_time)

    def _build_metadata(self, elapsed_time: float) -> Dict[str, Any]:
        """Build the metadata dict attached to every execute() result."""
        return {
            "algorithm": self.name,
            "version": self.version,
            "elapsed_time": elapsed_time,
        }
|
||||
94
src/functional_scaffold/algorithms/prime_checker.py
Normal file
94
src/functional_scaffold/algorithms/prime_checker.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""质数判断算法"""
|
||||
|
||||
from typing import Dict, Any, List
|
||||
from .base import BaseAlgorithm
|
||||
|
||||
|
||||
class PrimeChecker(BaseAlgorithm):
    """Primality-testing algorithm.

    Uses trial division to decide whether an integer is prime and, for
    composites, returns its proper factors.
    """

    def process(self, number: int) -> Dict[str, Any]:
        """Check whether ``number`` is prime.

        Args:
            number: Integer to test.

        Returns:
            Dict[str, Any]: Result payload with the keys
                - number: the input value
                - is_prime: whether it is prime
                - factors: proper factors (empty for primes)
                - reason: explanation, when applicable
                - algorithm: name of the algorithm used

        Raises:
            ValueError: If the input is not an integer.
        """
        # bool is a subclass of int; reject it explicitly so True/False are
        # not silently treated as 1/0.
        if not isinstance(number, int) or isinstance(number, bool):
            raise ValueError(f"Input must be an integer, got {type(number).__name__}")

        # Numbers below 2 are not prime by definition.
        if number < 2:
            return {
                "number": number,
                "is_prime": False,
                "reason": "Numbers less than 2 are not prime",
                "factors": [],
                "algorithm": "trial_division",
            }

        is_prime = self._is_prime(number)

        # Only composites get a factor list.
        factors = [] if is_prime else self._get_factors(number)

        return {
            "number": number,
            "is_prime": is_prime,
            "factors": factors,
            "algorithm": "trial_division",
        }

    def _is_prime(self, n: int) -> bool:
        """Trial-division primality test.

        Args:
            n: Integer >= 2 to test.

        Returns:
            bool: True if ``n`` is prime.
        """
        if n == 2:
            return True
        if n % 2 == 0:
            return False

        # Only odd divisors up to sqrt(n) need to be checked.
        for i in range(3, int(n**0.5) + 1, 2):
            if n % i == 0:
                return False

        return True

    def _get_factors(self, n: int) -> List[int]:
        """Return all proper factors of ``n`` (excluding 1 and n), ascending.

        Runs in O(sqrt(n)) by collecting divisor pairs ``(i, n // i)``
        instead of scanning every integer below ``n``.

        Args:
            n: Integer >= 2 to factorize.

        Returns:
            List[int]: Sorted list of proper factors.
        """
        factors = set()
        for i in range(2, int(n**0.5) + 1):
            if n % i == 0:
                factors.add(i)
                factors.add(n // i)
        return sorted(factors)
|
||||
6
src/functional_scaffold/api/__init__.py
Normal file
6
src/functional_scaffold/api/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""API 模块"""
|
||||
|
||||
from .routes import router
# ReadinessResponse is part of the public response models (used by the
# /readyz route), so export it alongside the others.
from .models import (
    InvokeRequest,
    InvokeResponse,
    HealthResponse,
    ReadinessResponse,
    ErrorResponse,
)

__all__ = [
    "router",
    "InvokeRequest",
    "InvokeResponse",
    "HealthResponse",
    "ReadinessResponse",
    "ErrorResponse",
]
|
||||
20
src/functional_scaffold/api/dependencies.py
Normal file
20
src/functional_scaffold/api/dependencies.py
Normal file
@@ -0,0 +1,20 @@
|
||||
"""API 依赖注入"""
|
||||
|
||||
from fastapi import Header, HTTPException
|
||||
from typing import Optional
|
||||
from ..core.tracing import set_request_id, generate_request_id
|
||||
|
||||
|
||||
async def get_request_id(x_request_id: Optional[str] = Header(None)) -> str:
    """Resolve the request ID for the current request.

    Uses the incoming ``X-Request-ID`` header when present and non-empty,
    otherwise generates a fresh ID; the result is stored in the tracing
    context before being returned.

    Args:
        x_request_id: Request ID taken from the request header, if any.

    Returns:
        str: The resolved request ID.
    """
    if x_request_id:
        resolved = x_request_id
    else:
        resolved = generate_request_id()
    set_request_id(resolved)
    return resolved
|
||||
82
src/functional_scaffold/api/models.py
Normal file
82
src/functional_scaffold/api/models.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""API 数据模型"""
|
||||
|
||||
from pydantic import BaseModel, Field, ConfigDict
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
class InvokeRequest(BaseModel):
    """Request body for the synchronous invoke endpoint."""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "number": 17
            }
        }
    )

    # Integer whose primality is to be tested.
    number: int = Field(..., description="待判断的整数")
|
||||
|
||||
|
||||
class InvokeResponse(BaseModel):
    """Response body for a successful synchronous invocation."""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "request_id": "550e8400-e29b-41d4-a716-446655440000",
                "status": "success",
                "result": {
                    "number": 17,
                    "is_prime": True,
                    "factors": [],
                    "algorithm": "trial_division"
                },
                "metadata": {
                    "algorithm": "PrimeChecker",
                    "version": "1.0.0",
                    "elapsed_time": 0.001
                }
            }
        }
    )

    # Correlation ID for the request (see get_request_id dependency).
    request_id: str = Field(..., description="请求唯一标识")
    # Processing status; "success" on this response model.
    status: str = Field(..., description="处理状态")
    # Algorithm result payload, as returned by BaseAlgorithm.execute().
    result: Dict[str, Any] = Field(..., description="算法执行结果")
    # Execution metadata (algorithm name, version, elapsed time).
    metadata: Dict[str, Any] = Field(..., description="元数据信息")
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
    """Liveness-probe response (see the /healthz route)."""

    # e.g. "healthy"
    status: str = Field(..., description="健康状态")
    # Unix timestamp of the check
    timestamp: float = Field(..., description="时间戳")
|
||||
|
||||
|
||||
class ReadinessResponse(BaseModel):
    """Readiness-probe response (see the /readyz route)."""

    # "ready" or "not_ready"
    status: str = Field(..., description="就绪状态")
    # Unix timestamp of the check
    timestamp: float = Field(..., description="时间戳")
    # Per-dependency check outcomes, when provided
    checks: Optional[Dict[str, bool]] = Field(None, description="各项检查结果")
|
||||
|
||||
|
||||
class ErrorResponse(BaseModel):
    """Structured error payload returned on 4xx/5xx responses."""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "error": "VALIDATION_ERROR",
                "message": "number must be an integer",
                "details": {"field": "number", "value": "abc"},
                "request_id": "550e8400-e29b-41d4-a716-446655440000"
            }
        }
    )

    # Machine-readable error code (e.g. VALIDATION_ERROR, ALGORITHM_ERROR)
    error: str = Field(..., description="错误代码")
    # Human-readable error message
    message: str = Field(..., description="错误消息")
    # Optional structured context for the error
    details: Optional[Dict[str, Any]] = Field(None, description="错误详情")
    # Correlation ID of the failing request, when known
    request_id: Optional[str] = Field(None, description="请求ID")
|
||||
150
src/functional_scaffold/api/routes.py
Normal file
150
src/functional_scaffold/api/routes.py
Normal file
@@ -0,0 +1,150 @@
|
||||
"""API 路由"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Depends, status
|
||||
from fastapi.responses import JSONResponse
|
||||
import time
|
||||
import logging
|
||||
|
||||
from .models import (
|
||||
InvokeRequest,
|
||||
InvokeResponse,
|
||||
HealthResponse,
|
||||
ReadinessResponse,
|
||||
ErrorResponse,
|
||||
)
|
||||
from .dependencies import get_request_id
|
||||
from ..algorithms.prime_checker import PrimeChecker
|
||||
from ..core.errors import FunctionalScaffoldError, ValidationError, AlgorithmError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post(
    "/invoke",
    response_model=InvokeResponse,
    status_code=status.HTTP_200_OK,
    summary="同步调用算法",
    description="同步调用质数判断算法,立即返回结果",
    responses={
        200: {"description": "成功", "model": InvokeResponse},
        400: {"description": "请求参数错误", "model": ErrorResponse},
        500: {"description": "服务器内部错误", "model": ErrorResponse},
    },
)
async def invoke_algorithm(
    request: InvokeRequest,
    request_id: str = Depends(get_request_id),
):
    """Synchronously run the prime-checking algorithm.

    - **number**: integer to test for primality
    """
    try:
        logger.info(f"Processing request {request_id} with number={request.number}")

        # execute() normally reports failures via {"success": False, ...}
        # instead of raising, so the failure is surfaced here explicitly.
        checker = PrimeChecker()
        execution_result = checker.execute(request.number)

        if not execution_result["success"]:
            raise AlgorithmError(
                execution_result.get("error", "Algorithm execution failed"),
                details=execution_result.get("metadata", {}),
            )

        return InvokeResponse(
            request_id=request_id,
            status="success",
            result=execution_result["result"],
            metadata=execution_result["metadata"],
        )

    # NOTE(review): nothing in this handler currently raises ValidationError
    # (pydantic validates `number` before we get here) — confirm whether this
    # branch is reachable or is kept for future validators.
    except ValidationError as e:
        logger.warning(f"Validation error for request {request_id}: {e.message}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=e.to_dict(),
        )

    # Domain failures from the algorithm map to a structured 500.
    except AlgorithmError as e:
        logger.error(f"Algorithm error for request {request_id}: {e.message}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=e.to_dict(),
        )

    # Catch-all: translate unexpected failures into a structured 500.
    except Exception as e:
        logger.error(f"Unexpected error for request {request_id}: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={
                "error": "INTERNAL_ERROR",
                "message": str(e),
                "request_id": request_id,
            },
        )
|
||||
|
||||
|
||||
@router.get(
    "/healthz",
    response_model=HealthResponse,
    summary="健康检查",
    description="检查服务是否存活",
)
async def health_check():
    """Liveness probe endpoint.

    Reports that the process is alive, for use by a liveness probe.
    """
    return HealthResponse(status="healthy", timestamp=time.time())
|
||||
|
||||
|
||||
@router.get(
    "/readyz",
    response_model=ReadinessResponse,
    summary="就绪检查",
    description="检查服务是否就绪",
)
async def readiness_check():
    """Readiness probe endpoint.

    Reports whether the service can accept traffic, for use by a
    readiness probe.
    """
    # More checks (database connectivity, external services, ...) can be
    # added to this mapping later.
    checks = {
        "algorithm": True,  # algorithm module is available
    }

    ready = all(checks.values())

    return ReadinessResponse(
        status="ready" if ready else "not_ready",
        timestamp=time.time(),
        checks=checks,
    )
|
||||
|
||||
|
||||
@router.post(
    "/jobs",
    status_code=status.HTTP_501_NOT_IMPLEMENTED,
    summary="异步任务接口(预留)",
    description="异步任务接口,当前版本未实现",
)
async def create_job():
    """Placeholder endpoint for submitting long-running asynchronous jobs.

    Always responds with 501 until async job support is implemented.
    """
    detail = {"error": "NOT_IMPLEMENTED", "message": "Async jobs not implemented yet"}
    raise HTTPException(
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
        detail=detail,
    )
|
||||
47
src/functional_scaffold/config.py
Normal file
47
src/functional_scaffold/config.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""配置管理模块"""
|
||||
|
||||
from pydantic_settings import BaseSettings
|
||||
from pydantic import ConfigDict
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Application configuration, loaded from the environment / .env file."""

    # NOTE(review): pydantic-settings v2 documents SettingsConfigDict for
    # these keys; plain ConfigDict works at runtime (both are dicts) but
    # env_file is not part of ConfigDict's typed schema — confirm against
    # the pydantic-settings docs.
    model_config = ConfigDict(
        env_file=".env",
        case_sensitive=False
    )

    # Application identity
    app_name: str = "FunctionalScaffold"
    app_version: str = "1.0.0"
    app_env: str = "development"

    # HTTP server settings
    host: str = "0.0.0.0"
    port: int = 8000
    workers: int = 4

    # Logging settings ('json' or 'text' — see core.logging.setup_logging)
    log_level: str = "INFO"
    log_format: str = "json"

    # Metrics toggle
    metrics_enabled: bool = True

    # Tracing settings
    tracing_enabled: bool = False
    jaeger_endpoint: Optional[str] = None

    # External service settings (example: OSS object storage)
    oss_endpoint: Optional[str] = None
    oss_access_key_id: Optional[str] = None
    oss_access_key_secret: Optional[str] = None
    oss_bucket_name: Optional[str] = None

    database_url: Optional[str] = None
|
||||
|
||||
|
||||
# 全局配置实例
|
||||
settings = Settings()
|
||||
21
src/functional_scaffold/core/__init__.py
Normal file
21
src/functional_scaffold/core/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""核心功能模块"""
|
||||
|
||||
from .errors import (
|
||||
FunctionalScaffoldError,
|
||||
ValidationError,
|
||||
AlgorithmError,
|
||||
ConfigurationError,
|
||||
)
|
||||
from .logging import setup_logging
|
||||
from .metrics import metrics_registry, track_request, track_algorithm_execution
|
||||
|
||||
__all__ = [
|
||||
"FunctionalScaffoldError",
|
||||
"ValidationError",
|
||||
"AlgorithmError",
|
||||
"ConfigurationError",
|
||||
"setup_logging",
|
||||
"metrics_registry",
|
||||
"track_request",
|
||||
"track_algorithm_execution",
|
||||
]
|
||||
47
src/functional_scaffold/core/errors.py
Normal file
47
src/functional_scaffold/core/errors.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""错误处理模块"""
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
class FunctionalScaffoldError(Exception):
    """Root of the application's exception hierarchy.

    Carries a machine-readable error code and optional structured details
    alongside the human-readable message.
    """

    def __init__(
        self,
        message: str,
        error_code: Optional[str] = None,
        details: Optional[Dict[str, Any]] = None,
    ):
        super().__init__(message)
        self.message = message
        self.error_code = error_code if error_code else "INTERNAL_ERROR"
        self.details = details if details else {}

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the error into a JSON-friendly payload."""
        return {
            "error": self.error_code,
            "message": self.message,
            "details": self.details,
        }
|
||||
|
||||
|
||||
class ValidationError(FunctionalScaffoldError):
    """Raised when input or request validation fails (mapped to HTTP 400)."""

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        super().__init__(
            message,
            error_code="VALIDATION_ERROR",
            details=details,
        )
|
||||
|
||||
|
||||
class AlgorithmError(FunctionalScaffoldError):
    """Raised when an algorithm execution fails (mapped to HTTP 500)."""

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        super().__init__(
            message,
            error_code="ALGORITHM_ERROR",
            details=details,
        )
|
||||
|
||||
|
||||
class ConfigurationError(FunctionalScaffoldError):
    """Raised when application configuration is invalid or missing."""

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        super().__init__(
            message,
            error_code="CONFIGURATION_ERROR",
            details=details,
        )
|
||||
50
src/functional_scaffold/core/logging.py
Normal file
50
src/functional_scaffold/core/logging.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""日志配置模块"""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from typing import Optional
|
||||
from pythonjsonlogger.json import JsonFormatter
|
||||
|
||||
|
||||
def setup_logging(
    level: str = "INFO",
    format_type: str = "json",
    logger_name: Optional[str] = None,
) -> logging.Logger:
    """Configure a logger that writes to stdout and return it.

    Any previously attached handlers are removed first, so calling this
    repeatedly does not duplicate output.

    Args:
        level: Log level name (DEBUG, INFO, WARNING, ERROR, CRITICAL).
        format_type: Output format, 'json' or 'text'.
        logger_name: Logger to configure; None means the root logger.

    Returns:
        logging.Logger: The configured logger.
    """
    log_level = getattr(logging, level.upper())

    target = logging.getLogger(logger_name)
    target.setLevel(log_level)
    # Drop any handlers attached by earlier configuration.
    target.handlers.clear()

    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setLevel(log_level)

    if format_type == "json":
        stream_handler.setFormatter(
            JsonFormatter(
                "%(asctime)s %(name)s %(levelname)s %(message)s",
                timestamp=True,
            )
        )
    else:
        stream_handler.setFormatter(
            logging.Formatter(
                "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                datefmt="%Y-%m-%d %H:%M:%S",
            )
        )

    target.addHandler(stream_handler)
    return target
|
||||
111
src/functional_scaffold/core/metrics.py
Normal file
111
src/functional_scaffold/core/metrics.py
Normal file
@@ -0,0 +1,111 @@
|
||||
"""Prometheus 指标模块"""
|
||||
|
||||
from prometheus_client import Counter, Histogram, Gauge, CollectorRegistry
|
||||
from functools import wraps
|
||||
import time
|
||||
from typing import Callable
|
||||
|
||||
# 创建指标注册表
|
||||
# Registry that collects every metric defined in this module.
metrics_registry = CollectorRegistry()

# Counter: total HTTP requests, labelled by method, endpoint and status.
request_counter = Counter(
    "http_requests_total",
    "Total HTTP requests",
    ["method", "endpoint", "status"],
    registry=metrics_registry,
)

# Histogram: HTTP request latency per method/endpoint.
request_latency = Histogram(
    "http_request_duration_seconds",
    "HTTP request latency",
    ["method", "endpoint"],
    registry=metrics_registry,
)

# Counter: total algorithm executions, labelled by algorithm and status.
algorithm_counter = Counter(
    "algorithm_executions_total",
    "Total algorithm executions",
    ["algorithm", "status"],
    registry=metrics_registry,
)

# Histogram: algorithm execution latency per algorithm.
algorithm_latency = Histogram(
    "algorithm_execution_duration_seconds",
    "Algorithm execution latency",
    ["algorithm"],
    registry=metrics_registry,
)

# Gauge: number of HTTP requests currently being processed.
in_progress_requests = Gauge(
    "http_requests_in_progress",
    "Number of HTTP requests in progress",
    registry=metrics_registry,
)
|
||||
|
||||
|
||||
def track_request(method: str, endpoint: str):
    """Decorator: record Prometheus metrics around an async HTTP handler.

    Args:
        method: HTTP method used as the metric label.
        endpoint: Endpoint path used as the metric label.
    """

    def decorator(func: Callable):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            in_progress_requests.inc()
            start_time = time.time()
            # Pre-set to "error" so the label is defined even if a
            # BaseException (e.g. asyncio.CancelledError) escapes past the
            # try body; re-raising is left to normal propagation instead of
            # `raise e`, which would mutate the traceback.
            status = "error"

            try:
                result = await func(*args, **kwargs)
                status = "success"
                return result
            finally:
                elapsed = time.time() - start_time
                request_counter.labels(method=method, endpoint=endpoint, status=status).inc()
                request_latency.labels(method=method, endpoint=endpoint).observe(elapsed)
                in_progress_requests.dec()

        return wrapper

    return decorator
|
||||
|
||||
|
||||
def track_algorithm_execution(algorithm_name: str):
    """Decorator: record execution count and latency for an algorithm.

    Args:
        algorithm_name: Value for the ``algorithm`` metric label.
    """

    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            start_time = time.time()
            # Pre-set to "error" so the label is defined even if a
            # BaseException (e.g. KeyboardInterrupt) escapes past the try
            # body; re-raising is left to normal propagation instead of
            # `raise e`, which would mutate the traceback.
            status = "error"

            try:
                result = func(*args, **kwargs)
                status = "success"
                return result
            finally:
                elapsed = time.time() - start_time
                algorithm_counter.labels(algorithm=algorithm_name, status=status).inc()
                algorithm_latency.labels(algorithm=algorithm_name).observe(elapsed)

        return wrapper

    return decorator
|
||||
162
src/functional_scaffold/core/metrics_pushgateway.py
Normal file
162
src/functional_scaffold/core/metrics_pushgateway.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""基于 Pushgateway 的 Prometheus 指标模块"""
|
||||
|
||||
from prometheus_client import Counter, Histogram, Gauge, CollectorRegistry, push_to_gateway
|
||||
from functools import wraps
|
||||
import time
|
||||
from typing import Callable, Optional
|
||||
import os
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)


# Registry that collects every metric pushed by this module.
metrics_registry = CollectorRegistry()

# Pushgateway configuration (overridable via environment variables).
PUSHGATEWAY_URL = os.getenv("PUSHGATEWAY_URL", "localhost:9091")
JOB_NAME = os.getenv("METRICS_JOB_NAME", "functional_scaffold")
# Per-instance label; falls back to the container/host HOSTNAME.
INSTANCE_ID = os.getenv("INSTANCE_ID", os.getenv("HOSTNAME", "unknown"))

# Counter: total HTTP requests, labelled per instance.
request_counter = Counter(
    "http_requests_total",
    "Total HTTP requests",
    ["method", "endpoint", "status", "instance"],
    registry=metrics_registry,
)

# Histogram: HTTP request latency per method/endpoint/instance.
request_latency = Histogram(
    "http_request_duration_seconds",
    "HTTP request latency",
    ["method", "endpoint", "instance"],
    registry=metrics_registry,
)

# Counter: total algorithm executions, labelled per instance.
algorithm_counter = Counter(
    "algorithm_executions_total",
    "Total algorithm executions",
    ["algorithm", "status", "instance"],
    registry=metrics_registry,
)

# Histogram: algorithm execution latency per algorithm/instance.
algorithm_latency = Histogram(
    "algorithm_execution_duration_seconds",
    "Algorithm execution latency",
    ["algorithm", "instance"],
    registry=metrics_registry,
)

# Gauge: number of HTTP requests currently being processed, per instance.
in_progress_requests = Gauge(
    "http_requests_in_progress",
    "Number of HTTP requests in progress",
    ["instance"],
    registry=metrics_registry,
)
|
||||
|
||||
|
||||
def push_metrics(grouping_key: Optional[dict] = None):
    """Push the current registry contents to the Pushgateway.

    Failures are logged and swallowed so that metrics delivery can never
    break the caller's request path.

    Args:
        grouping_key: Extra grouping labels merged over the instance label.
    """
    try:
        grouping = {"instance": INSTANCE_ID, **(grouping_key or {})}

        push_to_gateway(
            PUSHGATEWAY_URL,
            job=JOB_NAME,
            registry=metrics_registry,
            grouping_key=grouping,
        )
        logger.debug(f"成功推送指标到 Pushgateway: {PUSHGATEWAY_URL}")
    except Exception as e:
        logger.error(f"推送指标到 Pushgateway 失败: {e}")
|
||||
|
||||
|
||||
def track_request(method: str, endpoint: str, auto_push: bool = True):
    """Decorator: record HTTP request metrics and optionally push them.

    Args:
        method: HTTP method used as the metric label.
        endpoint: Endpoint path used as the metric label.
        auto_push: Push the registry to the Pushgateway after each request.
    """

    def decorator(func: Callable):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            in_progress_requests.labels(instance=INSTANCE_ID).inc()
            start_time = time.time()
            # Pre-set to "error" so the label is defined even if a
            # BaseException (e.g. asyncio.CancelledError) escapes past the
            # try body; re-raising is left to normal propagation instead of
            # `raise e`, which would mutate the traceback.
            status = "error"

            try:
                result = await func(*args, **kwargs)
                status = "success"
                return result
            finally:
                elapsed = time.time() - start_time
                request_counter.labels(
                    method=method, endpoint=endpoint, status=status, instance=INSTANCE_ID
                ).inc()
                request_latency.labels(
                    method=method, endpoint=endpoint, instance=INSTANCE_ID
                ).observe(elapsed)
                in_progress_requests.labels(instance=INSTANCE_ID).dec()

                # Push after every request so short-lived instances still
                # get their metrics captured.
                if auto_push:
                    push_metrics()

        return wrapper

    return decorator
|
||||
|
||||
|
||||
def track_algorithm_execution(algorithm_name: str, auto_push: bool = True):
    """Decorator: record algorithm metrics and optionally push them.

    Args:
        algorithm_name: Value for the ``algorithm`` metric label.
        auto_push: Push the registry to the Pushgateway after each call.
    """

    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            start_time = time.time()
            # Pre-set to "error" so the label is defined even if a
            # BaseException (e.g. KeyboardInterrupt) escapes past the try
            # body; re-raising is left to normal propagation instead of
            # `raise e`, which would mutate the traceback.
            status = "error"

            try:
                result = func(*args, **kwargs)
                status = "success"
                return result
            finally:
                elapsed = time.time() - start_time
                algorithm_counter.labels(
                    algorithm=algorithm_name, status=status, instance=INSTANCE_ID
                ).inc()
                algorithm_latency.labels(
                    algorithm=algorithm_name, instance=INSTANCE_ID
                ).observe(elapsed)

                # Push after every call so short-lived instances still get
                # their metrics captured.
                if auto_push:
                    push_metrics()

        return wrapper

    return decorator
|
||||
247
src/functional_scaffold/core/metrics_redis.py
Normal file
247
src/functional_scaffold/core/metrics_redis.py
Normal file
@@ -0,0 +1,247 @@
|
||||
"""基于 Redis 的指标记录模块"""
|
||||
|
||||
from functools import wraps
|
||||
import time
|
||||
from typing import Callable, Optional
|
||||
import os
|
||||
import logging
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
# Optional dependency: metric recording degrades to no-ops (with logged
# errors) when the redis package is missing.
try:
    import redis
    REDIS_AVAILABLE = True
except ImportError:
    REDIS_AVAILABLE = False
    logging.warning("Redis 未安装,指标将无法记录到 Redis")


logger = logging.getLogger(__name__)


# Redis connection settings (overridable via environment variables).
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
REDIS_DB = int(os.getenv("REDIS_METRICS_DB", "0"))
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", None)
# Per-instance label; falls back to the container/host HOSTNAME.
INSTANCE_ID = os.getenv("INSTANCE_ID", os.getenv("HOSTNAME", "unknown"))

# Key namespace for all metric data stored in Redis; the exporter module
# uses the same key names to read them back.
METRICS_PREFIX = "metrics:"
REQUEST_COUNTER_KEY = f"{METRICS_PREFIX}request_counter"
REQUEST_LATENCY_KEY = f"{METRICS_PREFIX}request_latency"
ALGORITHM_COUNTER_KEY = f"{METRICS_PREFIX}algorithm_counter"
ALGORITHM_LATENCY_KEY = f"{METRICS_PREFIX}algorithm_latency"
IN_PROGRESS_KEY = f"{METRICS_PREFIX}in_progress"
|
||||
|
||||
|
||||
class RedisMetricsClient:
    """Writes request/algorithm metrics into Redis hashes and sorted sets.

    All write methods swallow and log Redis errors so that metric
    recording can never break the caller's request path.
    """

    def __init__(self):
        # Fail fast when the optional redis dependency is missing.
        if not REDIS_AVAILABLE:
            raise ImportError("需要安装 redis 库: pip install redis")

        self.client = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            db=REDIS_DB,
            password=REDIS_PASSWORD,
            decode_responses=True,
        )
        # Label value identifying this process/container in stored metrics.
        self.instance_id = INSTANCE_ID

    def increment_counter(self, key: str, labels: dict, value: int = 1):
        """Increment a counter stored as a field of the hash at ``key``.

        Args:
            key: Metric key (hash name).
            labels: Label dict identifying the series.
            value: Increment amount.
        """
        try:
            # Hash field name encodes the label combination.
            label_key = self._make_label_key(labels)
            # NOTE(review): the field repeats the hash key as a prefix
            # (f"{key}:{label_key}") — looks redundant; confirm that
            # readers (e.g. the exporter) expect this exact field format.
            full_key = f"{key}:{label_key}"
            self.client.hincrby(key, full_key, value)

            # Record last-update time alongside the counter field.
            self.client.hset(key, f"{full_key}:timestamp", int(time.time()))
        except Exception as e:
            logger.error(f"Redis 计数器增加失败: {e}")

    def observe_histogram(self, key: str, labels: dict, value: float):
        """Record one histogram observation.

        Args:
            key: Metric key prefix.
            labels: Label dict identifying the series.
            value: Observed value (e.g. latency in seconds).
        """
        try:
            label_key = self._make_label_key(labels)
            full_key = f"{key}:{label_key}"

            # Raw samples go into a sorted set scored by timestamp so
            # quantiles can be computed later.
            timestamp = time.time()
            self.client.zadd(full_key, {f"{timestamp}:{value}": timestamp})

            # Keep only the last hour of raw samples.
            cutoff = timestamp - 3600
            self.client.zremrangebyscore(full_key, "-inf", cutoff)

            # Also maintain count/sum hashes for cheap aggregate stats.
            self.client.hincrby(f"{key}:count", full_key, 1)
            self.client.hincrbyfloat(f"{key}:sum", full_key, value)
        except Exception as e:
            logger.error(f"Redis 直方图记录失败: {e}")

    def set_gauge(self, key: str, labels: dict, value: float):
        """Set a gauge to an absolute value.

        Args:
            key: Metric key (hash name).
            labels: Label dict identifying the series.
            value: New gauge value.
        """
        try:
            label_key = self._make_label_key(labels)
            full_key = f"{key}:{label_key}"
            self.client.hset(key, full_key, value)
            self.client.hset(key, f"{full_key}:timestamp", int(time.time()))
        except Exception as e:
            logger.error(f"Redis 仪表盘设置失败: {e}")

    def increment_gauge(self, key: str, labels: dict, value: float = 1):
        """Increase a gauge by ``value`` (``value`` may be negative)."""
        try:
            label_key = self._make_label_key(labels)
            full_key = f"{key}:{label_key}"
            self.client.hincrbyfloat(key, full_key, value)
        except Exception as e:
            logger.error(f"Redis 仪表盘增加失败: {e}")

    def decrement_gauge(self, key: str, labels: dict, value: float = 1):
        """Decrease a gauge by ``value``."""
        self.increment_gauge(key, labels, -value)

    def _make_label_key(self, labels: dict) -> str:
        """Build a deterministic field name from a label dict.

        Args:
            labels: Label dict (the instance label is added automatically).

        Returns:
            str: Comma-joined ``k=v`` pairs, sorted by key.
        """
        # Tag every series with this instance's ID.
        labels_with_instance = {**labels, "instance": self.instance_id}
        # Sort keys so the same labels always yield the same field name.
        sorted_labels = sorted(labels_with_instance.items())
        return ",".join(f"{k}={v}" for k, v in sorted_labels)

    def get_metrics_summary(self) -> dict:
        """Return raw counter/gauge hash contents (for debugging).

        Returns:
            dict: Hash contents per metric, or {} when Redis fails.
        """
        try:
            return {
                "request_counter": self.client.hgetall(REQUEST_COUNTER_KEY),
                "algorithm_counter": self.client.hgetall(ALGORITHM_COUNTER_KEY),
                "in_progress": self.client.hgetall(IN_PROGRESS_KEY),
            }
        except Exception as e:
            logger.error(f"获取指标摘要失败: {e}")
            return {}
|
||||
|
||||
|
||||
# 全局客户端实例
|
||||
_redis_client: Optional[RedisMetricsClient] = None
|
||||
|
||||
|
||||
def get_redis_client() -> RedisMetricsClient:
|
||||
"""获取 Redis 客户端单例"""
|
||||
global _redis_client
|
||||
if _redis_client is None:
|
||||
_redis_client = RedisMetricsClient()
|
||||
return _redis_client
|
||||
|
||||
|
||||
def track_request(method: str, endpoint: str):
    """Decorator: record HTTP request metrics in Redis.

    Args:
        method: HTTP method used as the metric label.
        endpoint: Endpoint path used as the metric label.
    """

    def decorator(func: Callable):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            client = get_redis_client()
            labels = {"method": method, "endpoint": endpoint}

            # Track the in-flight request count.
            client.increment_gauge(IN_PROGRESS_KEY, labels)
            start_time = time.time()
            # Pre-set to "error" so the label is defined even if a
            # BaseException (e.g. asyncio.CancelledError) escapes past the
            # try body; re-raising is left to normal propagation instead of
            # `raise e`, which would mutate the traceback.
            status = "error"

            try:
                result = await func(*args, **kwargs)
                status = "success"
                return result
            finally:
                elapsed = time.time() - start_time

                # Record outcome and latency, and release the in-flight slot.
                client.increment_counter(REQUEST_COUNTER_KEY, {**labels, "status": status})
                client.observe_histogram(REQUEST_LATENCY_KEY, labels, elapsed)
                client.decrement_gauge(IN_PROGRESS_KEY, labels)

        return wrapper

    return decorator
|
||||
|
||||
|
||||
def track_algorithm_execution(algorithm_name: str):
    """Decorator: record algorithm execution metrics in Redis.

    Args:
        algorithm_name: Value for the ``algorithm`` metric label.
    """

    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            client = get_redis_client()
            labels = {"algorithm": algorithm_name}
            start_time = time.time()
            # Pre-set to "error" so the label is defined even if a
            # BaseException (e.g. KeyboardInterrupt) escapes past the try
            # body; re-raising is left to normal propagation instead of
            # `raise e`, which would mutate the traceback.
            status = "error"

            try:
                result = func(*args, **kwargs)
                status = "success"
                return result
            finally:
                elapsed = time.time() - start_time

                # Record outcome and latency.
                client.increment_counter(ALGORITHM_COUNTER_KEY, {**labels, "status": status})
                client.observe_histogram(ALGORITHM_LATENCY_KEY, labels, elapsed)

        return wrapper

    return decorator
|
||||
247
src/functional_scaffold/core/metrics_redis_exporter.py
Normal file
247
src/functional_scaffold/core/metrics_redis_exporter.py
Normal file
@@ -0,0 +1,247 @@
|
||||
"""Redis 指标 Exporter - 将 Redis 中的指标转换为 Prometheus 格式"""
|
||||
|
||||
from prometheus_client import Counter, Histogram, Gauge, CollectorRegistry, generate_latest
|
||||
from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily, HistogramMetricFamily
|
||||
import redis
|
||||
import os
|
||||
import logging
|
||||
from typing import Dict, List, Tuple
|
||||
import time
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Redis connection settings (overridable via environment variables).
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
REDIS_DB = int(os.getenv("REDIS_METRICS_DB", "0"))
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", None)

# Redis key prefix and per-metric keys; these must match the keys written by
# the metrics client side of the package.
METRICS_PREFIX = "metrics:"
REQUEST_COUNTER_KEY = f"{METRICS_PREFIX}request_counter"
REQUEST_LATENCY_KEY = f"{METRICS_PREFIX}request_latency"
ALGORITHM_COUNTER_KEY = f"{METRICS_PREFIX}algorithm_counter"
ALGORITHM_LATENCY_KEY = f"{METRICS_PREFIX}algorithm_latency"
IN_PROGRESS_KEY = f"{METRICS_PREFIX}in_progress"
|
||||
|
||||
|
||||
class RedisMetricsCollector:
    """Collect metrics from Redis and convert them to Prometheus format.

    Implements the prometheus_client custom-collector protocol: the registry
    calls collect() on each scrape and consumes the yielded MetricFamily
    objects.
    """

    def __init__(self):
        # decode_responses=True makes hash fields/values come back as str,
        # which the label parsing below relies on.
        self.redis_client = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            db=REDIS_DB,
            password=REDIS_PASSWORD,
            decode_responses=True,
        )

    def collect(self):
        """Yield every metric family (entry point invoked by the registry)."""
        try:
            # Counter metrics
            yield from self._collect_counter(
                REQUEST_COUNTER_KEY,
                "http_requests_total",
                "Total HTTP requests",
            )
            yield from self._collect_counter(
                ALGORITHM_COUNTER_KEY,
                "algorithm_executions_total",
                "Total algorithm executions",
            )

            # Histogram metrics
            yield from self._collect_histogram(
                REQUEST_LATENCY_KEY,
                "http_request_duration_seconds",
                "HTTP request latency",
            )
            yield from self._collect_histogram(
                ALGORITHM_LATENCY_KEY,
                "algorithm_execution_duration_seconds",
                "Algorithm execution latency",
            )

            # Gauge metrics
            yield from self._collect_gauge(
                IN_PROGRESS_KEY,
                "http_requests_in_progress",
                "Number of HTTP requests in progress",
            )

        except Exception as e:
            # A scrape must never crash the exporter; log and stop yielding.
            logger.error(f"收集指标失败: {e}")

    def _collect_counter(self, redis_key: str, metric_name: str, description: str):
        """Yield a CounterMetricFamily built from a Redis hash of label-string -> value."""
        try:
            data = self.redis_client.hgetall(redis_key)
            if not data:
                return

            # Parse label strings and values; ":timestamp" fields are
            # bookkeeping, not samples.
            metrics_data = []
            for key, value in data.items():
                if key.endswith(":timestamp"):
                    continue
                labels = self._parse_labels(key)
                metrics_data.append((labels, float(value)))

            # Build the Prometheus metric family.
            # NOTE(review): label names are taken from the first entry only —
            # assumes every field in the hash uses the same label set.
            if metrics_data:
                label_names = list(metrics_data[0][0].keys())
                counter = CounterMetricFamily(metric_name, description, labels=label_names)
                for labels, value in metrics_data:
                    counter.add_metric(list(labels.values()), value)
                yield counter

        except Exception as e:
            logger.error(f"收集计数器 {redis_key} 失败: {e}")

    def _collect_histogram(self, redis_key: str, metric_name: str, description: str):
        """Yield a HistogramMetricFamily from per-label count/sum hashes plus raw samples."""
        try:
            # Per-label observation counts and sums.
            count_data = self.redis_client.hgetall(f"{redis_key}:count")
            sum_data = self.redis_client.hgetall(f"{redis_key}:sum")

            if not count_data:
                return

            metrics_data = []
            for key in count_data.keys():
                labels = self._parse_labels(key)
                count = float(count_data.get(key, 0))
                sum_value = float(sum_data.get(key, 0))

                # Derive bucket counts from the raw latency samples stored in
                # a Sorted Set keyed "<redis_key>:<label string>".
                full_key = f"{redis_key}:{key}"
                latencies = self._get_latencies(full_key)
                buckets = self._calculate_buckets(latencies)

                metrics_data.append((labels, count, sum_value, buckets))

            # Build the Prometheus metric family.
            # NOTE(review): same uniform-label-set assumption as
            # _collect_counter. Also, bucket counts come from the sample set
            # while `count` comes from the counter hash — these can drift if
            # samples are trimmed; confirm against the writer side.
            if metrics_data:
                label_names = list(metrics_data[0][0].keys())
                histogram = HistogramMetricFamily(
                    metric_name, description, labels=label_names
                )
                for labels, count, sum_value, buckets in metrics_data:
                    histogram.add_metric(
                        list(labels.values()),
                        buckets=buckets,
                        sum_value=sum_value,
                    )
                yield histogram

        except Exception as e:
            logger.error(f"收集直方图 {redis_key} 失败: {e}")

    def _collect_gauge(self, redis_key: str, metric_name: str, description: str):
        """Yield a GaugeMetricFamily built from a Redis hash of label-string -> value."""
        try:
            data = self.redis_client.hgetall(redis_key)
            if not data:
                return

            metrics_data = []
            for key, value in data.items():
                if key.endswith(":timestamp"):
                    continue
                labels = self._parse_labels(key)
                metrics_data.append((labels, float(value)))

            # Build the Prometheus metric family (first-entry label names,
            # as in _collect_counter).
            if metrics_data:
                label_names = list(metrics_data[0][0].keys())
                gauge = GaugeMetricFamily(metric_name, description, labels=label_names)
                for labels, value in metrics_data:
                    gauge.add_metric(list(labels.values()), value)
                yield gauge

        except Exception as e:
            logger.error(f"收集仪表盘 {redis_key} 失败: {e}")

    def _parse_labels(self, label_key: str) -> Dict[str, str]:
        """
        Parse a label key string into a label dict.

        Args:
            label_key: Label key string (e.g., "method=GET,endpoint=/invoke,instance=host1")

        Returns:
            Dict[str, str]: Parsed labels (pairs without "=" are skipped).
        """
        labels = {}
        for pair in label_key.split(","):
            if "=" in pair:
                key, value = pair.split("=", 1)
                labels[key] = value
        return labels

    def _get_latencies(self, key: str) -> List[float]:
        """Read latency samples from a Sorted Set, returned sorted ascending."""
        try:
            data = self.redis_client.zrange(key, 0, -1)
            latencies = []
            for item in data:
                # Member format: "timestamp:value" — take the value part.
                if ":" in item:
                    _, value = item.rsplit(":", 1)
                    latencies.append(float(value))
            return sorted(latencies)
        except Exception as e:
            logger.error(f"获取延迟数据失败: {e}")
            return []

    def _calculate_buckets(
        self, latencies: List[float]
    ) -> List[Tuple[str, float]]:
        """
        Compute cumulative histogram buckets from raw samples.

        Args:
            latencies: Latency samples in seconds.

        Returns:
            List[Tuple[str, float]]: Buckets as [(upper bound, cumulative count), ...],
            always ending with the "+Inf" bucket.
        """
        if not latencies:
            return [("+Inf", 0)]

        # Bucket upper bounds in seconds.
        buckets_boundaries = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]
        buckets = []

        for boundary in buckets_boundaries:
            # Cumulative count: samples at or below this boundary.
            count = sum(1 for lat in latencies if lat <= boundary)
            buckets.append((str(boundary), count))

        # "+Inf" bucket carries the total sample count.
        buckets.append(("+Inf", len(latencies)))

        return buckets
|
||||
|
||||
|
||||
# Module-level collector shared by get_metrics(). The redis.Redis client is
# constructed at import time; the TCP connection itself is presumably made
# lazily on the first command — verify against redis-py behaviour.
redis_collector = RedisMetricsCollector()
|
||||
|
||||
|
||||
def get_metrics() -> bytes:
    """
    Render the Redis-backed metrics in Prometheus exposition format.

    Returns:
        bytes: Payload suitable for a /metrics HTTP response.
    """
    # A fresh registry per call, holding only the Redis-backed collector.
    scrape_registry = CollectorRegistry()
    scrape_registry.register(redis_collector)
    return generate_latest(scrape_registry)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: dump the current metrics to stdout.
    print(get_metrics().decode("utf-8"))
|
||||
39
src/functional_scaffold/core/tracing.py
Normal file
39
src/functional_scaffold/core/tracing.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""分布式追踪模块"""
|
||||
|
||||
import uuid
|
||||
from contextvars import ContextVar
|
||||
from typing import Optional
|
||||
|
||||
# ContextVar (rather than a plain global) so the request id propagates
# correctly across async tasks: each task/context sees its own value.
request_id_var: ContextVar[Optional[str]] = ContextVar("request_id", default=None)
|
||||
|
||||
|
||||
def generate_request_id() -> str:
    """Create a fresh, globally unique request id (UUID4 in canonical form)."""
    return f"{uuid.uuid4()}"
|
||||
|
||||
|
||||
def get_request_id() -> Optional[str]:
    """Return the request id bound to the current context, or None if unset."""
    current_id = request_id_var.get()
    return current_id
|
||||
|
||||
|
||||
def set_request_id(request_id: str) -> None:
    """Bind *request_id* to the current execution context."""
    request_id_var.set(request_id)
|
||||
|
||||
|
||||
class TracingContext:
    """Context manager that binds a request id for the enclosed scope."""

    def __init__(self, request_id: Optional[str] = None):
        # Keep the caller-supplied id; fall back to a generated one when
        # the argument is missing or falsy.
        self.request_id = request_id or generate_request_id()
        self.token = None

    def __enter__(self):
        # Remember the reset token so the previous value can be restored.
        self.token = request_id_var.set(self.request_id)
        return self.request_id

    def __exit__(self, exc_type, exc_val, exc_tb):
        token = self.token
        if token:
            request_id_var.reset(token)
|
||||
138
src/functional_scaffold/main.py
Normal file
138
src/functional_scaffold/main.py
Normal file
@@ -0,0 +1,138 @@
|
||||
"""FastAPI 应用入口"""
|
||||
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import Response
|
||||
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
||||
import logging
|
||||
import time
|
||||
|
||||
from .api import router
|
||||
from .config import settings
|
||||
from .core.logging import setup_logging
|
||||
from .core.metrics import metrics_registry, request_counter, request_latency, in_progress_requests
|
||||
|
||||
# Configure logging first so everything below logs with the chosen format.
setup_logging(level=settings.log_level, format_type=settings.log_format)
logger = logging.getLogger(__name__)
|
||||
|
||||
# Create the FastAPI application with OpenAPI docs enabled.
app = FastAPI(
    title=settings.app_name,
    description="算法工程化 Serverless 脚手架 - 提供标准化的算法服务接口",
    version=settings.app_version,
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
)
|
||||
|
||||
# CORS middleware
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# overly permissive (and browsers reject wildcard origins on credentialed
# requests) — consider an explicit origin allow-list for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
|
||||
# Request logging middleware
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log the method/path of every incoming request and its response status."""
    logger.info(f"Request: {request.method} {request.url.path}")
    response = await call_next(request)
    logger.info(f"Response: {response.status_code}")
    return response
|
||||
|
||||
|
||||
# Metrics tracking middleware
@app.middleware("http")
async def track_metrics(request: Request, call_next):
    """
    Record Prometheus metrics for every HTTP request: a request counter
    (labelled with the final status), a latency histogram, and an
    in-progress gauge.
    """
    if not settings.metrics_enabled:
        return await call_next(request)

    # Skip the /metrics endpoint itself to avoid self-referential samples.
    if request.url.path == "/metrics":
        return await call_next(request)

    in_progress_requests.inc()
    start_time = time.time()
    # Default to "error": a BaseException (e.g. client-disconnect
    # cancellation) bypasses `except Exception`, and the old code then
    # recorded such requests as successes.
    status = "error"

    try:
        response = await call_next(request)
        # Treat HTTP error status codes as failures in the counter label.
        status = "error" if response.status_code >= 400 else "success"
        return response
    finally:
        # Exceptions propagate naturally with their original traceback;
        # metrics are recorded regardless of outcome.
        elapsed = time.time() - start_time
        request_counter.labels(
            method=request.method,
            endpoint=request.url.path,
            status=status,
        ).inc()
        request_latency.labels(
            method=request.method,
            endpoint=request.url.path,
        ).observe(elapsed)
        in_progress_requests.dec()
|
||||
|
||||
|
||||
# Mount the algorithm API routes at the application root.
app.include_router(router, tags=["Algorithm"])
|
||||
|
||||
|
||||
# Prometheus metrics endpoint
@app.get(
    "/metrics",
    tags=["Monitoring"],
    summary="Prometheus 指标",
    description="导出 Prometheus 格式的监控指标",
)
async def metrics():
    """
    Prometheus metrics endpoint.

    Returns the application's monitoring metrics in Prometheus exposition
    format for scraping; responds with 404 when metrics are disabled.
    """
    if not settings.metrics_enabled:
        return Response(content="Metrics disabled", status_code=404)

    return Response(
        content=generate_latest(metrics_registry),
        media_type=CONTENT_TYPE_LATEST,
    )
|
||||
|
||||
|
||||
# Startup hook
# NOTE(review): @app.on_event is deprecated in newer FastAPI releases in
# favour of lifespan handlers — consider migrating on upgrade.
@app.on_event("startup")
async def startup_event():
    """Log basic runtime configuration when the application starts."""
    logger.info(f"Starting {settings.app_name} v{settings.app_version}")
    logger.info(f"Environment: {settings.app_env}")
    logger.info(f"Metrics enabled: {settings.metrics_enabled}")
|
||||
|
||||
|
||||
# Shutdown hook
@app.on_event("shutdown")
async def shutdown_event():
    """Log a final message when the application stops."""
    logger.info(f"Shutting down {settings.app_name}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import uvicorn

    # Development entry point; in production run via an external server
    # command (e.g. `uvicorn functional_scaffold.main:app`).
    uvicorn.run(
        "functional_scaffold.main:app",
        host=settings.host,
        port=settings.port,
        # Auto-reload only in the development environment.
        reload=settings.app_env == "development",
        log_level=settings.log_level.lower(),
    )
|
||||
5
src/functional_scaffold/utils/__init__.py
Normal file
5
src/functional_scaffold/utils/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""工具函数模块"""
|
||||
|
||||
from .validators import validate_integer, validate_positive_integer
|
||||
|
||||
__all__ = ["validate_integer", "validate_positive_integer"]
|
||||
51
src/functional_scaffold/utils/validators.py
Normal file
51
src/functional_scaffold/utils/validators.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""参数校验工具"""
|
||||
|
||||
from typing import Any
|
||||
from ..core.errors import ValidationError
|
||||
|
||||
|
||||
def validate_integer(value: Any, field_name: str = "value") -> int:
    """
    Ensure *value* is a true integer.

    Args:
        value: Candidate value.
        field_name: Name used in the error message.

    Returns:
        int: The validated value, unchanged.

    Raises:
        ValidationError: If the value is not an int (bool is rejected too).
    """
    # bool subclasses int, so it must be screened out explicitly.
    is_true_int = isinstance(value, int) and not isinstance(value, bool)
    if is_true_int:
        return value
    raise ValidationError(
        f"{field_name} must be an integer",
        details={"field": field_name, "value": value, "type": type(value).__name__},
    )
|
||||
|
||||
|
||||
def validate_positive_integer(value: Any, field_name: str = "value") -> int:
    """
    Ensure *value* is an integer strictly greater than zero.

    Args:
        value: Candidate value.
        field_name: Name used in the error message.

    Returns:
        int: The validated positive value.

    Raises:
        ValidationError: If the value is not an int, or is <= 0.
    """
    # Delegate the type check first; reuse the validated result.
    checked = validate_integer(value, field_name)
    if checked > 0:
        return checked
    raise ValidationError(
        f"{field_name} must be a positive integer",
        details={"field": field_name, "value": checked},
    )
|
||||
Reference in New Issue
Block a user