main:添加核心文件并初始化项目

新增内容:
- 创建基础项目结构。
- 添加 `.gitignore` 和 `.dockerignore` 文件。
- 编写 `pyproject.toml` 和依赖文件。
- 添加算法模块及示例算法。
- 实现核心功能模块(日志、错误处理、指标)。
- 添加开发和运行所需的相关脚本文件及文档。
This commit is contained in:
2026-02-02 10:46:01 +08:00
commit 31af5e2286
54 changed files with 5726 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
"""FunctionalScaffold - 算法工程化 Serverless 脚手架"""
__version__ = "1.0.0"

View File

@@ -0,0 +1,6 @@
"""算法模块"""
from .base import BaseAlgorithm
from .prime_checker import PrimeChecker
__all__ = ["BaseAlgorithm", "PrimeChecker"]

View File

@@ -0,0 +1,77 @@
"""算法基类"""
from abc import ABC, abstractmethod
from typing import Any, Dict
import time
import logging
logger = logging.getLogger(__name__)
class BaseAlgorithm(ABC):
    """Abstract base class all algorithms must inherit from.

    Subclasses implement :meth:`process`; callers invoke :meth:`execute`,
    which wraps the run with logging, metrics and error handling.
    """

    def __init__(self):
        # Identity reported in logs, metric labels and result metadata.
        self.name = self.__class__.__name__
        self.version = "1.0.0"

    @abstractmethod
    def process(self, *args, **kwargs) -> Dict[str, Any]:
        """Run the algorithm's core logic; subclasses must implement this.

        Returns:
            Dict[str, Any]: the raw algorithm result.
        """
        pass

    def execute(self, *args, **kwargs) -> Dict[str, Any]:
        """Run the algorithm with instrumentation and error handling.

        Returns:
            Dict[str, Any]: envelope with ``success``, ``result`` (or
            ``error`` on failure) and execution ``metadata``.
        """
        # Imported lazily to avoid an import cycle between algorithms and core.
        from ..core.metrics import algorithm_counter, algorithm_latency

        started = time.time()
        outcome = "success"

        def _metadata() -> Dict[str, Any]:
            # Snapshot of identity + elapsed time at the moment of the call.
            return {
                "algorithm": self.name,
                "version": self.version,
                "elapsed_time": time.time() - started,
            }

        try:
            logger.info(f"Starting algorithm: {self.name}")
            payload = self.process(*args, **kwargs)
            meta = _metadata()
            logger.info(
                f"Algorithm {self.name} completed successfully in {meta['elapsed_time']:.3f}s"
            )
            return {"success": True, "result": payload, "metadata": meta}
        except Exception as exc:
            outcome = "error"
            meta = _metadata()
            logger.error(f"Algorithm {self.name} failed: {str(exc)}", exc_info=True)
            return {"success": False, "error": str(exc), "metadata": meta}
        finally:
            # Metrics are recorded on both the success and the error path.
            algorithm_counter.labels(algorithm=self.name, status=outcome).inc()
            algorithm_latency.labels(algorithm=self.name).observe(time.time() - started)

View File

@@ -0,0 +1,94 @@
"""质数判断算法"""
from typing import Dict, Any, List
from .base import BaseAlgorithm
class PrimeChecker(BaseAlgorithm):
    """Primality check via trial division, with proper-factor decomposition.

    ``process`` reports whether the input is prime and, when it is not,
    the list of its proper factors (excluding 1 and the number itself).
    """

    def process(self, number: int) -> Dict[str, Any]:
        """Check whether *number* is prime.

        Args:
            number: the integer to test.

        Returns:
            Dict[str, Any]: result payload with keys
                - number: the input value
                - is_prime: whether it is prime
                - factors: proper factors (empty for primes)
                - reason: explanation (only for numbers < 2)
                - algorithm: always "trial_division"

        Raises:
            ValueError: if the input is not an integer. Booleans are
                rejected even though ``bool`` subclasses ``int``, matching
                ``utils.validators.validate_integer``.
        """
        # bool is a subclass of int; reject it explicitly for consistency
        # with the project's validators.
        if not isinstance(number, int) or isinstance(number, bool):
            raise ValueError(f"Input must be an integer, got {type(number).__name__}")

        # Numbers below 2 are not prime by definition.
        if number < 2:
            return {
                "number": number,
                "is_prime": False,
                "reason": "Numbers less than 2 are not prime",
                "factors": [],
                "algorithm": "trial_division",
            }

        is_prime = self._is_prime(number)
        # Primes have no proper factors; only factor composites.
        factors = [] if is_prime else self._get_factors(number)
        return {
            "number": number,
            "is_prime": is_prime,
            "factors": factors,
            "algorithm": "trial_division",
        }

    def _is_prime(self, n: int) -> bool:
        """Trial-division primality test.

        Args:
            n: integer to test (assumed >= 2 by the caller).

        Returns:
            bool: True if *n* is prime.
        """
        if n == 2:
            return True
        if n % 2 == 0:
            return False
        # Checking odd divisors up to sqrt(n) is sufficient.
        for i in range(3, int(n**0.5) + 1, 2):
            if n % i == 0:
                return False
        return True

    def _get_factors(self, n: int) -> List[int]:
        """Return all proper factors of *n* (excluding 1 and *n*), ascending.

        Runs in O(sqrt(n)) instead of the naive O(n) scan: every divisor
        i <= sqrt(n) also contributes its cofactor n // i.

        Args:
            n: positive integer to factor.

        Returns:
            List[int]: proper factors in ascending order.
        """
        small: List[int] = []
        large: List[int] = []
        i = 2
        while i * i <= n:
            if n % i == 0:
                small.append(i)
                cofactor = n // i
                # Avoid duplicating the square root of perfect squares.
                if cofactor != i:
                    large.append(cofactor)
            i += 1
        # `small` is ascending; `large` was collected in descending order.
        return small + large[::-1]

View File

@@ -0,0 +1,6 @@
"""API 模块"""
from .routes import router
from .models import InvokeRequest, InvokeResponse, HealthResponse, ErrorResponse
__all__ = ["router", "InvokeRequest", "InvokeResponse", "HealthResponse", "ErrorResponse"]

View File

@@ -0,0 +1,20 @@
"""API 依赖注入"""
from fastapi import Header, HTTPException
from typing import Optional
from ..core.tracing import set_request_id, generate_request_id
async def get_request_id(x_request_id: Optional[str] = Header(None)) -> str:
    """Resolve the request ID for the current request.

    Uses the incoming ``X-Request-ID`` header when present, otherwise
    generates a fresh ID; the value is stored in the tracing context.

    Args:
        x_request_id: value of the ``X-Request-ID`` header, if any.

    Returns:
        str: the resolved request ID.
    """
    if x_request_id:
        resolved = x_request_id
    else:
        resolved = generate_request_id()
    set_request_id(resolved)
    return resolved

View File

@@ -0,0 +1,82 @@
"""API 数据模型"""
from pydantic import BaseModel, Field, ConfigDict
from typing import Any, Dict, Optional
class InvokeRequest(BaseModel):
    """Request body for the synchronous /invoke endpoint."""
    # OpenAPI example rendered in the generated docs.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "number": 17
            }
        }
    )
    # Field descriptions are user-facing API docs (intentionally Chinese).
    number: int = Field(..., description="待判断的整数")
class InvokeResponse(BaseModel):
    """Response body for the synchronous /invoke endpoint."""
    # OpenAPI example rendered in the generated docs.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "request_id": "550e8400-e29b-41d4-a716-446655440000",
                "status": "success",
                "result": {
                    "number": 17,
                    "is_prime": True,
                    "factors": [],
                    "algorithm": "trial_division"
                },
                "metadata": {
                    "algorithm": "PrimeChecker",
                    "version": "1.0.0",
                    "elapsed_time": 0.001
                }
            }
        }
    )
    request_id: str = Field(..., description="请求唯一标识")
    status: str = Field(..., description="处理状态")
    result: Dict[str, Any] = Field(..., description="算法执行结果")
    metadata: Dict[str, Any] = Field(..., description="元数据信息")
class HealthResponse(BaseModel):
    """Response body for the /healthz liveness probe."""
    status: str = Field(..., description="健康状态")
    timestamp: float = Field(..., description="时间戳")
class ReadinessResponse(BaseModel):
    """Response body for the /readyz readiness probe."""
    status: str = Field(..., description="就绪状态")
    timestamp: float = Field(..., description="时间戳")
    checks: Optional[Dict[str, bool]] = Field(None, description="各项检查结果")
class ErrorResponse(BaseModel):
    """Standard error payload returned by the API error handlers."""
    # OpenAPI example rendered in the generated docs.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "error": "VALIDATION_ERROR",
                "message": "number must be an integer",
                "details": {"field": "number", "value": "abc"},
                "request_id": "550e8400-e29b-41d4-a716-446655440000"
            }
        }
    )
    error: str = Field(..., description="错误代码")
    message: str = Field(..., description="错误消息")
    details: Optional[Dict[str, Any]] = Field(None, description="错误详情")
    request_id: Optional[str] = Field(None, description="请求ID")

View File

@@ -0,0 +1,150 @@
"""API 路由"""
from fastapi import APIRouter, HTTPException, Depends, status
from fastapi.responses import JSONResponse
import time
import logging
from .models import (
InvokeRequest,
InvokeResponse,
HealthResponse,
ReadinessResponse,
ErrorResponse,
)
from .dependencies import get_request_id
from ..algorithms.prime_checker import PrimeChecker
from ..core.errors import FunctionalScaffoldError, ValidationError, AlgorithmError
logger = logging.getLogger(__name__)
router = APIRouter()
@router.post(
    "/invoke",
    response_model=InvokeResponse,
    status_code=status.HTTP_200_OK,
    summary="同步调用算法",
    description="同步调用质数判断算法,立即返回结果",
    responses={
        200: {"description": "成功", "model": InvokeResponse},
        400: {"description": "请求参数错误", "model": ErrorResponse},
        500: {"description": "服务器内部错误", "model": ErrorResponse},
    },
)
async def invoke_algorithm(
    request: InvokeRequest,
    request_id: str = Depends(get_request_id),
):
    """Synchronously run the prime-checking algorithm.

    - **number**: the integer to test.

    Raises:
        HTTPException: 400 for validation failures, 500 for algorithm or
        unexpected errors.
    """
    try:
        logger.info(f"Processing request {request_id} with number={request.number}")
        # Run the algorithm through its instrumented execute() wrapper.
        checker = PrimeChecker()
        execution_result = checker.execute(request.number)
        if not execution_result["success"]:
            raise AlgorithmError(
                execution_result.get("error", "Algorithm execution failed"),
                details=execution_result.get("metadata", {}),
            )
        return InvokeResponse(
            request_id=request_id,
            status="success",
            result=execution_result["result"],
            metadata=execution_result["metadata"],
        )
    except ValidationError as e:
        # NOTE(review): execute() converts process() exceptions into an
        # error envelope, so this branch only fires if ValidationError is
        # raised outside execute() — confirm it is actually reachable.
        logger.warning(f"Validation error for request {request_id}: {e.message}")
        # "from e" preserves the exception chain for debugging (B904).
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=e.to_dict(),
        ) from e
    except AlgorithmError as e:
        logger.error(f"Algorithm error for request {request_id}: {e.message}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=e.to_dict(),
        ) from e
    except Exception as e:
        logger.error(f"Unexpected error for request {request_id}: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail={
                "error": "INTERNAL_ERROR",
                "message": str(e),
                "request_id": request_id,
            },
        ) from e
@router.get(
    "/healthz",
    response_model=HealthResponse,
    summary="健康检查",
    description="检查服务是否存活",
)
async def health_check():
    """Liveness probe endpoint: report that the service process is up."""
    now = time.time()
    return HealthResponse(status="healthy", timestamp=now)
@router.get(
    "/readyz",
    response_model=ReadinessResponse,
    summary="就绪检查",
    description="检查服务是否就绪",
)
async def readiness_check():
    """Readiness probe endpoint: report whether dependencies are ready.

    Additional checks (database connectivity, external services, ...) can
    be added to the ``checks`` mapping below.
    """
    checks = {"algorithm": True}  # the algorithm module is always importable here
    ready = all(checks.values())
    return ReadinessResponse(
        status="ready" if ready else "not_ready",
        timestamp=time.time(),
        checks=checks,
    )
@router.post(
    "/jobs",
    status_code=status.HTTP_501_NOT_IMPLEMENTED,
    summary="异步任务接口(预留)",
    description="异步任务接口,当前版本未实现",
)
async def create_job():
    """Placeholder endpoint reserved for long-running asynchronous jobs."""
    detail = {"error": "NOT_IMPLEMENTED", "message": "Async jobs not implemented yet"}
    raise HTTPException(status_code=status.HTTP_501_NOT_IMPLEMENTED, detail=detail)

View File

@@ -0,0 +1,47 @@
"""配置管理模块"""
from typing import Optional

from pydantic import ConfigDict
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application configuration, loaded from environment variables / .env.

    Field names map to environment variables case-insensitively
    (e.g. ``APP_NAME``); a local ``.env`` file is also honoured.
    """
    # SettingsConfigDict is the pydantic-settings config type; plain
    # pydantic.ConfigDict does not declare env_file/case_sensitive keys.
    model_config = SettingsConfigDict(
        env_file=".env",
        case_sensitive=False
    )
    # Application identity
    app_name: str = "FunctionalScaffold"
    app_version: str = "1.0.0"
    app_env: str = "development"
    # HTTP server
    host: str = "0.0.0.0"
    port: int = 8000
    workers: int = 4
    # Logging
    log_level: str = "INFO"
    log_format: str = "json"
    # Metrics
    metrics_enabled: bool = True
    # Tracing
    tracing_enabled: bool = False
    jaeger_endpoint: Optional[str] = None
    # External service credentials (examples; all optional)
    oss_endpoint: Optional[str] = None
    oss_access_key_id: Optional[str] = None
    oss_access_key_secret: Optional[str] = None
    oss_bucket_name: Optional[str] = None
    database_url: Optional[str] = None
# Module-level singleton read by the rest of the application.
settings = Settings()

View File

@@ -0,0 +1,21 @@
"""核心功能模块"""
from .errors import (
FunctionalScaffoldError,
ValidationError,
AlgorithmError,
ConfigurationError,
)
from .logging import setup_logging
from .metrics import metrics_registry, track_request, track_algorithm_execution
__all__ = [
"FunctionalScaffoldError",
"ValidationError",
"AlgorithmError",
"ConfigurationError",
"setup_logging",
"metrics_registry",
"track_request",
"track_algorithm_execution",
]

View File

@@ -0,0 +1,47 @@
"""错误处理模块"""
from typing import Any, Dict, Optional
class FunctionalScaffoldError(Exception):
    """Root of the project's exception hierarchy.

    Carries a machine-readable ``error_code`` and an optional ``details``
    mapping alongside the human-readable message.
    """

    def __init__(
        self,
        message: str,
        error_code: Optional[str] = None,
        details: Optional[Dict[str, Any]] = None,
    ):
        super().__init__(message)
        self.message = message
        # Falsy codes/details fall back to safe defaults.
        self.error_code = error_code or "INTERNAL_ERROR"
        self.details = details or {}

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the error into the API's error-payload shape."""
        return {
            "error": self.error_code,
            "message": self.message,
            "details": self.details,
        }
class ValidationError(FunctionalScaffoldError):
    """Raised when request parameters fail validation (mapped to HTTP 400)."""

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        super().__init__(message, error_code="VALIDATION_ERROR", details=details)
class AlgorithmError(FunctionalScaffoldError):
    """Raised when an algorithm run fails (mapped to HTTP 500)."""

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        super().__init__(message, error_code="ALGORITHM_ERROR", details=details)
class ConfigurationError(FunctionalScaffoldError):
    """Raised for invalid or missing application configuration."""

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        super().__init__(message, error_code="CONFIGURATION_ERROR", details=details)

View File

@@ -0,0 +1,50 @@
"""日志配置模块"""
import logging
import sys
from typing import Optional
from pythonjsonlogger.json import JsonFormatter
def setup_logging(
    level: str = "INFO",
    format_type: str = "json",
    logger_name: Optional[str] = None,
) -> logging.Logger:
    """Configure and return a logger writing to stdout.

    Args:
        level: log level name (DEBUG, INFO, WARNING, ERROR, CRITICAL).
        format_type: "json" for structured output, anything else for text.
        logger_name: logger to configure; None configures the root logger.

    Returns:
        logging.Logger: the configured logger.
    """
    numeric_level = getattr(logging, level.upper())
    target = logging.getLogger(logger_name)
    target.setLevel(numeric_level)

    # Replace previously-installed handlers so repeated calls don't
    # duplicate output.
    target.handlers.clear()

    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setLevel(numeric_level)

    if format_type == "json":
        stream_handler.setFormatter(
            JsonFormatter(
                "%(asctime)s %(name)s %(levelname)s %(message)s",
                timestamp=True,
            )
        )
    else:
        stream_handler.setFormatter(
            logging.Formatter(
                "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                datefmt="%Y-%m-%d %H:%M:%S",
            )
        )

    target.addHandler(stream_handler)
    return target

View File

@@ -0,0 +1,111 @@
"""Prometheus 指标模块"""
from prometheus_client import Counter, Histogram, Gauge, CollectorRegistry
from functools import wraps
import time
from typing import Callable
# Dedicated registry so these metrics stay isolated from the default
# prometheus_client registry.
metrics_registry = CollectorRegistry()
# Total HTTP requests, labelled by method, path and success/error status.
request_counter = Counter(
    "http_requests_total",
    "Total HTTP requests",
    ["method", "endpoint", "status"],
    registry=metrics_registry,
)
# HTTP request latency histogram.
request_latency = Histogram(
    "http_request_duration_seconds",
    "HTTP request latency",
    ["method", "endpoint"],
    registry=metrics_registry,
)
# Total algorithm executions, labelled by algorithm and outcome.
algorithm_counter = Counter(
    "algorithm_executions_total",
    "Total algorithm executions",
    ["algorithm", "status"],
    registry=metrics_registry,
)
# Algorithm execution latency histogram.
algorithm_latency = Histogram(
    "algorithm_execution_duration_seconds",
    "Algorithm execution latency",
    ["algorithm"],
    registry=metrics_registry,
)
# Number of HTTP requests currently being handled.
in_progress_requests = Gauge(
    "http_requests_in_progress",
    "Number of HTTP requests in progress",
    registry=metrics_registry,
)
def track_request(method: str, endpoint: str):
    """Decorator: record Prometheus metrics around an async HTTP handler.

    Args:
        method: HTTP method label.
        endpoint: endpoint path label.
    """
    def decorator(func: Callable):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            in_progress_requests.inc()
            start_time = time.time()
            # Default to "error" so the finally block has a defined status
            # even if a BaseException (e.g. asyncio.CancelledError) escapes
            # the except clause below.
            status = "error"
            try:
                result = await func(*args, **kwargs)
                status = "success"
                return result
            except Exception:
                status = "error"
                raise  # bare raise preserves the original traceback
            finally:
                elapsed = time.time() - start_time
                request_counter.labels(method=method, endpoint=endpoint, status=status).inc()
                request_latency.labels(method=method, endpoint=endpoint).observe(elapsed)
                in_progress_requests.dec()
        return wrapper
    return decorator
def track_algorithm_execution(algorithm_name: str):
    """Decorator: record Prometheus metrics around an algorithm call.

    Args:
        algorithm_name: algorithm label value.
    """
    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            start_time = time.time()
            # Default to "error" so the finally block has a defined status
            # even if a BaseException escapes the except clause below.
            status = "error"
            try:
                result = func(*args, **kwargs)
                status = "success"
                return result
            except Exception:
                status = "error"
                raise  # bare raise preserves the original traceback
            finally:
                elapsed = time.time() - start_time
                algorithm_counter.labels(algorithm=algorithm_name, status=status).inc()
                algorithm_latency.labels(algorithm=algorithm_name).observe(elapsed)
        return wrapper
    return decorator

View File

@@ -0,0 +1,162 @@
"""基于 Pushgateway 的 Prometheus 指标模块"""
from prometheus_client import Counter, Histogram, Gauge, CollectorRegistry, push_to_gateway
from functools import wraps
import time
from typing import Callable, Optional
import os
import logging
logger = logging.getLogger(__name__)
# Dedicated registry so these metrics stay isolated from the default
# prometheus_client registry.
metrics_registry = CollectorRegistry()
# Pushgateway configuration, overridable via environment variables.
PUSHGATEWAY_URL = os.getenv("PUSHGATEWAY_URL", "localhost:9091")
JOB_NAME = os.getenv("METRICS_JOB_NAME", "functional_scaffold")
# Per-process identity label; falls back to the container hostname.
INSTANCE_ID = os.getenv("INSTANCE_ID", os.getenv("HOSTNAME", "unknown"))
# Total HTTP requests, labelled per instance for the push model.
request_counter = Counter(
    "http_requests_total",
    "Total HTTP requests",
    ["method", "endpoint", "status", "instance"],
    registry=metrics_registry,
)
# HTTP request latency histogram.
request_latency = Histogram(
    "http_request_duration_seconds",
    "HTTP request latency",
    ["method", "endpoint", "instance"],
    registry=metrics_registry,
)
# Total algorithm executions.
algorithm_counter = Counter(
    "algorithm_executions_total",
    "Total algorithm executions",
    ["algorithm", "status", "instance"],
    registry=metrics_registry,
)
# Algorithm execution latency histogram.
algorithm_latency = Histogram(
    "algorithm_execution_duration_seconds",
    "Algorithm execution latency",
    ["algorithm", "instance"],
    registry=metrics_registry,
)
# Number of HTTP requests currently being handled.
in_progress_requests = Gauge(
    "http_requests_in_progress",
    "Number of HTTP requests in progress",
    ["instance"],
    registry=metrics_registry,
)
def push_metrics(grouping_key: Optional[dict] = None):
    """Push the current registry contents to the configured Pushgateway.

    Delivery is best-effort: failures are logged, never raised, so metric
    pushes cannot break request handling.

    Args:
        grouping_key: extra grouping labels merged over the instance id.
    """
    try:
        merged = {"instance": INSTANCE_ID}
        if grouping_key:
            merged.update(grouping_key)
        push_to_gateway(
            PUSHGATEWAY_URL,
            job=JOB_NAME,
            registry=metrics_registry,
            grouping_key=merged,
        )
        logger.debug(f"成功推送指标到 Pushgateway: {PUSHGATEWAY_URL}")
    except Exception as exc:
        logger.error(f"推送指标到 Pushgateway 失败: {exc}")
def track_request(method: str, endpoint: str, auto_push: bool = True):
    """Decorator: record HTTP metrics and optionally push them to the gateway.

    Args:
        method: HTTP method label.
        endpoint: endpoint path label.
        auto_push: push the registry to the Pushgateway after each request.
    """
    def decorator(func: Callable):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            in_progress_requests.labels(instance=INSTANCE_ID).inc()
            start_time = time.time()
            # Default to "error" so the finally block has a defined status
            # even if a BaseException (e.g. asyncio.CancelledError) escapes
            # the except clause below.
            status = "error"
            try:
                result = await func(*args, **kwargs)
                status = "success"
                return result
            except Exception:
                status = "error"
                raise  # bare raise preserves the original traceback
            finally:
                elapsed = time.time() - start_time
                request_counter.labels(
                    method=method, endpoint=endpoint, status=status, instance=INSTANCE_ID
                ).inc()
                request_latency.labels(
                    method=method, endpoint=endpoint, instance=INSTANCE_ID
                ).observe(elapsed)
                in_progress_requests.labels(instance=INSTANCE_ID).dec()
                # Best-effort push after every call when enabled.
                if auto_push:
                    push_metrics()
        return wrapper
    return decorator
def track_algorithm_execution(algorithm_name: str, auto_push: bool = True):
    """Decorator: record algorithm metrics and optionally push them.

    Args:
        algorithm_name: algorithm label value.
        auto_push: push the registry to the Pushgateway after each call.
    """
    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            start_time = time.time()
            # Default to "error" so the finally block has a defined status
            # even if a BaseException escapes the except clause below.
            status = "error"
            try:
                result = func(*args, **kwargs)
                status = "success"
                return result
            except Exception:
                status = "error"
                raise  # bare raise preserves the original traceback
            finally:
                elapsed = time.time() - start_time
                algorithm_counter.labels(
                    algorithm=algorithm_name, status=status, instance=INSTANCE_ID
                ).inc()
                algorithm_latency.labels(
                    algorithm=algorithm_name, instance=INSTANCE_ID
                ).observe(elapsed)
                # Best-effort push after every call when enabled.
                if auto_push:
                    push_metrics()
        return wrapper
    return decorator

View File

@@ -0,0 +1,247 @@
"""基于 Redis 的指标记录模块"""
from functools import wraps
import time
from typing import Callable, Optional
import os
import logging
import json
from datetime import datetime
try:
    import redis
    REDIS_AVAILABLE = True
except ImportError:
    # Degrade gracefully when the optional redis dependency is missing;
    # RedisMetricsClient raises at construction time instead.
    REDIS_AVAILABLE = False
    logging.warning("Redis 未安装,指标将无法记录到 Redis")
logger = logging.getLogger(__name__)
# Redis connection settings, all overridable via environment variables.
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
REDIS_DB = int(os.getenv("REDIS_METRICS_DB", "0"))
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", None)
# Per-process identity label; falls back to the container hostname.
INSTANCE_ID = os.getenv("INSTANCE_ID", os.getenv("HOSTNAME", "unknown"))
# Redis key names — the exporter module reads these same keys, so the
# layout must stay in sync between the two modules.
METRICS_PREFIX = "metrics:"
REQUEST_COUNTER_KEY = f"{METRICS_PREFIX}request_counter"
REQUEST_LATENCY_KEY = f"{METRICS_PREFIX}request_latency"
ALGORITHM_COUNTER_KEY = f"{METRICS_PREFIX}algorithm_counter"
ALGORITHM_LATENCY_KEY = f"{METRICS_PREFIX}algorithm_latency"
IN_PROGRESS_KEY = f"{METRICS_PREFIX}in_progress"
class RedisMetricsClient:
    """Write-side client that records metrics into Redis.

    Key layout (must stay in sync with the exporter that reads it back):
    counters and gauges live in a hash at ``key`` whose fields are named
    ``{key}:{sorted label pairs}``; histogram samples live in a sorted set
    per label combination plus ``{key}:count`` / ``{key}:sum`` hashes.
    All operations are best-effort: Redis errors are logged, not raised.
    """
    def __init__(self):
        # Fail fast when the optional redis dependency is absent.
        if not REDIS_AVAILABLE:
            raise ImportError("需要安装 redis 库: pip install redis")
        self.client = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            db=REDIS_DB,
            password=REDIS_PASSWORD,
            decode_responses=True,
        )
        self.instance_id = INSTANCE_ID
    def increment_counter(self, key: str, labels: dict, value: int = 1):
        """
        Increment a counter.

        Args:
            key: metric key (Redis hash name)
            labels: label dict
            value: amount to add
        """
        try:
            # The hash field encodes the metric key plus the label combination.
            label_key = self._make_label_key(labels)
            full_key = f"{key}:{label_key}"
            self.client.hincrby(key, full_key, value)
            # Record last-update time in a sibling ":timestamp" field
            # (readers skip fields with that suffix).
            self.client.hset(key, f"{full_key}:timestamp", int(time.time()))
        except Exception as e:
            logger.error(f"Redis 计数器增加失败: {e}")
    def observe_histogram(self, key: str, labels: dict, value: float):
        """
        Record one histogram observation.

        Args:
            key: metric key
            labels: label dict
            value: observed value (e.g. latency in seconds)
        """
        try:
            label_key = self._make_label_key(labels)
            full_key = f"{key}:{label_key}"
            # Sorted set keeps raw samples for quantile/bucket computation.
            # NOTE(review): the member is "timestamp:value", so two samples
            # with identical timestamp AND value collapse into one entry —
            # a slight undercount is possible; confirm acceptable.
            timestamp = time.time()
            self.client.zadd(full_key, {f"{timestamp}:{value}": timestamp})
            # Keep only the most recent hour of raw samples.
            cutoff = timestamp - 3600
            self.client.zremrangebyscore(full_key, "-inf", cutoff)
            # Also maintain count/sum hashes for cheap aggregate stats.
            self.client.hincrby(f"{key}:count", full_key, 1)
            self.client.hincrbyfloat(f"{key}:sum", full_key, value)
        except Exception as e:
            logger.error(f"Redis 直方图记录失败: {e}")
    def set_gauge(self, key: str, labels: dict, value: float):
        """
        Set a gauge to an absolute value.

        Args:
            key: metric key
            labels: label dict
            value: value to store
        """
        try:
            label_key = self._make_label_key(labels)
            full_key = f"{key}:{label_key}"
            self.client.hset(key, full_key, value)
            self.client.hset(key, f"{full_key}:timestamp", int(time.time()))
        except Exception as e:
            logger.error(f"Redis 仪表盘设置失败: {e}")
    def increment_gauge(self, key: str, labels: dict, value: float = 1):
        """Add *value* to a gauge (creates it at 0 if missing)."""
        try:
            label_key = self._make_label_key(labels)
            full_key = f"{key}:{label_key}"
            self.client.hincrbyfloat(key, full_key, value)
        except Exception as e:
            logger.error(f"Redis 仪表盘增加失败: {e}")
    def decrement_gauge(self, key: str, labels: dict, value: float = 1):
        """Subtract *value* from a gauge (delegates to increment_gauge)."""
        self.increment_gauge(key, labels, -value)
    def _make_label_key(self, labels: dict) -> str:
        """
        Build the canonical label-key string for a label dict.

        Args:
            labels: label dict

        Returns:
            str: sorted "k=v,k=v" string including the instance label
        """
        # Always tag samples with this process's instance id.
        labels_with_instance = {**labels, "instance": self.instance_id}
        # Sorting makes the key deterministic regardless of insertion order.
        sorted_labels = sorted(labels_with_instance.items())
        return ",".join(f"{k}={v}" for k, v in sorted_labels)
    def get_metrics_summary(self) -> dict:
        """
        Return a raw dump of the counter/gauge hashes (debugging aid).

        Returns:
            dict: raw hash contents, or {} on error
        """
        try:
            return {
                "request_counter": self.client.hgetall(REQUEST_COUNTER_KEY),
                "algorithm_counter": self.client.hgetall(ALGORITHM_COUNTER_KEY),
                "in_progress": self.client.hgetall(IN_PROGRESS_KEY),
            }
        except Exception as e:
            logger.error(f"获取指标摘要失败: {e}")
            return {}
# Lazily-initialised module-wide client instance.
_redis_client: Optional[RedisMetricsClient] = None
def get_redis_client() -> RedisMetricsClient:
    """Return the shared RedisMetricsClient, creating it on first use."""
    global _redis_client
    if _redis_client is not None:
        return _redis_client
    _redis_client = RedisMetricsClient()
    return _redis_client
def track_request(method: str, endpoint: str):
    """Decorator: record HTTP request metrics into Redis.

    Args:
        method: HTTP method label.
        endpoint: endpoint path label.
    """
    def decorator(func: Callable):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            client = get_redis_client()
            labels = {"method": method, "endpoint": endpoint}
            # Track in-flight requests via a gauge.
            client.increment_gauge(IN_PROGRESS_KEY, labels)
            start_time = time.time()
            # Default to "error" so the finally block has a defined status
            # even if a BaseException (e.g. asyncio.CancelledError) escapes
            # the except clause below.
            status = "error"
            try:
                result = await func(*args, **kwargs)
                status = "success"
                return result
            except Exception:
                status = "error"
                raise  # bare raise preserves the original traceback
            finally:
                elapsed = time.time() - start_time
                counter_labels = {**labels, "status": status}
                client.increment_counter(REQUEST_COUNTER_KEY, counter_labels)
                client.observe_histogram(REQUEST_LATENCY_KEY, labels, elapsed)
                client.decrement_gauge(IN_PROGRESS_KEY, labels)
        return wrapper
    return decorator
def track_algorithm_execution(algorithm_name: str):
    """Decorator: record algorithm execution metrics into Redis.

    Args:
        algorithm_name: algorithm label value.
    """
    def decorator(func: Callable):
        @wraps(func)
        def wrapper(*args, **kwargs):
            client = get_redis_client()
            labels = {"algorithm": algorithm_name}
            start_time = time.time()
            # Default to "error" so the finally block has a defined status
            # even if a BaseException escapes the except clause below.
            status = "error"
            try:
                result = func(*args, **kwargs)
                status = "success"
                return result
            except Exception:
                status = "error"
                raise  # bare raise preserves the original traceback
            finally:
                elapsed = time.time() - start_time
                counter_labels = {**labels, "status": status}
                client.increment_counter(ALGORITHM_COUNTER_KEY, counter_labels)
                client.observe_histogram(ALGORITHM_LATENCY_KEY, labels, elapsed)
        return wrapper
    return decorator

View File

@@ -0,0 +1,247 @@
"""Redis 指标 Exporter - 将 Redis 中的指标转换为 Prometheus 格式"""
from prometheus_client import Counter, Histogram, Gauge, CollectorRegistry, generate_latest
from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily, HistogramMetricFamily
import redis
import os
import logging
from typing import Dict, List, Tuple
import time
logger = logging.getLogger(__name__)
# Redis connection settings, overridable via environment variables.
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
REDIS_DB = int(os.getenv("REDIS_METRICS_DB", "0"))
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", None)
# Redis key names — these mirror the writer module (core.metrics_redis)
# and must stay in sync with it.
METRICS_PREFIX = "metrics:"
REQUEST_COUNTER_KEY = f"{METRICS_PREFIX}request_counter"
REQUEST_LATENCY_KEY = f"{METRICS_PREFIX}request_latency"
ALGORITHM_COUNTER_KEY = f"{METRICS_PREFIX}algorithm_counter"
ALGORITHM_LATENCY_KEY = f"{METRICS_PREFIX}algorithm_latency"
IN_PROGRESS_KEY = f"{METRICS_PREFIX}in_progress"
class RedisMetricsCollector:
    """Read-side collector turning RedisMetricsClient data into Prometheus families.

    The writer stores hash fields named ``{redis_key}:{label_key}`` where
    ``label_key`` is a sorted ``k=v,k=v`` string; that prefix must be
    stripped before the labels are parsed.
    """
    def __init__(self):
        self.redis_client = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            db=REDIS_DB,
            password=REDIS_PASSWORD,
            decode_responses=True,
        )
    @staticmethod
    def _strip_prefix(redis_key: str, field: str) -> str:
        """Drop the leading '{redis_key}:' from a hash field, leaving the label key."""
        prefix = f"{redis_key}:"
        return field[len(prefix):] if field.startswith(prefix) else field
    def collect(self):
        """Yield all metric families (Prometheus custom-collector protocol)."""
        try:
            # Counters
            yield from self._collect_counter(
                REQUEST_COUNTER_KEY,
                "http_requests_total",
                "Total HTTP requests",
            )
            yield from self._collect_counter(
                ALGORITHM_COUNTER_KEY,
                "algorithm_executions_total",
                "Total algorithm executions",
            )
            # Histograms
            yield from self._collect_histogram(
                REQUEST_LATENCY_KEY,
                "http_request_duration_seconds",
                "HTTP request latency",
            )
            yield from self._collect_histogram(
                ALGORITHM_LATENCY_KEY,
                "algorithm_execution_duration_seconds",
                "Algorithm execution latency",
            )
            # Gauges
            yield from self._collect_gauge(
                IN_PROGRESS_KEY,
                "http_requests_in_progress",
                "Number of HTTP requests in progress",
            )
        except Exception as e:
            logger.error(f"收集指标失败: {e}")
    def _collect_counter(self, redis_key: str, metric_name: str, description: str):
        """Collect one counter family from its Redis hash."""
        try:
            data = self.redis_client.hgetall(redis_key)
            if not data:
                return
            metrics_data = []
            for field, value in data.items():
                if field.endswith(":timestamp"):
                    continue  # bookkeeping field, not a sample
                # BUGFIX: stored fields are "{redis_key}:{label_key}" — the
                # prefix must be stripped or label names come out corrupted.
                labels = self._parse_labels(self._strip_prefix(redis_key, field))
                metrics_data.append((labels, float(value)))
            if metrics_data:
                label_names = list(metrics_data[0][0].keys())
                counter = CounterMetricFamily(metric_name, description, labels=label_names)
                for labels, value in metrics_data:
                    counter.add_metric(list(labels.values()), value)
                yield counter
        except Exception as e:
            logger.error(f"收集计数器 {redis_key} 失败: {e}")
    def _collect_histogram(self, redis_key: str, metric_name: str, description: str):
        """Collect one histogram family from its count/sum hashes and sample zsets."""
        try:
            count_data = self.redis_client.hgetall(f"{redis_key}:count")
            sum_data = self.redis_client.hgetall(f"{redis_key}:sum")
            if not count_data:
                return
            metrics_data = []
            for field in count_data.keys():
                labels = self._parse_labels(self._strip_prefix(redis_key, field))
                count = float(count_data.get(field, 0))
                sum_value = float(sum_data.get(field, 0))
                # BUGFIX: the hash field is already the zset key
                # ("{redis_key}:{label_key}"); the old code prefixed it
                # again, addressing a nonexistent key → empty buckets.
                latencies = self._get_latencies(field)
                buckets = self._calculate_buckets(latencies)
                metrics_data.append((labels, count, sum_value, buckets))
            if metrics_data:
                label_names = list(metrics_data[0][0].keys())
                histogram = HistogramMetricFamily(
                    metric_name, description, labels=label_names
                )
                for labels, count, sum_value, buckets in metrics_data:
                    histogram.add_metric(
                        list(labels.values()),
                        buckets=buckets,
                        sum_value=sum_value,
                    )
                yield histogram
        except Exception as e:
            logger.error(f"收集直方图 {redis_key} 失败: {e}")
    def _collect_gauge(self, redis_key: str, metric_name: str, description: str):
        """Collect one gauge family from its Redis hash."""
        try:
            data = self.redis_client.hgetall(redis_key)
            if not data:
                return
            metrics_data = []
            for field, value in data.items():
                if field.endswith(":timestamp"):
                    continue
                labels = self._parse_labels(self._strip_prefix(redis_key, field))
                metrics_data.append((labels, float(value)))
            if metrics_data:
                label_names = list(metrics_data[0][0].keys())
                gauge = GaugeMetricFamily(metric_name, description, labels=label_names)
                for labels, value in metrics_data:
                    gauge.add_metric(list(labels.values()), value)
                yield gauge
        except Exception as e:
            logger.error(f"收集仪表盘 {redis_key} 失败: {e}")
    def _parse_labels(self, label_key: str) -> Dict[str, str]:
        """
        Parse a "k=v,k=v" label-key string into a dict.

        Args:
            label_key: e.g. "endpoint=/invoke,instance=host1,method=GET"

        Returns:
            Dict[str, str]: parsed labels
        """
        labels = {}
        for pair in label_key.split(","):
            if "=" in pair:
                key, value = pair.split("=", 1)
                labels[key] = value
        return labels
    def _get_latencies(self, key: str) -> List[float]:
        """Read raw latency samples from a sorted set (members are "ts:value")."""
        try:
            members = self.redis_client.zrange(key, 0, -1)
            latencies = []
            for item in members:
                # Member format: "timestamp:value" — take the trailing value.
                if ":" in item:
                    _, value = item.rsplit(":", 1)
                    latencies.append(float(value))
            return sorted(latencies)
        except Exception as e:
            logger.error(f"获取延迟数据失败: {e}")
            return []
    def _calculate_buckets(
        self, latencies: List[float]
    ) -> List[Tuple[str, float]]:
        """
        Build cumulative Prometheus histogram buckets from raw samples.

        Args:
            latencies: latency samples (seconds)

        Returns:
            List[Tuple[str, float]]: [(upper bound, cumulative count), ...]
        """
        if not latencies:
            return [("+Inf", 0)]
        # Bucket boundaries in seconds (prometheus_client defaults).
        boundaries = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]
        buckets = []
        for boundary in boundaries:
            buckets.append((str(boundary), sum(1 for lat in latencies if lat <= boundary)))
        # Trailing +Inf bucket holds the total sample count.
        buckets.append(("+Inf", len(latencies)))
        return buckets
# Module-level collector shared by every scrape.
redis_collector = RedisMetricsCollector()
def get_metrics() -> bytes:
    """
    Render the Redis-backed metrics in Prometheus text exposition format.

    Returns:
        bytes: Prometheus-formatted metrics payload
    """
    # A fresh registry per call keeps the custom collector isolated from
    # any default prometheus_client metrics.
    registry = CollectorRegistry()
    registry.register(redis_collector)
    return generate_latest(registry)
if __name__ == "__main__":
    # Ad-hoc smoke test: print the current metrics to stdout.
    print(get_metrics().decode("utf-8"))

View File

@@ -0,0 +1,39 @@
"""分布式追踪模块"""
import uuid
from contextvars import ContextVar
from typing import Optional
# Context-local storage for the current request ID; ContextVar keeps the
# value isolated per asyncio task.
request_id_var: ContextVar[Optional[str]] = ContextVar("request_id", default=None)
def generate_request_id() -> str:
    """Return a new unique request ID (UUID4 string)."""
    return str(uuid.uuid4())
def get_request_id() -> Optional[str]:
    """Return the request ID bound to the current context, if any."""
    return request_id_var.get()
def set_request_id(request_id: str) -> None:
    """Bind *request_id* to the current context."""
    request_id_var.set(request_id)
class TracingContext:
    """Context manager that installs a request ID for the duration of a block."""

    def __init__(self, request_id: Optional[str] = None):
        # Fall back to a freshly generated ID when none (or empty) is given.
        self.request_id = request_id or generate_request_id()
        self.token = None

    def __enter__(self):
        # Keep the reset token so the previous value can be restored on exit.
        self.token = request_id_var.set(self.request_id)
        return self.request_id

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.token is not None:
            request_id_var.reset(self.token)

View File

@@ -0,0 +1,138 @@
"""FastAPI 应用入口"""
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import Response
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
import logging
import time
from .api import router
from .config import settings
from .core.logging import setup_logging
from .core.metrics import metrics_registry, request_counter, request_latency, in_progress_requests
# Configure logging before anything else emits log records.
setup_logging(level=settings.log_level, format_type=settings.log_format)
logger = logging.getLogger(__name__)
# Create the FastAPI application with docs/openapi endpoints enabled.
app = FastAPI(
    title=settings.app_name,
    description="算法工程化 Serverless 脚手架 - 提供标准化的算法服务接口",
    version=settings.app_version,
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
)
# CORS middleware.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers under the CORS spec — tighten origins for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Request-logging middleware
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log each HTTP request's method/path and the response status code."""
    logger.info(f"Request: {request.method} {request.url.path}")
    response = await call_next(request)
    logger.info(f"Response: {response.status_code}")
    return response
# Metrics-tracking middleware
@app.middleware("http")
async def track_metrics(request: Request, call_next):
    """Record Prometheus request metrics for every HTTP request."""
    if not settings.metrics_enabled:
        return await call_next(request)
    # Skip the /metrics endpoint itself to avoid self-recording.
    if request.url.path == "/metrics":
        return await call_next(request)
    in_progress_requests.inc()
    start_time = time.time()
    status = "success"
    try:
        response = await call_next(request)
        # Count HTTP 4xx/5xx responses as errors in the status label.
        if response.status_code >= 400:
            status = "error"
        return response
    except Exception:
        status = "error"
        raise  # bare raise preserves the original traceback
    finally:
        elapsed = time.time() - start_time
        request_counter.labels(
            method=request.method,
            endpoint=request.url.path,
            status=status
        ).inc()
        request_latency.labels(
            method=request.method,
            endpoint=request.url.path
        ).observe(elapsed)
        in_progress_requests.dec()
# Register the algorithm routes.
app.include_router(router, tags=["Algorithm"])
# Prometheus metrics endpoint
@app.get(
    "/metrics",
    tags=["Monitoring"],
    summary="Prometheus 指标",
    description="导出 Prometheus 格式的监控指标",
)
async def metrics():
    """Expose application metrics in Prometheus text format for scraping."""
    if not settings.metrics_enabled:
        return Response(content="Metrics disabled", status_code=404)
    payload = generate_latest(metrics_registry)
    return Response(content=payload, media_type=CONTENT_TYPE_LATEST)
# Startup hook
@app.on_event("startup")
async def startup_event():
    """Log app name/version, environment and metrics flag at startup.

    NOTE(review): ``@app.on_event`` is deprecated in newer FastAPI versions
    in favour of lifespan handlers — consider migrating.
    """
    logger.info(f"Starting {settings.app_name} v{settings.app_version}")
    logger.info(f"Environment: {settings.app_env}")
    logger.info(f"Metrics enabled: {settings.metrics_enabled}")
# Shutdown hook
@app.on_event("shutdown")
async def shutdown_event():
    """Log a final message when the application shuts down."""
    logger.info(f"Shutting down {settings.app_name}")
if __name__ == "__main__":
import uvicorn
uvicorn.run(
"functional_scaffold.main:app",
host=settings.host,
port=settings.port,
reload=settings.app_env == "development",
log_level=settings.log_level.lower(),
)

View File

@@ -0,0 +1,5 @@
"""工具函数模块"""
from .validators import validate_integer, validate_positive_integer
__all__ = ["validate_integer", "validate_positive_integer"]

View File

@@ -0,0 +1,51 @@
"""参数校验工具"""
from typing import Any
from ..core.errors import ValidationError
def validate_integer(value: Any, field_name: str = "value") -> int:
    """Ensure *value* is an int (bools are rejected despite subclassing int).

    Args:
        value: value under validation.
        field_name: name used in the error message.

    Returns:
        int: the validated value, unchanged.

    Raises:
        ValidationError: when *value* is not an integer.
    """
    is_int = isinstance(value, int) and not isinstance(value, bool)
    if is_int:
        return value
    raise ValidationError(
        f"{field_name} must be an integer",
        details={"field": field_name, "value": value, "type": type(value).__name__},
    )
def validate_positive_integer(value: Any, field_name: str = "value") -> int:
    """Ensure *value* is a strictly positive integer.

    Args:
        value: value under validation.
        field_name: name used in the error message.

    Returns:
        int: the validated value, unchanged.

    Raises:
        ValidationError: when *value* is not an integer or is <= 0.
    """
    checked = validate_integer(value, field_name)
    if checked > 0:
        return checked
    raise ValidationError(
        f"{field_name} must be a positive integer",
        details={"field": field_name, "value": checked},
    )