main:删除指标脚本并优化指标记录逻辑

变更内容:
- 删除 `start_metrics.sh` 脚本,精简项目结构,移除不再需要的启动逻辑。
- 优化 HTTP 请求指标记录,新增健康检查端点过滤和路径参数规范化功能。
- 更新文档,添加指标过滤及路径规范化的详细说明。
- 提高 Prometheus 指标的性能和可维护性,避免标签基数爆炸。
This commit is contained in:
2026-02-02 15:53:00 +08:00
parent 5feb795d12
commit c6f8714c4d
4 changed files with 167 additions and 118 deletions

View File

@@ -53,6 +53,27 @@ async def log_requests(request: Request, call_next):
return response
def normalize_path(path: str) -> str:
    """Replace path parameters in a request path with their template form.

    Only the ``/jobs/`` route is handled: any path that starts with
    ``/jobs/`` and has at least one more character after that prefix is
    mapped to ``/jobs/{job_id}``. This includes paths with further
    segments after the job id. Every other path is returned unchanged.

    Args:
        path: The raw request path.

    Returns:
        The normalized path.

    Examples:
        /jobs/a1b2c3d4e5f6 -> /jobs/{job_id}
        /jobs/             -> /jobs/
        /invoke            -> /invoke
    """
    jobs_prefix = "/jobs/"
    # A bare "/jobs/" carries no id, so require something after the prefix.
    has_job_id = path.startswith(jobs_prefix) and len(path) > len(jobs_prefix)
    return "/jobs/{job_id}" if has_job_id else path
# 指标跟踪中间件
@app.middleware("http")
async def track_metrics(request: Request, call_next):
@@ -60,8 +81,9 @@ async def track_metrics(request: Request, call_next):
if not settings.metrics_enabled:
return await call_next(request)
# 跳过 /metrics 端点本身,避免循环记录
if request.url.path == "/metrics":
# 跳过不需要记录指标的端点
skip_paths = {"/metrics", "/readyz", "/healthz"}
if request.url.path in skip_paths:
return await call_next(request)
gauge_incr("http_requests_in_progress")
@@ -79,13 +101,15 @@ async def track_metrics(request: Request, call_next):
raise e
finally:
elapsed = time.time() - start_time
# 使用规范化后的路径记录指标
normalized_path = normalize_path(request.url.path)
incr(
"http_requests_total",
{"method": request.method, "endpoint": request.url.path, "status": status},
{"method": request.method, "endpoint": normalized_path, "status": status},
)
observe(
"http_request_duration_seconds",
{"method": request.method, "endpoint": request.url.path},
{"method": request.method, "endpoint": normalized_path},
elapsed,
)
gauge_decr("http_requests_in_progress")