Compare commits
6 Commits
a4d2ad1e93
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| b47be9dda4 | |||
| 55419443cd | |||
| e0138d5531 | |||
| c92cac6ebb | |||
| c76ece8f48 | |||
| d211074576 |
17
CLAUDE.md
17
CLAUDE.md
@@ -372,10 +372,23 @@ kubectl apply -f deployment/kubernetes/service.yaml
|
||||
- 资源限制:256Mi-512Mi 内存,250m-500m CPU
|
||||
- 健康检查:存活探针 (/healthz),就绪探针 (/readyz)
|
||||
|
||||
### 阿里云函数计算
|
||||
### 阿里云函数计算(FC 3.0)
|
||||
|
||||
```bash
|
||||
fun deploy -t deployment/serverless/aliyun-fc.yaml
|
||||
# 安装 Serverless Devs(如未安装)
|
||||
npm install -g @serverless-devs/s
|
||||
|
||||
# 配置阿里云凭证(首次使用)
|
||||
s config add
|
||||
|
||||
# 部署到阿里云函数计算
|
||||
cd deployment/serverless && s deploy
|
||||
|
||||
# 验证配置语法
|
||||
cd deployment/serverless && s plan
|
||||
|
||||
# 查看函数日志
|
||||
cd deployment/serverless && s logs --tail
|
||||
```
|
||||
|
||||
### AWS Lambda
|
||||
|
||||
19
README.md
19
README.md
@@ -18,15 +18,16 @@
|
||||
|
||||
## 文档
|
||||
|
||||
| 文档 | 描述 |
|
||||
|-----------------------------------------|--------------|
|
||||
| [快速入门](docs/getting-started.md) | 10 分钟上手指南 |
|
||||
| [算法开发指南](docs/algorithm-development.md) | 详细的算法开发教程 |
|
||||
| [API 参考](docs/api-reference.md) | 完整的 API 文档 |
|
||||
| [监控指南](docs/monitoring.md) | 监控和告警配置 |
|
||||
| [API 规范](docs/api/README.md) | OpenAPI 规范说明 |
|
||||
| [Kubernetes 部署](docs/kubernetes-deployment.md) | K8s 集群部署指南 |
|
||||
| [日志集成(Loki)](docs/loki-quick-reference.md) | 日志收集部署说明 |
|
||||
| 文档 | 描述 |
|
||||
|------------------------------------------------|--------------|
|
||||
| [快速入门](docs/getting-started.md) | 10 分钟上手指南 |
|
||||
| [算法开发指南](docs/algorithm-development.md) | 详细的算法开发教程 |
|
||||
| [API 参考](docs/api-reference.md) | 完整的 API 文档 |
|
||||
| [监控指南](docs/monitoring.md) | 监控和告警配置 |
|
||||
| [API 规范](docs/api/README.md) | OpenAPI 规范说明 |
|
||||
| [Kubernetes 部署](docs/kubernetes-deployment.md) | K8s 集群部署指南 |
|
||||
| [日志集成(Loki)](docs/loki-quick-reference.md) | 日志收集部署说明 |
|
||||
| [阿里云函数计算 FC 部署入门](docs/fc-deploy.md) | 阿里云 FC 部署入门 |
|
||||
|
||||
## 快速开始
|
||||
|
||||
|
||||
@@ -45,7 +45,9 @@ services:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: deployment/Dockerfile
|
||||
image: crpi-om2xd9y8cmaizszf.cn-beijing.personal.cr.aliyuncs.com/test-namespace-gu/fc-test:latest
|
||||
platform: linux/amd64
|
||||
ports:
|
||||
- "8112:8000"
|
||||
environment:
|
||||
- APP_ENV=development
|
||||
- LOG_LEVEL=INFO
|
||||
@@ -70,6 +72,12 @@ services:
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/healthz')"]
|
||||
interval: 30s
|
||||
timeout: 3s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
deploy:
|
||||
replicas: 2
|
||||
|
||||
|
||||
@@ -127,16 +127,25 @@ spec:
|
||||
limits:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
# Worker 没有 HTTP 端口,使用命令探针
|
||||
# Worker 现在有 HTTP 健康检查端点
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
name: http
|
||||
livenessProbe:
|
||||
exec:
|
||||
command:
|
||||
- python
|
||||
- -c
|
||||
- "import redis; r = redis.Redis(host='functional-scaffold-redis'); r.ping()"
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 8000
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 5
|
||||
timeoutSeconds: 3
|
||||
failureThreshold: 3
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /readyz
|
||||
port: 8000
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 3
|
||||
failureThreshold: 3
|
||||
|
||||
---
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
# 阿里云函数计算配置
|
||||
ROSTemplateFormatVersion: '2015-09-01'
|
||||
Transform: 'Aliyun::Serverless-2018-04-03'
|
||||
Resources:
|
||||
functional-scaffold:
|
||||
Type: 'Aliyun::Serverless::Service'
|
||||
Properties:
|
||||
Description: '算法工程化 Serverless 脚手架'
|
||||
LogConfig:
|
||||
Project: functional-scaffold-logs
|
||||
Logstore: function-logs
|
||||
VpcConfig:
|
||||
VpcId: 'vpc-xxxxx'
|
||||
VSwitchIds:
|
||||
- 'vsw-xxxxx'
|
||||
SecurityGroupId: 'sg-xxxxx'
|
||||
prime-checker:
|
||||
Type: 'Aliyun::Serverless::Function'
|
||||
Properties:
|
||||
Description: '质数判断算法服务(API)'
|
||||
Runtime: custom-container
|
||||
MemorySize: 512
|
||||
Timeout: 60
|
||||
InstanceConcurrency: 10
|
||||
CAPort: 8000
|
||||
CustomContainerConfig:
|
||||
Image: 'registry.cn-hangzhou.aliyuncs.com/your-namespace/functional-scaffold:latest'
|
||||
Command: '["/app/entrypoint.sh"]'
|
||||
EnvironmentVariables:
|
||||
APP_ENV: production
|
||||
LOG_LEVEL: INFO
|
||||
METRICS_ENABLED: 'true'
|
||||
RUN_MODE: api
|
||||
REDIS_HOST: 'r-xxxxx.redis.rds.aliyuncs.com'
|
||||
REDIS_PORT: '6379'
|
||||
Events:
|
||||
httpTrigger:
|
||||
Type: HTTP
|
||||
Properties:
|
||||
AuthType: ANONYMOUS
|
||||
Methods:
|
||||
- GET
|
||||
- POST
|
||||
job-worker:
|
||||
Type: 'Aliyun::Serverless::Function'
|
||||
Properties:
|
||||
Description: '异步任务 Worker'
|
||||
Runtime: custom-container
|
||||
MemorySize: 512
|
||||
Timeout: 900
|
||||
InstanceConcurrency: 1
|
||||
CustomContainerConfig:
|
||||
Image: 'registry.cn-hangzhou.aliyuncs.com/your-namespace/functional-scaffold:latest'
|
||||
Command: '["/app/entrypoint.sh"]'
|
||||
EnvironmentVariables:
|
||||
APP_ENV: production
|
||||
LOG_LEVEL: INFO
|
||||
METRICS_ENABLED: 'true'
|
||||
RUN_MODE: worker
|
||||
REDIS_HOST: 'r-xxxxx.redis.rds.aliyuncs.com'
|
||||
REDIS_PORT: '6379'
|
||||
WORKER_POLL_INTERVAL: '1.0'
|
||||
MAX_CONCURRENT_JOBS: '5'
|
||||
JOB_MAX_RETRIES: '3'
|
||||
JOB_EXECUTION_TIMEOUT: '300'
|
||||
Events:
|
||||
timerTrigger:
|
||||
Type: Timer
|
||||
Properties:
|
||||
CronExpression: '0 */1 * * * *'
|
||||
Enable: true
|
||||
Payload: '{}'
|
||||
108
deployment/serverless/s.yaml
Normal file
108
deployment/serverless/s.yaml
Normal file
@@ -0,0 +1,108 @@
|
||||
# 阿里云函数计算 FC 3.0 配置
|
||||
# 使用 Serverless Devs 部署: cd deployment/serverless && s deploy
|
||||
edition: 3.0.0
|
||||
name: functional-scaffold
|
||||
access: default
|
||||
|
||||
vars:
|
||||
region: cn-beijing
|
||||
image: crpi-om2xd9y8cmaizszf-vpc.cn-beijing.personal.cr.aliyuncs.com/your-namespace/fc-test:test-v1
|
||||
redis_host: 127.31.1.1
|
||||
redis_port: "6379"
|
||||
redis_password: "your-password"
|
||||
|
||||
resources:
|
||||
# API 服务函数
|
||||
prime-checker-api:
|
||||
component: fc3
|
||||
props:
|
||||
region: ${vars.region}
|
||||
functionName: prime-checker-api
|
||||
description: 质数判断算法服务(API)
|
||||
runtime: custom-container
|
||||
cpu: 0.35
|
||||
memorySize: 512
|
||||
diskSize: 512
|
||||
timeout: 60
|
||||
instanceConcurrency: 10
|
||||
handler: not-used
|
||||
customContainerConfig:
|
||||
image: ${vars.image}
|
||||
port: 8000
|
||||
command:
|
||||
- /app/entrypoint.sh
|
||||
healthCheckConfig:
|
||||
httpGetUrl: /healthz
|
||||
initialDelaySeconds: 3
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 3
|
||||
failureThreshold: 3
|
||||
successThreshold: 1
|
||||
environmentVariables:
|
||||
APP_ENV: production
|
||||
LOG_LEVEL: INFO
|
||||
METRICS_ENABLED: "true"
|
||||
RUN_MODE: api
|
||||
REDIS_HOST: ${vars.redis_host}
|
||||
REDIS_PORT: ${vars.redis_port}
|
||||
REDIS_PASSWORD: ${vars.redis_password}
|
||||
vpcConfig: auto
|
||||
logConfig: auto
|
||||
triggers:
|
||||
- triggerName: http-trigger
|
||||
triggerType: http
|
||||
triggerConfig:
|
||||
authType: anonymous
|
||||
methods:
|
||||
- GET
|
||||
- POST
|
||||
- PUT
|
||||
- DELETE
|
||||
|
||||
# 异步任务 Worker 函数
|
||||
job-worker:
|
||||
component: fc3
|
||||
props:
|
||||
region: ${vars.region}
|
||||
functionName: job-worker
|
||||
description: 异步任务 Worker
|
||||
runtime: custom-container
|
||||
cpu: 0.35
|
||||
memorySize: 512
|
||||
diskSize: 512
|
||||
timeout: 900
|
||||
instanceConcurrency: 1
|
||||
handler: not-used
|
||||
customContainerConfig:
|
||||
image: ${vars.image}
|
||||
port: 8000
|
||||
command:
|
||||
- /app/entrypoint.sh
|
||||
healthCheckConfig:
|
||||
httpGetUrl: /healthz
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 3
|
||||
failureThreshold: 3
|
||||
successThreshold: 1
|
||||
environmentVariables:
|
||||
APP_ENV: production
|
||||
LOG_LEVEL: INFO
|
||||
METRICS_ENABLED: "true"
|
||||
RUN_MODE: worker
|
||||
REDIS_HOST: ${vars.redis_host}
|
||||
REDIS_PORT: ${vars.redis_port}
|
||||
REDIS_PASSWORD: ${vars.redis_password}
|
||||
WORKER_POLL_INTERVAL: "1.0"
|
||||
MAX_CONCURRENT_JOBS: "5"
|
||||
JOB_MAX_RETRIES: "3"
|
||||
JOB_EXECUTION_TIMEOUT: "300"
|
||||
vpcConfig: auto
|
||||
logConfig: auto
|
||||
triggers:
|
||||
- triggerName: timer-trigger
|
||||
triggerType: timer
|
||||
triggerConfig:
|
||||
cronExpression: "0 */1 * * * *"
|
||||
enable: true
|
||||
payload: "{}"
|
||||
58
docs/fc-deploy.md
Normal file
58
docs/fc-deploy.md
Normal file
@@ -0,0 +1,58 @@
|
||||
# 阿里云函数计算 FC 部署入门
|
||||
|
||||
本指南帮助快速上手 FunctionalScaffold 脚手架,在 10 分钟内通过 Serverless Devs 将算法服务部署到阿里云函数计算(FC 3.0)。
|
||||
|
||||
## 环境准备
|
||||
|
||||
- 安装 [Serverless Devs CLI](https://serverless-devs.com/docs/overview)
|
||||
|
||||
1. 首先安装 Node.js 环境,可在 Node.js 官网下载
|
||||
- [Node.js 下载地址](https://nodejs.org/en/download/)
|
||||
2. 安装 Serverless Devs CLI
|
||||
|
||||
```bash
|
||||
npm install @serverless-devs/s -g
|
||||
```
|
||||
|
||||
## 初始化 Serverless Devs CLI 配置
|
||||
|
||||
执行以下命令初始化 Serverless Devs CLI 配置:
|
||||
|
||||
```bash
|
||||
s config add
|
||||
```
|
||||
|
||||
根据引导进行操作,填入你的 AccessKey ID 和 AccessKey Secret。
|
||||
|
||||
## 部署算法服务
|
||||
|
||||
部署算法服务前,请确保已经完成环境准备和配置。
|
||||
|
||||
修改 `deployment/serverless/s.yaml` 文件中的 `vars` 部分:
|
||||
|
||||
```yaml
|
||||
# 阿里云函数计算 FC 3.0 配置
|
||||
# 使用 Serverless Devs 部署: cd deployment/serverless && s deploy
|
||||
edition: 3.0.0
|
||||
name: functional-scaffold
|
||||
access: default
|
||||
|
||||
vars:
|
||||
region: cn-hangzhou # 换成你的区域
|
||||
image: registry.cn-hangzhou.aliyuncs.com/your-namespace/functional-scaffold:latest # 换成你的docker 镜像
|
||||
redis_host: r-xxxxx.redis.rds.aliyuncs.com # 换成你的redis连接
|
||||
redis_port: "6379" # redis 端口号
|
||||
redis_password: "your-password" #redis 密码,如果没有可留空
|
||||
```
|
||||
|
||||
```bash
|
||||
cd deployment/serverless && s deploy
|
||||
```
|
||||
|
||||
部署完成后,可以在控制台查看服务的运行状态和日志。
|
||||
|
||||
## 删除算法服务
|
||||
|
||||
```bash
|
||||
cd deployment/serverless && s remove
|
||||
```
|
||||
@@ -25,6 +25,8 @@ dependencies = [
|
||||
"pyyaml>=6.0.0",
|
||||
# HTTP 客户端(Webhook 回调)
|
||||
"httpx>=0.27.0",
|
||||
# 轻量级 HTTP 服务器(Worker 健康检查)
|
||||
"aiohttp>=3.9.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
@@ -5,6 +5,7 @@ pydantic>=2.5.0
|
||||
pydantic-settings>=2.0.0
|
||||
prometheus-client>=0.19.0
|
||||
python-json-logger>=2.0.7
|
||||
aiohttp>=3.9.0
|
||||
|
||||
# Redis - 任务队列和指标存储
|
||||
redis>=5.0.0
|
||||
|
||||
@@ -9,6 +9,8 @@ import signal
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
from aiohttp import web
|
||||
|
||||
from .config import settings
|
||||
from .core.job_manager import JobManager
|
||||
from .core.logging import setup_logging
|
||||
@@ -17,6 +19,53 @@ from .core.tracing import set_request_id
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class HealthCheckServer:
|
||||
"""轻量级健康检查 HTTP 服务器
|
||||
|
||||
为 Worker 模式提供健康检查端点,满足 FC 3.0 容器健康检查要求。
|
||||
"""
|
||||
|
||||
def __init__(self, host: str = "0.0.0.0", port: int = 8000):
|
||||
self._host = host
|
||||
self._port = port
|
||||
self._app: Optional[web.Application] = None
|
||||
self._runner: Optional[web.AppRunner] = None
|
||||
self._site: Optional[web.TCPSite] = None
|
||||
self._healthy = True
|
||||
|
||||
async def start(self) -> None:
|
||||
"""启动健康检查服务器"""
|
||||
self._app = web.Application()
|
||||
self._app.router.add_get("/healthz", self._healthz_handler)
|
||||
self._app.router.add_get("/readyz", self._readyz_handler)
|
||||
|
||||
self._runner = web.AppRunner(self._app)
|
||||
await self._runner.setup()
|
||||
self._site = web.TCPSite(self._runner, self._host, self._port)
|
||||
await self._site.start()
|
||||
logger.info(f"健康检查服务器已启动: http://{self._host}:{self._port}")
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""停止健康检查服务器"""
|
||||
if self._runner:
|
||||
await self._runner.cleanup()
|
||||
logger.info("健康检查服务器已停止")
|
||||
|
||||
def set_healthy(self, healthy: bool) -> None:
|
||||
"""设置健康状态"""
|
||||
self._healthy = healthy
|
||||
|
||||
async def _healthz_handler(self, request: web.Request) -> web.Response:
|
||||
"""存活检查端点"""
|
||||
return web.json_response({"status": "healthy", "mode": "worker"})
|
||||
|
||||
async def _readyz_handler(self, request: web.Request) -> web.Response:
|
||||
"""就绪检查端点"""
|
||||
if self._healthy:
|
||||
return web.json_response({"status": "ready", "mode": "worker"})
|
||||
return web.json_response({"status": "not ready"}, status=503)
|
||||
|
||||
|
||||
class JobWorker:
|
||||
"""任务 Worker
|
||||
|
||||
@@ -272,12 +321,21 @@ class JobWorker:
|
||||
logger.error(f"超时任务回收异常: {e}")
|
||||
|
||||
|
||||
def setup_signal_handlers(worker: JobWorker, loop: asyncio.AbstractEventLoop) -> None:
|
||||
def setup_signal_handlers(
|
||||
worker: JobWorker,
|
||||
health_server: HealthCheckServer,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
) -> None:
|
||||
"""设置信号处理器"""
|
||||
|
||||
async def shutdown_all() -> None:
|
||||
"""关闭所有服务"""
|
||||
await worker.shutdown()
|
||||
await health_server.stop()
|
||||
|
||||
def signal_handler(sig: signal.Signals) -> None:
|
||||
logger.info(f"收到信号 {sig.name},准备关闭...")
|
||||
loop.create_task(worker.shutdown())
|
||||
loop.create_task(shutdown_all())
|
||||
|
||||
for sig in (signal.SIGTERM, signal.SIGINT):
|
||||
loop.add_signal_handler(sig, signal_handler, sig)
|
||||
@@ -288,13 +346,19 @@ async def main() -> None:
|
||||
# 设置日志
|
||||
setup_logging(level=settings.log_level, format_type=settings.log_format)
|
||||
|
||||
# 创建健康检查服务器和 Worker
|
||||
health_server = HealthCheckServer(port=8000)
|
||||
worker = JobWorker()
|
||||
|
||||
# 设置信号处理
|
||||
loop = asyncio.get_running_loop()
|
||||
setup_signal_handlers(worker, loop)
|
||||
setup_signal_handlers(worker, health_server, loop)
|
||||
|
||||
try:
|
||||
# 先启动健康检查服务器,确保 FC 健康检查能通过
|
||||
await health_server.start()
|
||||
|
||||
# 初始化并运行 Worker
|
||||
await worker.initialize()
|
||||
await worker.run()
|
||||
except Exception as e:
|
||||
@@ -302,6 +366,7 @@ async def main() -> None:
|
||||
sys.exit(1)
|
||||
finally:
|
||||
await worker.shutdown()
|
||||
await health_server.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user