57 lines
1.6 KiB
Python
57 lines
1.6 KiB
Python
import asyncio
|
|
import logging
|
|
from app.engine import run_checks
|
|
from app.store import update
|
|
|
|
# Module-level logger registered under the name "health-agent"; run_service
# reports unhealthy services, task cancellation and agent errors through it.
logger = logging.getLogger("health-agent")
|
|
|
|
async def run_service(service_cfg):
    """Run the health checks for one service in an endless loop.

    Every cycle awaits ``run_checks(service_cfg)``, stores the outcome with
    ``update`` and then sleeps before starting the next cycle.

    Args:
        service_cfg: Configuration object read through ``.get``. The
            ``"service"`` entry (default ``"unknown"``) is the name the
            results are stored and logged under; the ``"interval"`` entry
            (default ``30``) is the value handed to ``asyncio.sleep``
            between cycles.

    Raises:
        asyncio.CancelledError: Re-raised after an info log when the
            cancellation arrives inside a check cycle. A cancellation that
            arrives during the sleep propagates without that log line.
    """
    name = service_cfg.get("service", "unknown")
    pause_seconds = service_cfg.get("interval", 30)

    while True:
        try:
            is_healthy, check_results = await run_checks(service_cfg)

            # A completed check run (pass or fail) is valid data: store it.
            if is_healthy:
                status = "HEALTHY"
            else:
                status = "UNHEALTHY"
            report = {
                "status": status,
                "checks": check_results,
            }
            update(name, report)

            if not is_healthy:
                logger.warning("Service %s is UNHEALTHY", name)

        except asyncio.CancelledError:
            # Application shutdown: the task is being cancelled, so log it
            # and let the cancellation propagate.
            logger.info("Health check task stopped for service %s", name)
            raise

        except Exception as exc:
            # A bug in the agent / engine itself, NOT a failed health check.
            failure_report = {
                "status": "UNHEALTHY",
                "checks": [],
                "error": str(exc),
            }
            update(name, failure_report)

            # The stack trace is only emitted for agent bugs.
            logger.error(
                "Agent error while checking service %s: %s",
                name,
                exc,
                exc_info=True,
            )

        await asyncio.sleep(pause_seconds)
|