fix: complete remaining 8 optimization items
- HTTP connection pooling: UpstreamClient & WebsiteClient reuse httpx.Client
- Deduplicate decimal_string into shared app/utils/number.py
- Split scheduler transaction: snapshot write → webhook/website sync in separate sessions
- Remove hardcoded 170.106.100.210 migration from database.py
- Reset consecutive_failures on upstream update
- Healthcheck: install curl, replace python -c with curl -f
- Add .dockerignore to reduce build context
- Frontend: add axios-retry with exponential backoff (5xx/network errors only)
This commit is contained in:
@@ -28,9 +28,16 @@ def get_scheduler() -> BackgroundScheduler:
|
||||
|
||||
|
||||
def _check_upstream(upstream_id: int) -> None:
|
||||
"""Full upstream check executed by scheduler (runs in thread)."""
|
||||
"""Full upstream check executed by scheduler (runs in thread).
|
||||
|
||||
Phase 1 — upstream API call + snapshot write (single transaction).
|
||||
Phase 2 — webhook/website sync (separate sessions, so a notification
|
||||
failure never rolls back the snapshot).
|
||||
"""
|
||||
settings = get_settings()
|
||||
# ── Phase 1: upstream check + DB write ──────────────────────────
|
||||
db: Session = SessionLocal()
|
||||
client = None
|
||||
try:
|
||||
upstream = db.query(Upstream).filter(Upstream.id == upstream_id).first()
|
||||
if not upstream or not upstream.enabled:
|
||||
@@ -47,6 +54,8 @@ def _check_upstream(upstream_id: int) -> None:
|
||||
)
|
||||
|
||||
was_unhealthy = upstream.last_status == "unhealthy"
|
||||
snapshot = None
|
||||
changes = None
|
||||
|
||||
try:
|
||||
client.login()
|
||||
@@ -61,16 +70,18 @@ def _check_upstream(upstream_id: int) -> None:
|
||||
upstream.last_error = str(exc)
|
||||
upstream.last_checked_at = datetime.now(timezone.utc)
|
||||
threshold = settings.unhealthy_threshold
|
||||
if upstream.consecutive_failures >= threshold and upstream.last_status != "unhealthy":
|
||||
became_unhealthy = (
|
||||
upstream.consecutive_failures >= threshold
|
||||
and upstream.last_status != "unhealthy"
|
||||
)
|
||||
if became_unhealthy:
|
||||
upstream.last_status = "unhealthy"
|
||||
db.commit()
|
||||
webhook_service.send_status_event(
|
||||
db, upstream.id, upstream.name, upstream.base_url,
|
||||
"upstream_unhealthy", str(exc)
|
||||
)
|
||||
else:
|
||||
db.commit()
|
||||
db.commit()
|
||||
logger.warning("upstream %s check failed: %s", upstream.name, exc)
|
||||
# Phase 2: notify unhealthy in a fresh session
|
||||
if became_unhealthy:
|
||||
_notify_status(upstream.id, upstream.name, upstream.base_url,
|
||||
"upstream_unhealthy", str(exc))
|
||||
return
|
||||
|
||||
# success path
|
||||
@@ -90,29 +101,70 @@ def _check_upstream(upstream_id: int) -> None:
|
||||
captured_at=datetime.now(timezone.utc),
|
||||
)
|
||||
db.add(new_row)
|
||||
prune_snapshots(db, upstream_id, settings.snapshot_retention_count)
|
||||
|
||||
# update upstream status
|
||||
upstream.last_status = "healthy"
|
||||
upstream.last_checked_at = datetime.now(timezone.utc)
|
||||
upstream.last_error = None
|
||||
upstream.consecutive_failures = 0
|
||||
prune_snapshots(db, upstream_id, settings.snapshot_retention_count)
|
||||
db.commit()
|
||||
|
||||
if was_unhealthy:
|
||||
webhook_service.send_status_event(
|
||||
db, upstream.id, upstream.name, upstream.base_url, "upstream_recovered"
|
||||
)
|
||||
logger.info(
|
||||
"upstream %s: %d rate change(s)" if changes else "upstream %s: no changes",
|
||||
upstream.name, len(changes) if changes else 0,
|
||||
)
|
||||
|
||||
if changes:
|
||||
webhook_service.send_rate_changed(
|
||||
db, upstream.id, upstream.name, upstream.base_url, changes
|
||||
)
|
||||
website_sync.sync_affected_bindings(db, upstream.id, changes)
|
||||
logger.info("upstream %s: %d rate change(s)", upstream.name, len(changes))
|
||||
else:
|
||||
logger.debug("upstream %s: no changes", upstream.name)
|
||||
finally:
|
||||
client.close()
|
||||
db.close()
|
||||
|
||||
# ── Phase 2: notifications (independent sessions) ──────────────
|
||||
if was_unhealthy:
|
||||
_notify_status(upstream_id, upstream.name, upstream.base_url, "upstream_recovered")
|
||||
|
||||
if changes:
|
||||
_notify_rate_changed(upstream_id, upstream.name, upstream.base_url, changes)
|
||||
_sync_website_bindings(upstream_id, changes)
|
||||
|
||||
|
||||
def _notify_status(
    upstream_id: int,
    upstream_name: str,
    base_url: str,
    event: str,
    error: str = "",
) -> None:
    """Send one upstream status webhook event using a dedicated DB session.

    A fresh session is created with ``SessionLocal`` and handed to
    ``webhook_service.send_status_event`` together with the upstream's
    identity, the event name and an optional error message. Any
    ``Exception`` raised by the send is logged with its traceback and
    swallowed; the session is closed in every case.
    """
    session = SessionLocal()
    try:
        try:
            webhook_service.send_status_event(
                session, upstream_id, upstream_name, base_url, event, error
            )
        except Exception:
            # Best-effort notification: record the failure and carry on.
            logger.exception("status webhook failed for upstream %s", upstream_name)
    finally:
        session.close()
|
||||
|
||||
|
||||
def _notify_rate_changed(
    upstream_id: int,
    upstream_name: str,
    base_url: str,
    changes: list[dict[str, Any]],
) -> None:
    """Send the rate-changed webhook for an upstream using a dedicated DB session.

    A fresh session is created with ``SessionLocal`` and passed to
    ``webhook_service.send_rate_changed`` along with the upstream's
    identity and the list of change records. Any ``Exception`` raised by
    the send is logged with its traceback and swallowed; the session is
    closed in every case.
    """
    session = SessionLocal()
    try:
        try:
            webhook_service.send_rate_changed(
                session, upstream_id, upstream_name, base_url, changes
            )
        except Exception:
            # Best-effort notification: record the failure and carry on.
            logger.exception("rate webhook failed for upstream %s", upstream_name)
    finally:
        session.close()
|
||||
|
||||
|
||||
def _sync_website_bindings(upstream_id: int, changes: list[dict[str, Any]]) -> None:
    """Run the website binding sync for an upstream using a dedicated DB session.

    A fresh session is created with ``SessionLocal`` and passed to
    ``website_sync.sync_affected_bindings`` with the upstream id and the
    list of change records. Any ``Exception`` raised by the sync is logged
    with its traceback and swallowed; the session is closed in every case.
    """
    session = SessionLocal()
    try:
        try:
            website_sync.sync_affected_bindings(session, upstream_id, changes)
        except Exception:
            # Best-effort sync: record the failure and carry on.
            logger.exception("website sync failed for upstream %s", upstream_id)
    finally:
        session.close()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user