perf: remote browser CPU / zombie optimization
- Add tini (Dockerfile ENTRYPOINT plus `init: true` in compose) to reap orphaned Chromium processes
- Reduce screenshot push frequency (min 0.10→0.15s, active 0.12→0.20s, idle 0.35→1.00s, deep idle 1.00→5.00s, backoff 0.60→2.00s)
- Add a 5s timeout to the screenshot call in the WebSocket push loop
- close() now wraps context.close() in asyncio.wait_for(10s), falling back to browser.close() on timeout
- Two-phase close logging (closing → closed / close_failed)
- Auth-capture sessions are evicted after a 10-minute TTL
- shutdown() is protected by timeouts and logs its progress
- close_ok correctly tracks success through the browser fallback path
This commit is contained in:
@@ -21,6 +21,9 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||
|
||||
RUN sed -i 's|http://deb.debian.org|https://mirrors.aliyun.com|g; s|http://security.debian.org|https://mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources
|
||||
|
||||
# Install tini; it is used as the container entrypoint (PID 1) to reap orphaned Chromium zombie processes
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends tini && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# System dependency layer: install apt packages, caching downloaded .deb files to avoid re-downloading them
|
||||
RUN --mount=type=cache,target=/var/cache/apt \
|
||||
apt-get update \
|
||||
@@ -59,4 +62,5 @@ ENV DATABASE_URL=sqlite:////app/data/app.db
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
ENTRYPOINT ["tini", "--"]
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
||||
@@ -265,12 +265,12 @@ async def clear_profile(custom_page_id: int, _=Depends(get_current_user)):
|
||||
|
||||
|
||||
# ——— WebSocket stream ———
|
||||
# Frame interval & diff detection
|
||||
_WS_MIN_INTERVAL = 0.10
|
||||
_WS_IDLE_INTERVAL = 0.35
|
||||
_WS_ACTIVE_INTERVAL = 0.12
|
||||
_WS_BACKOFF_INTERVAL = 0.60
|
||||
_WS_DEEP_IDLE_INTERVAL = 1.00
|
||||
# Frame interval & diff detection (tuned for CPU efficiency)
|
||||
_WS_MIN_INTERVAL = 0.15
|
||||
_WS_IDLE_INTERVAL = 1.00
|
||||
_WS_ACTIVE_INTERVAL = 0.20
|
||||
_WS_BACKOFF_INTERVAL = 2.00
|
||||
_WS_DEEP_IDLE_INTERVAL = 5.00
|
||||
_WS_ACTIVE_WINDOW = 1.25
|
||||
|
||||
|
||||
@@ -361,7 +361,12 @@ async def session_ws(
|
||||
state = await browser_sessions.state(session_id)
|
||||
await websocket.send_json({"type": "state", "session": state})
|
||||
|
||||
frame = await browser_sessions.screenshot(session_id)
|
||||
frame = await asyncio.wait_for(
|
||||
browser_sessions.screenshot(session_id), timeout=5.0)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("ws screenshot timeout for %s", session_id[:12])
|
||||
await asyncio.sleep(interval)
|
||||
continue
|
||||
except KeyError:
|
||||
await websocket.send_json({"error": "session_not_found"})
|
||||
break
|
||||
|
||||
@@ -154,6 +154,7 @@ class BrowserSessionService:
|
||||
except Exception:
|
||||
await self.close(session.id)
|
||||
raise
|
||||
logger.info("session created: %s (page=%s, profile=%s)", session.id[:12], custom_page_id, profile_key)
|
||||
return session
|
||||
|
||||
def _touch(self, session_id: str) -> None:
|
||||
@@ -325,6 +326,7 @@ class BrowserSessionService:
|
||||
session = self._discard_session(session_id)
|
||||
if not session:
|
||||
return
|
||||
logger.info("session closing: %s (page=%s, profile=%s)", session_id[:12], session.custom_page_id, session.profile_key)
|
||||
|
||||
# Before fully closing the context, force the latest state to be persisted to disk
|
||||
if session.profile_key and not session.profile_key.startswith("auth-capture-"):
|
||||
@@ -340,10 +342,29 @@ class BrowserSessionService:
|
||||
await session.cdp_session.detach()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
close_ok = True
|
||||
# Close the context with a timeout so it can never hang forever
|
||||
try:
|
||||
await session.context.close()
|
||||
except Exception:
|
||||
pass
|
||||
await asyncio.wait_for(session.context.close(), timeout=10.0)
|
||||
logger.info("session context closed: %s", session_id[:12])
|
||||
except asyncio.TimeoutError:
|
||||
close_ok = False
|
||||
logger.warning("session close timeout: %s (falling back to browser.close)", session_id[:12])
|
||||
try:
|
||||
browser = getattr(session.context, "browser", None)
|
||||
if browser is not None:
|
||||
await asyncio.wait_for(browser.close(), timeout=5.0)
|
||||
close_ok = True
|
||||
logger.info("session browser fallback closed: %s", session_id[:12])
|
||||
else:
|
||||
logger.warning("session context.browser is None, cannot fallback: %s", session_id[:12])
|
||||
except Exception as exc:
|
||||
logger.warning("session browser fallback failed: %s: %s", session_id[:12], exc)
|
||||
except Exception as exc:
|
||||
close_ok = False
|
||||
logger.warning("session close error: %s: %s", session_id[:12], exc)
|
||||
|
||||
# Clean up ephemeral (auth-capture) profile directories
|
||||
if session.profile_key and session.profile_key.startswith("auth-capture-"):
|
||||
profile_dir = self._profile_dir(session.profile_key)
|
||||
@@ -353,6 +374,11 @@ class BrowserSessionService:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if close_ok:
|
||||
logger.info("session closed: %s", session_id[:12])
|
||||
else:
|
||||
logger.warning("session close_failed: %s", session_id[:12])
|
||||
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
# Cancel the background eviction loop
|
||||
@@ -364,10 +390,19 @@ class BrowserSessionService:
|
||||
pass
|
||||
self._evict_task = None
|
||||
sessions = list(self._sessions)
|
||||
if sessions:
|
||||
logger.info("shutdown: closing %d browser sessions", len(sessions))
|
||||
for session_id in sessions:
|
||||
await self.close(session_id)
|
||||
try:
|
||||
await asyncio.wait_for(self.close(session_id), timeout=15.0)
|
||||
except Exception as exc:
|
||||
logger.warning("shutdown close failed for %s: %s", session_id[:12], exc)
|
||||
if self._playwright:
|
||||
await self._playwright.stop()
|
||||
logger.info("shutdown: stopping playwright")
|
||||
try:
|
||||
await asyncio.wait_for(self._playwright.stop(), timeout=10.0)
|
||||
except Exception as exc:
|
||||
logger.warning("shutdown playwright stop failed: %s", exc)
|
||||
self._playwright = None
|
||||
|
||||
async def state(self, session_id: str) -> dict[str, Any]:
|
||||
@@ -638,18 +673,29 @@ class BrowserSessionService:
|
||||
logger.exception("idle eviction loop error")
|
||||
|
||||
async def _evict_idle_sessions(self) -> None:
|
||||
"""Close oldest idle sessions when over cap, or any past TTL."""
|
||||
"""Close oldest idle sessions when over cap, or any past TTL.
|
||||
|
||||
- Auth-capture sessions: max 10 minutes lifetime.
|
||||
- Remote browser sessions: close after IDLE_TTL_SECONDS of no WebSocket activity.
|
||||
"""
|
||||
now = asyncio.get_event_loop().time()
|
||||
# First: drop sessions past idle TTL (excluding just-created ones)
|
||||
to_remove: list[str] = []
|
||||
for sid, session in self._sessions.items():
|
||||
if session.profile_key and session.profile_key.startswith("auth-capture-"):
|
||||
continue # ephemeral sessions are handled separately
|
||||
last_active = self._last_event_at.get(sid, 0.0)
|
||||
if last_active > 0 and (now - last_active) > self.IDLE_TTL_SECONDS:
|
||||
to_remove.append(sid)
|
||||
# auth-capture: max 10-minute TTL, measured from the active tab's created_at
|
||||
created = session.tabs.get(session.active_tab_id)
|
||||
if created:
|
||||
age = now - created.created_at
|
||||
if age > 600:
|
||||
to_remove.append(sid)
|
||||
logger.info("evicting auth-capture session %s (age=%ds > 600s)", sid[:12], int(age))
|
||||
else:
|
||||
# remote browser sessions: idle TTL
|
||||
last_active = self._last_event_at.get(sid, 0.0)
|
||||
if last_active > 0 and (now - last_active) > self.IDLE_TTL_SECONDS:
|
||||
to_remove.append(sid)
|
||||
logger.info("evicting idle session %s (no activity for >%ds)", sid[:12], self.IDLE_TTL_SECONDS)
|
||||
for sid in to_remove:
|
||||
logger.info("evicting idle session %s (no activity for >%ds)", sid[:12], self.IDLE_TTL_SECONDS)
|
||||
await self.close(sid)
|
||||
|
||||
# Second: if still over cap, evict oldest by last_event_at
|
||||
|
||||
@@ -24,6 +24,7 @@ services:
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
init: true
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000/healthz"]
|
||||
interval: 30s
|
||||
|
||||
Reference in New Issue
Block a user