diff --git a/Dockerfile b/Dockerfile
index d2b17c0..b37774d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,6 +21,9 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
 
 RUN sed -i 's|http://deb.debian.org|https://mirrors.aliyun.com|g; s|http://security.debian.org|https://mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources
 
+# Install tini as PID 1 to properly reap orphan Chromium zombie processes
+RUN apt-get update && apt-get install -y --no-install-recommends tini && rm -rf /var/lib/apt/lists/*
+
 # 系统依赖层:apt 包安装,缓存 deb 包避免重复下载
 RUN --mount=type=cache,target=/var/cache/apt \
     apt-get update \
@@ -59,4 +62,6 @@ ENV DATABASE_URL=sqlite:////app/data/app.db
 
 EXPOSE 8000
 
+# -s registers tini as a child subreaper so reaping still works when it is not PID 1 (docker-compose `init: true` puts docker-init at PID 1)
+ENTRYPOINT ["tini", "-s", "--"]
 CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/backend/app/routers/browser_sessions.py b/backend/app/routers/browser_sessions.py
index 74bcf66..3a565f0 100644
--- a/backend/app/routers/browser_sessions.py
+++ b/backend/app/routers/browser_sessions.py
@@ -265,12 +265,12 @@ async def clear_profile(custom_page_id: int, _=Depends(get_current_user)):
 
 # ——— WebSocket stream ———
 
-# Frame interval & diff detection
-_WS_MIN_INTERVAL = 0.10
-_WS_IDLE_INTERVAL = 0.35
-_WS_ACTIVE_INTERVAL = 0.12
-_WS_BACKOFF_INTERVAL = 0.60
-_WS_DEEP_IDLE_INTERVAL = 1.00
+# Frame interval & diff detection (tuned for CPU efficiency)
+_WS_MIN_INTERVAL = 0.15
+_WS_IDLE_INTERVAL = 1.00
+_WS_ACTIVE_INTERVAL = 0.20
+_WS_BACKOFF_INTERVAL = 2.00
+_WS_DEEP_IDLE_INTERVAL = 5.00
 _WS_ACTIVE_WINDOW = 1.25
 
 
@@ -361,7 +361,12 @@ async def session_ws(
                 state = await browser_sessions.state(session_id)
                 await websocket.send_json({"type": "state", "session": state})
 
-                frame = await browser_sessions.screenshot(session_id)
+                frame = await asyncio.wait_for(
+                    browser_sessions.screenshot(session_id), timeout=5.0)
+            except asyncio.TimeoutError:
+                logger.warning("ws screenshot timeout for %s", session_id[:12])
+                await asyncio.sleep(interval)
+                continue
             except KeyError:
                 await websocket.send_json({"error": "session_not_found"})
                 break
diff --git a/backend/app/services/browser_session_service.py b/backend/app/services/browser_session_service.py
index 89ddfeb..c4faba5 100644
--- a/backend/app/services/browser_session_service.py
+++ b/backend/app/services/browser_session_service.py
@@ -154,6 +154,7 @@ class BrowserSessionService:
         except Exception:
             await self.close(session.id)
             raise
+        logger.info("session created: %s (page=%s, profile=%s)", session.id[:12], custom_page_id, profile_key)
         return session
 
     def _touch(self, session_id: str) -> None:
@@ -325,6 +326,7 @@ class BrowserSessionService:
         session = self._discard_session(session_id)
         if not session:
             return
+        logger.info("session closing: %s (page=%s, profile=%s)", session_id[:12], session.custom_page_id, session.profile_key)
 
         # 在完全关闭 context 前,强制将最新的状态落盘保存
         if session.profile_key and not session.profile_key.startswith("auth-capture-"):
@@ -340,10 +342,30 @@ class BrowserSessionService:
                 await session.cdp_session.detach()
             except Exception:
                 pass
+
+        close_ok = True
+        # Close the context with a timeout so a hung browser cannot block forever
         try:
-            await session.context.close()
-        except Exception:
-            pass
+            await asyncio.wait_for(session.context.close(), timeout=10.0)
+            logger.info("session context closed: %s", session_id[:12])
+        except asyncio.TimeoutError:
+            close_ok = False
+            logger.warning("session close timeout: %s (falling back to browser.close)", session_id[:12])
+            # NOTE(review): browser.close() closes every context on that Browser; confirm it is not shared across sessions.
+            try:
+                browser = getattr(session.context, "browser", None)
+                if browser is not None:
+                    await asyncio.wait_for(browser.close(), timeout=5.0)
+                    close_ok = True
+                    logger.info("session browser fallback closed: %s", session_id[:12])
+                else:
+                    logger.warning("session context.browser is None, cannot fallback: %s", session_id[:12])
+            except Exception as exc:
+                logger.warning("session browser fallback failed: %s: %s", session_id[:12], exc)
+        except Exception as exc:
+            close_ok = False
+            logger.warning("session close error: %s: %s", session_id[:12], exc)
+
         # Clean up ephemeral (auth-capture) profile directories
         if session.profile_key and session.profile_key.startswith("auth-capture-"):
             profile_dir = self._profile_dir(session.profile_key)
@@ -353,6 +375,11 @@ class BrowserSessionService:
             except Exception:
                 pass
 
+        if close_ok:
+            logger.info("session closed: %s", session_id[:12])
+        else:
+            logger.warning("session close_failed: %s", session_id[:12])
+
 
     async def shutdown(self) -> None:
         # Cancel the background eviction loop
@@ -364,10 +391,19 @@ class BrowserSessionService:
                 pass
             self._evict_task = None
         sessions = list(self._sessions)
+        if sessions:
+            logger.info("shutdown: closing %d browser sessions", len(sessions))
         for session_id in sessions:
-            await self.close(session_id)
+            try:
+                await asyncio.wait_for(self.close(session_id), timeout=15.0)
+            except Exception as exc:
+                logger.warning("shutdown close failed for %s: %s", session_id[:12], exc)
         if self._playwright:
-            await self._playwright.stop()
+            logger.info("shutdown: stopping playwright")
+            try:
+                await asyncio.wait_for(self._playwright.stop(), timeout=10.0)
+            except Exception as exc:
+                logger.warning("shutdown playwright stop failed: %s", exc)
             self._playwright = None
 
     async def state(self, session_id: str) -> dict[str, Any]:
@@ -638,18 +674,29 @@ class BrowserSessionService:
                 logger.exception("idle eviction loop error")
 
     async def _evict_idle_sessions(self) -> None:
-        """Close oldest idle sessions when over cap, or any past TTL."""
+        """Close oldest idle sessions when over cap, or any past TTL.
+
+        - Auth-capture sessions: evicted once their active tab is older than 10 minutes.
+        - Remote browser sessions: close after IDLE_TTL_SECONDS of no WebSocket activity.
+        """
         now = asyncio.get_event_loop().time()
-        # First: drop sessions past idle TTL (excluding just-created ones)
         to_remove: list[str] = []
         for sid, session in self._sessions.items():
             if session.profile_key and session.profile_key.startswith("auth-capture-"):
-                continue  # ephemeral sessions are handled separately
-            last_active = self._last_event_at.get(sid, 0.0)
-            if last_active > 0 and (now - last_active) > self.IDLE_TTL_SECONDS:
-                to_remove.append(sid)
+                # auth-capture: 10 minute cap from the active tab's created_at (NOTE(review): assumes created_at uses the event-loop clock like `now` -- confirm)
+                active_tab = session.tabs.get(session.active_tab_id)
+                if active_tab:
+                    age = now - active_tab.created_at
+                    if age > 600:
+                        to_remove.append(sid)
+                        logger.info("evicting auth-capture session %s (age=%ds > 600s)", sid[:12], int(age))
+            else:
+                # remote browser sessions: idle TTL
+                last_active = self._last_event_at.get(sid, 0.0)
+                if last_active > 0 and (now - last_active) > self.IDLE_TTL_SECONDS:
+                    to_remove.append(sid)
+                    logger.info("evicting idle session %s (no activity for >%ds)", sid[:12], self.IDLE_TTL_SECONDS)
         for sid in to_remove:
-            logger.info("evicting idle session %s (no activity for >%ds)", sid[:12], self.IDLE_TTL_SECONDS)
             await self.close(sid)
 
         # Second: if still over cap, evict oldest by last_event_at
diff --git a/docker-compose.yml b/docker-compose.yml
index 816d8f5..27db26d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -24,6 +24,7 @@ services:
       options:
         max-size: "10m"
         max-file: "3"
+    init: true
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8000/healthz"]
       interval: 30s