860 lines
36 KiB
Python
860 lines
36 KiB
Python
"""Managed Playwright browser sessions for custom pages."""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import re
|
|
import time
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Any, Optional
|
|
from urllib.parse import urlparse
|
|
from uuid import uuid4
|
|
|
|
from app.config import get_settings
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class BrowserDependencyError(RuntimeError):
    """Signal that Playwright, or the browser runtime it drives, cannot be used.

    Raised when the Playwright package cannot be imported or its driver
    fails to start.
    """
|
|
|
|
|
|
class BrowserSessionError(RuntimeError):
    """Signal that a previously created browser session is no longer usable.

    Raised, for example, once every page belonging to the session is closed.
    """
|
|
|
|
|
|
@dataclass
class BrowserTab:
    """A single page (tab) tracked inside a browser session."""

    # Opaque identifier the service assigns to the tab.
    id: str
    # The Playwright page object backing this tab.
    page: Any
    # Clock reading taken when the tab was registered; used to order tabs.
    created_at: float
|
|
|
|
|
|
@dataclass
class BrowserSession:
    """State for one managed browser context and the tabs opened inside it.

    ``page`` always resolves to the page of the currently active tab, so
    callers do not need to track tab switches themselves.
    """

    # Unique identifier of the session.
    id: str
    # Identifier of the custom page this session belongs to.
    custom_page_id: int
    # Name of the on-disk profile directory backing the context.
    profile_key: str
    # The Playwright browser context.
    context: Any
    # All tracked tabs, keyed by tab id.
    tabs: dict[str, "BrowserTab"]
    # Key into ``tabs`` of the tab that currently receives input.
    active_tab_id: str
    # Serialises operations on this session.
    lock: asyncio.Lock
    # Incremented whenever the tab set or the active tab changes.
    tab_revision: int = 0
    # CDP session used for network capture, when one is attached.
    cdp_session: Any = None
    # Auth headers captured via CDP. The default is None (not an empty list),
    # so the annotation is Optional; code appending to it must handle None.
    captured_headers: Optional[list[dict[str, Any]]] = None
    # time.monotonic() reading of the last successful state snapshot.
    last_saved_state_at: float = 0.0

    @property
    def active_tab(self) -> "BrowserTab":
        """Return the tab selected by ``active_tab_id`` (KeyError if missing)."""
        return self.tabs[self.active_tab_id]

    @property
    def page(self) -> Any:
        """Return the page object of the active tab."""
        return self.active_tab.page
|
|
|
|
|
|
class BrowserSessionService:
    """Create, drive and dispose of Playwright persistent-context sessions.

    Sessions created with :meth:`create` are keyed by a profile key derived
    from the custom page id and the URL origin, and are reused while healthy.
    Sessions created with :meth:`create_ephemeral` use a throw-away
    ``auth-capture-*`` profile that is deleted when the session is closed.
    """

    # Idle TTL: close sessions that haven't had activity for this long
    IDLE_TTL_SECONDS = 1800  # 30 minutes
    # Cap: max concurrent persistent sessions (excludes auth-capture)
    MAX_SESSIONS = 10
    # Profile-key prefix that marks an ephemeral (auth-capture) session.
    _EPHEMERAL_PREFIX = "auth-capture-"

    def __init__(self) -> None:
        """Set up empty registries; Playwright itself is started lazily."""
        self._playwright: Optional[Any] = None
        self._sessions: dict[str, BrowserSession] = {}
        # profile_key -> session id of the session currently using it
        self._profiles: dict[str, str] = {}
        # Serialises session creation.
        self._lock = asyncio.Lock()
        self._last_event_at: dict[str, float] = {}
        self._evict_task: Optional[asyncio.Task[None]] = None
        # Strong references to fire-and-forget tasks: the event loop only
        # keeps weak references, so an unreferenced task may be collected
        # before it finishes.
        self._background_tasks: set[asyncio.Task[Any]] = set()

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @classmethod
    def _is_ephemeral(cls, profile_key: Optional[str]) -> bool:
        """Return True when *profile_key* names an auth-capture profile."""
        return bool(profile_key) and profile_key.startswith(cls._EPHEMERAL_PREFIX)

    @classmethod
    def _is_persistent(cls, profile_key: Optional[str]) -> bool:
        """Return True for a non-empty profile key that is not ephemeral."""
        return bool(profile_key) and not profile_key.startswith(cls._EPHEMERAL_PREFIX)

    @staticmethod
    def _clamp_viewport(width: int, height: int) -> tuple[int, int]:
        """Clamp a viewport to 320-2560 px wide and 240-1600 px high."""
        return max(320, min(width, 2560)), max(240, min(height, 1600))

    @staticmethod
    def _now() -> float:
        """Return the event loop clock used for activity/creation stamps."""
        return asyncio.get_event_loop().time()

    def _spawn(self, coro: Any) -> None:
        """Run *coro* as a background task and keep it referenced until done."""
        task = asyncio.create_task(coro)
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)

    def _touch(self, session_id: str) -> None:
        """Mark a session as recently active (reset idle timer)."""
        self._last_event_at[session_id] = self._now()

    @staticmethod
    async def _close_context_quietly(context: Any) -> None:
        """Close a browser context, logging instead of raising on failure."""
        try:
            await context.close()
        except Exception as exc:
            logger.debug("browser context close failed: %s", exc)

    async def _launch_context(self, profile_key: str, width: int, height: int) -> Any:
        """Launch a persistent Chromium context on the profile's directory."""
        return await self._playwright.chromium.launch_persistent_context(
            str(self._profile_dir(profile_key)),
            headless=get_settings().browser_headless,
            viewport={"width": width, "height": height},
            color_scheme="dark",
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )

    async def _grant_clipboard(self, context: Any, url: str) -> None:
        """Best-effort: grant clipboard read/write to the origin of *url*."""
        try:
            parsed = urlparse(url)
            origin = f"{parsed.scheme}://{parsed.netloc}"
            await context.grant_permissions(["clipboard-read", "clipboard-write"], origin=origin)
        except Exception:
            logger.debug("clipboard permission grant failed (non-fatal)")

    def _adopt_context(
        self,
        context: Any,
        page: Any,
        *,
        session_id: str,
        custom_page_id: int,
        profile_key: str,
        captured_headers: Optional[list[dict[str, Any]]] = None,
    ) -> "BrowserSession":
        """Wrap *context* in a BrowserSession and register it with the service."""
        tab_id = uuid4().hex
        tab = BrowserTab(id=tab_id, page=page, created_at=self._now())
        session = BrowserSession(
            id=session_id,
            custom_page_id=custom_page_id,
            profile_key=profile_key,
            context=context,
            tabs={tab_id: tab},
            active_tab_id=tab_id,
            lock=asyncio.Lock(),
            captured_headers=captured_headers,
        )
        self._sessions[session.id] = session
        self._touch(session.id)
        # Register page capture for multi-tab support
        context.on("page", lambda p: self._handle_new_page(session, p))
        return session

    async def _maybe_save_state(self, session: "BrowserSession", min_interval: float) -> None:
        """Snapshot a persistent session's state if the last one is older than *min_interval* seconds."""
        if not self._is_persistent(session.profile_key):
            return
        now = time.monotonic()
        if now - session.last_saved_state_at > min_interval:
            await self._save_session_state(session)
            session.last_saved_state_at = now

    # ------------------------------------------------------------------
    # Session creation
    # ------------------------------------------------------------------

    async def create(
        self,
        custom_page_id: int,
        url: str,
        width: int = 1280,
        height: int = 720,
        login_config: Optional[dict[str, Any]] = None,
    ) -> "BrowserSession":
        """Return a healthy session for the page, creating one if needed.

        An existing session for the same profile is reused when it passes a
        health check; otherwise it is closed and a fresh context is launched
        on the same on-disk profile.

        Raises:
            ValueError: if *url* is not an http/https URL.
            BrowserDependencyError: if Playwright cannot be started.
        """
        if not url.startswith(("http://", "https://")):
            raise ValueError("Only http/https URLs are allowed")
        width, height = self._clamp_viewport(width, height)
        async with self._lock:
            await self._ensure_playwright()
            profile_key = self._profile_key(custom_page_id, url)
            existing = self._sessions.get(self._profiles.get(profile_key) or "")
            if existing is not None:
                if await self._reuse_if_healthy(existing, url, width, height, login_config):
                    return existing
                # Close the unusable session (profile stays on disk). This
                # also covers a closed active page: leaving that context
                # running would keep the profile directory in use while a
                # second context is launched on it below.
                await self.close(existing.id)
            self._profiles.pop(profile_key, None)
            # Idle cleanup: close stale sessions before spawning new ones
            await self._evict_idle_sessions()

            context = await self._launch_context(profile_key, width, height)
            try:
                await self._restore_session_state(context, profile_key)
                # Grant clipboard access for the page origin
                await self._grant_clipboard(context, url)
                page = context.pages[0] if context.pages else await context.new_page()
            except Exception:
                # Nothing references the context yet, so close it here.
                await self._close_context_quietly(context)
                raise
            session = self._adopt_context(
                context,
                page,
                session_id=uuid4().hex,
                custom_page_id=custom_page_id,
                profile_key=profile_key,
            )
            self._profiles[profile_key] = session.id
            # Evict again after adding the new session so cap is enforced immediately
            await self._evict_idle_sessions()
            try:
                await page.goto(url, wait_until="domcontentloaded", timeout=45000)
                await self._autofill_login(page, login_config)
                await self._reset_page_zoom(session)
            except Exception:
                await self.close(session.id)
                raise
            return session

    async def _reuse_if_healthy(
        self,
        session: "BrowserSession",
        url: str,
        width: int,
        height: int,
        login_config: Optional[dict[str, Any]],
    ) -> bool:
        """Health-check an existing session and prepare it for reuse.

        Returns True when the session answered a ping and a screenshot and
        was resized; False when its page is closed or any step failed.
        """
        try:
            if session.page.is_closed():
                logger.info("existing session %s unhealthy, recreating", session.id[:12])
                return False
            async with session.lock:
                url_before = session.page.url
                await session.page.evaluate("1")  # ping
                await session.page.screenshot(type="jpeg", quality=10, timeout=5000)
                await session.page.set_viewport_size({"width": width, "height": height})
                if url_before == "about:blank":
                    await session.page.goto(url, wait_until="domcontentloaded", timeout=45000)
                    await self._autofill_login(session.page, login_config)
                await self._reset_page_zoom(session)
                self._touch(session.id)
        except Exception:
            logger.info("existing session %s unhealthy, recreating", session.id[:12])
            return False
        return True

    async def create_ephemeral(
        self,
        url: str,
        width: int = 1280,
        height: int = 720,
    ) -> "BrowserSession":
        """Create a temporary browser session without a CustomPage record.

        The session uses an isolated random-named profile so it never collides
        with persistent custom-page profiles. Caller MUST close() when done.

        Raises:
            ValueError: if *url* is not an http/https URL.
            BrowserDependencyError: if Playwright cannot be started.
        """
        if not url.startswith(("http://", "https://")):
            raise ValueError("Only http/https URLs are allowed")
        width, height = self._clamp_viewport(width, height)
        async with self._lock:
            await self._ensure_playwright()
            session_id = uuid4().hex
            profile_key = f"auth-capture-{session_id[:12]}"
            context = await self._launch_context(profile_key, width, height)
            try:
                # Grant clipboard access for the page origin
                await self._grant_clipboard(context, url)
                page = context.pages[0] if context.pages else await context.new_page()
            except Exception:
                await self._close_context_quietly(context)
                raise
            session = self._adopt_context(
                context,
                page,
                session_id=session_id,
                custom_page_id=0,
                profile_key=profile_key,
                captured_headers=[],
            )
            # Start CDP network capture BEFORE the initial page load,
            # so we capture login redirects and auth headers from the start.
            await self._start_cdp_capture(session)
            try:
                await page.goto(url, wait_until="domcontentloaded", timeout=45000)
            except Exception:
                await self.close(session.id)
                raise
            return session

    # ------------------------------------------------------------------
    # Tab capture
    # ------------------------------------------------------------------

    def _handle_new_page(self, session: "BrowserSession", page: Any) -> None:
        """Capture a new page opened by the remote browser (e.g. target="_blank")."""
        tab_id = uuid4().hex
        tab = BrowserTab(id=tab_id, page=page, created_at=self._now())
        session.tabs[tab_id] = tab
        session.active_tab_id = tab_id
        session.tab_revision += 1
        logger.info("session %s: captured new tab %s (total: %d)", session.id[:12], tab_id[:8], len(session.tabs))
        # Best-effort: bring to front and reset zoom
        self._spawn(self._init_new_tab(session, tab))

    async def _init_new_tab(self, session: "BrowserSession", tab: "BrowserTab") -> None:
        """Bring a freshly captured tab to the front, reset zoom, grant clipboard."""
        try:
            await tab.page.bring_to_front()
            await self._reset_page_zoom(session)
            # Grant clipboard permission for the new page's origin if possible
            url = tab.page.url
            if url.startswith("http"):
                await self._grant_clipboard(session.context, url)
        except Exception as exc:
            logger.debug("new tab %s init failed: %s", tab.id[:8], exc)

    # ------------------------------------------------------------------
    # Public per-session operations
    # ------------------------------------------------------------------

    async def screenshot(self, session_id: str) -> bytes:
        """Return a JPEG screenshot of the active tab's viewport.

        Also snapshots persistent session state at most once every 10 seconds.
        """
        session = self._get(session_id)
        self._touch(session_id)
        async with session.lock:
            self._ensure_open(session)
            await self._maybe_save_state(session, 10.0)
            return await session.page.screenshot(type="jpeg", quality=65, full_page=False)

    async def event(
        self,
        session_id: str,
        event_type: str,
        payload: dict[str, Any],
        *,
        include_state: bool = True,
    ) -> Optional[dict[str, Any]]:
        """Apply one input or navigation event to the active tab.

        Returns the session state dict, or None when *include_state* is False.

        Raises:
            KeyError: if the session does not exist.
            ValueError: if *event_type* is not supported.
            BrowserSessionError: if every page of the session is closed.
        """
        session = self._get(session_id)
        self._touch(session_id)
        async with session.lock:
            self._ensure_open(session)
            page = session.page
            if event_type == "click":
                await page.mouse.click(float(payload["x"]), float(payload["y"]), button=payload.get("button", "left"))
            elif event_type == "dblclick":
                await page.mouse.dblclick(float(payload["x"]), float(payload["y"]), button=payload.get("button", "left"))
            elif event_type == "mousemove":
                await page.mouse.move(float(payload["x"]), float(payload["y"]))
            elif event_type == "mousedown":
                await page.mouse.move(float(payload["x"]), float(payload["y"]))
                await page.mouse.down(button=payload.get("button", "left"))
            elif event_type == "mouseup":
                await page.mouse.move(float(payload["x"]), float(payload["y"]))
                await page.mouse.up(button=payload.get("button", "left"))
            elif event_type == "type":
                text = str(payload.get("text", ""))
                if text:
                    await page.keyboard.insert_text(text)
            elif event_type == "key":
                key = str(payload.get("key", ""))
                if key:
                    await page.keyboard.press(key)
            elif event_type == "scroll":
                # Position the pointer first so the wheel event targets the
                # element under the cursor.
                if payload.get("x") is not None and payload.get("y") is not None:
                    await page.mouse.move(float(payload["x"]), float(payload["y"]))
                await page.mouse.wheel(float(payload.get("delta_x", 0)), float(payload.get("delta_y", 0)))
            elif event_type == "reload":
                await page.reload(wait_until="domcontentloaded", timeout=45000)
            elif event_type == "back":
                await page.go_back(wait_until="domcontentloaded", timeout=45000)
            elif event_type == "forward":
                await page.go_forward(wait_until="domcontentloaded", timeout=45000)
            elif event_type == "resize":
                width, height = self._clamp_viewport(
                    int(payload.get("width", 1280)),
                    int(payload.get("height", 720)),
                )
                await page.set_viewport_size({"width": width, "height": height})
            else:
                raise ValueError("Unsupported browser event")
            await self._maybe_save_state(session, 5.0)

            if not include_state:
                return None
            return await self._session_state(session)

    async def selected_text(self, session_id: str) -> str:
        """Return the text currently selected in the active tab ('' if none)."""
        session = self._get(session_id)
        self._touch(session_id)
        async with session.lock:
            self._ensure_open(session)
            value = await session.page.evaluate("() => window.getSelection()?.toString() || ''")
            return str(value or "")

    async def read_clipboard(self, session_id: str) -> tuple[Optional[str], Optional[str]]:
        """Read the remote browser's clipboard text.

        Returns (text, error_reason).
        text is None when the clipboard is empty or unreadable.
        error_reason is None on success or "empty" — non-None indicates a genuine failure.
        """
        session = self._get(session_id)
        self._touch(session_id)
        async with session.lock:
            self._ensure_open(session)
            try:
                # The page returns a structured result rather than an
                # in-band "ERROR:" prefix, so clipboard text that happens to
                # start with "ERROR:" is not mistaken for a failure.
                result = await session.page.evaluate("""
                    async () => {
                        try {
                            const text = await navigator.clipboard.readText();
                            return { ok: true, text: text || null };
                        } catch (e) {
                            if (e instanceof DOMException) {
                                if (e.name === 'NotAllowedError') return { ok: false, reason: 'denied' };
                                if (e.name === 'NotFoundError') return { ok: true, text: null };
                            }
                            return { ok: false, reason: e.message || String(e) };
                        }
                    }
                """)
                if not isinstance(result, dict):
                    logger.warning("clipboard read failed for %s: unexpected result %r", session_id[:12], result)
                    return None, "read_failed"
                if result.get("ok"):
                    text = result.get("text")
                    if not text:
                        return None, None  # empty clipboard
                    return str(text), None
                reason = str(result.get("reason") or "read_failed")
                logger.debug("clipboard read error for %s: %s", session_id[:12], reason)
                return None, reason
            except Exception as exc:
                logger.warning("clipboard read failed for %s: %s", session_id[:12], exc)
                return None, "read_failed"

    async def close(self, session_id: str) -> None:
        """Unregister a session and release its browser context.

        Unknown session ids are ignored.
        """
        self._last_event_at.pop(session_id, None)
        session = self._discard_session(session_id)
        if session is not None:
            await self._dispose(session)

    async def _dispose(self, session: "BrowserSession") -> None:
        """Release the resources of an already unregistered session."""
        # Before the context is fully closed, force the latest state to disk.
        if self._is_persistent(session.profile_key):
            try:
                if not session.page.is_closed():
                    await self._save_session_state(session)
            except Exception as exc:
                logger.debug("failed to save state during close: %s", exc)

        # Detach CDP session if active
        if session.cdp_session:
            try:
                await session.cdp_session.detach()
            except Exception as exc:
                logger.debug("CDP detach failed for %s: %s", session.id[:12], exc)
        await self._close_context_quietly(session.context)
        # Clean up ephemeral (auth-capture) profile directories
        if self._is_ephemeral(session.profile_key):
            import shutil

            shutil.rmtree(self._profile_dir(session.profile_key), ignore_errors=True)

    async def shutdown(self) -> None:
        """Stop the eviction loop, close every session and stop Playwright."""
        # Cancel the background eviction loop
        if self._evict_task is not None and not self._evict_task.done():
            self._evict_task.cancel()
            try:
                await self._evict_task
            except asyncio.CancelledError:
                pass
        self._evict_task = None
        for session_id in list(self._sessions):
            await self.close(session_id)
        # Let pending tab-init / dispose tasks finish before the driver stops.
        if self._background_tasks:
            await asyncio.gather(*list(self._background_tasks), return_exceptions=True)
        if self._playwright:
            await self._playwright.stop()
            self._playwright = None

    async def state(self, session_id: str) -> dict[str, Any]:
        """Return the current state dict (tabs, active tab, url, title)."""
        session = self._get(session_id)
        self._touch(session_id)
        async with session.lock:
            self._ensure_open(session)
            return await self._session_state(session)

    async def activate_tab(self, session_id: str, tab_id: str) -> dict[str, Any]:
        """Make *tab_id* the active tab and return the updated state.

        Raises:
            KeyError: if the session or the tab does not exist.
        """
        session = self._get(session_id)
        self._touch(session_id)
        async with session.lock:
            self._ensure_open(session)
            if tab_id not in session.tabs:
                raise KeyError("tab not found")
            session.active_tab_id = tab_id
            session.tab_revision += 1
            await session.page.bring_to_front()
            return await self._session_state(session)

    async def close_tab(self, session_id: str, tab_id: str) -> dict[str, Any]:
        """Close one tab and return the updated state.

        Raises:
            KeyError: if the session or the tab does not exist.
            ValueError: if *tab_id* is the only remaining tab.
        """
        session = self._get(session_id)
        self._touch(session_id)
        async with session.lock:
            self._ensure_open(session)
            if tab_id not in session.tabs:
                raise KeyError("tab not found")
            if len(session.tabs) <= 1:
                raise ValueError("cannot close the last tab")

            tab = session.tabs.pop(tab_id)
            try:
                await tab.page.close()
            except Exception as exc:
                logger.debug("closing tab %s failed: %s", tab_id[:8], exc)

            if session.active_tab_id == tab_id:
                # Pick the latest remaining tab
                latest = max(session.tabs.values(), key=lambda t: t.created_at)
                session.active_tab_id = latest.id
                await session.page.bring_to_front()

            session.tab_revision += 1
            return await self._session_state(session)

    async def _session_state(self, session: "BrowserSession") -> dict[str, Any]:
        """Build the state dict for *session*, pruning tabs whose page is closed.

        Raises:
            BrowserSessionError: if pruning leaves the session without tabs.
        """
        tabs = []
        closed_ids = []
        # Iterate over a snapshot: awaiting title() yields to the event loop,
        # where the context's "page" handler may insert a new tab into
        # session.tabs, which would break iteration over the live dict.
        for tid, tab in list(session.tabs.items()):
            if tab.page.is_closed():
                closed_ids.append(tid)
                continue
            try:
                title = await tab.page.title()
                url = tab.page.url
            except Exception:
                title, url = "Loading...", "about:blank"
            tabs.append({
                "id": tid,
                "title": title,
                "url": url,
                "created_at": tab.created_at,
            })

        if closed_ids:
            for cid in closed_ids:
                session.tabs.pop(cid, None)
            if not session.tabs:
                raise BrowserSessionError("all browser pages are closed")
            if session.active_tab_id in closed_ids:
                latest = max(session.tabs.values(), key=lambda t: t.created_at)
                session.active_tab_id = latest.id
                session.tab_revision += 1

        tabs.sort(key=lambda x: x["created_at"])
        # Same fallback as for the individual tabs: the active page may be
        # mid-navigation when its title is requested.
        try:
            active_url = session.page.url
            active_title = await session.page.title()
        except Exception:
            active_title, active_url = "Loading...", "about:blank"
        return {
            "id": session.id,
            "custom_page_id": session.custom_page_id,
            "url": active_url,
            "title": active_title,
            "active_tab_id": session.active_tab_id,
            "tabs": tabs,
            "tab_revision": session.tab_revision,
        }

    # ------------------------------------------------------------------
    # Playwright plumbing
    # ------------------------------------------------------------------

    async def _ensure_playwright(self) -> None:
        """Start the Playwright driver and the eviction loop on first use.

        Raises:
            BrowserDependencyError: if Playwright is missing or fails to start.
        """
        if self._playwright:
            return
        try:
            from playwright.async_api import async_playwright
        except ImportError as exc:
            raise BrowserDependencyError("Playwright is not installed. Run `pip install -r requirements.txt`.") from exc
        try:
            self._playwright = await async_playwright().start()
        except Exception as exc:
            raise BrowserDependencyError(f"Unable to start Playwright: {exc}") from exc
        # Start background eviction loop
        if self._evict_task is None or self._evict_task.done():
            self._evict_task = asyncio.create_task(self._evict_loop())

    async def _reset_page_zoom(self, session: "BrowserSession") -> None:
        """Best-effort: set the active page's scale factor back to 1 via CDP."""
        try:
            cdp = await session.context.new_cdp_session(session.page)
            try:
                await cdp.send("Emulation.setPageScaleFactor", {"pageScaleFactor": 1})
            finally:
                await cdp.detach()
        except Exception as exc:
            logger.debug("page zoom reset skipped: %s", exc)

    # ------------------------------------------------------------------
    # Login autofill
    # ------------------------------------------------------------------

    async def autofill_login(
        self,
        session_id: str,
        login_config: Optional[dict[str, Any]],
    ) -> bool:
        """Public: manually trigger login autofill for an active session.

        Only fills username/password fields — never auto-submits.
        Returns True if fields were found and filled, False otherwise.
        Never returns password data to the caller.
        """
        session = self._get(session_id)
        self._touch(session_id)
        async with session.lock:
            self._ensure_open(session)
            return await self._autofill_login(session.page, login_config, max_wait_seconds=3.0, skip_submit=True)

    async def _autofill_login(
        self,
        page: Any,
        config: Optional[dict[str, Any]],
        *,
        max_wait_seconds: float = 2.0,
        poll_interval_seconds: float = 0.25,
        skip_submit: bool = False,
    ) -> bool:
        """Fill the login form on *page* from *config*.

        Does nothing unless the config is enabled and has both a username and
        a password. Clicks the configured submit selector unless
        *skip_submit* is set. Returns True when both fields were filled;
        every failure is logged and reported as False.
        """
        if not config or not config.get("enabled"):
            return False
        username = str(config.get("username") or "")
        password = str(config.get("password") or "")
        if not username or not password:
            return False
        try:
            # The configured selector is tried first, then common fallbacks.
            username_selectors = [
                config.get("username_selector"),
                "input[type='email']",
                "input[name*='user' i]",
                "input[id*='user' i]",
                "input[name*='email' i]",
                "input[id*='email' i]",
                "input[name*='login' i]",
                "input[id*='login' i]",
                "input[autocomplete='username']",
                "input:not([type]), input[type='text']",
            ]
            password_selectors = [
                config.get("password_selector"),
                "input[type='password']",
                "input[autocomplete='current-password']",
            ]
            username_locator, password_locator = await self._wait_for_login_locators(
                page,
                username_selectors,
                password_selectors,
                max_wait_seconds=max_wait_seconds,
                poll_interval_seconds=poll_interval_seconds,
            )
            if not username_locator or not password_locator:
                logger.info("Login autofill skipped: login fields not found")
                return False
            await username_locator.fill(username, timeout=3000)
            await password_locator.fill(password, timeout=3000)
            if not skip_submit:
                submit_selector = str(config.get("submit_selector") or "").strip()
                if submit_selector:
                    submit = await self._first_visible_locator(page, [submit_selector], timeout=500)
                    if submit:
                        await submit.click(timeout=3000)
            return True
        except Exception as exc:
            logger.info("Login autofill skipped: %s", exc)
            return False

    async def _wait_for_login_locators(
        self,
        page: Any,
        username_selectors: list[Optional[str]],
        password_selectors: list[Optional[str]],
        *,
        max_wait_seconds: float,
        poll_interval_seconds: float,
    ) -> tuple[Optional[Any], Optional[Any]]:
        """Poll until both login fields are visible or the deadline passes.

        Returns ``(username_locator, password_locator)``, or ``(None, None)``
        on timeout. The page is always probed at least once.
        """
        deadline = time.monotonic() + max_wait_seconds
        while True:
            username_locator = await self._first_visible_locator(page, username_selectors, timeout=150)
            password_locator = await self._first_visible_locator(page, password_selectors, timeout=150)
            if username_locator and password_locator:
                return username_locator, password_locator
            if time.monotonic() >= deadline:
                return None, None
            await asyncio.sleep(poll_interval_seconds)

    async def _first_visible_locator(
        self,
        page: Any,
        selectors: list[Optional[str]],
        *,
        timeout: float = 1500,
    ) -> Optional[Any]:
        """Return the first match of the first selector that is visible.

        Empty/None selectors are skipped; selectors that raise are ignored.
        """
        for selector in selectors:
            selector = str(selector or "").strip()
            if not selector:
                continue
            try:
                locator = page.locator(selector).first
                if await locator.count() and await locator.is_visible(timeout=timeout):
                    return locator
            except Exception:
                # An invalid selector must not abort the search.
                continue
        return None

    # ------------------------------------------------------------------
    # Registry lookups and bookkeeping
    # ------------------------------------------------------------------

    def get_session(self, session_id: str) -> "BrowserSession":
        """Retrieve a session by id — raises KeyError if missing."""
        session = self._sessions.get(session_id)
        if not session:
            raise KeyError("browser session not found")
        return session

    def find_by_page_id(self, custom_page_id: int) -> "BrowserSession":
        """Find the active session for a custom page. Raises KeyError if none."""
        for session in self._sessions.values():
            if session.custom_page_id == custom_page_id and not session.page.is_closed():
                return session
        raise KeyError(f"no active browser session for page {custom_page_id}")

    _get = get_session  # alias for internal use

    def _ensure_open(self, session: "BrowserSession") -> None:
        """Make sure the active tab points at an open page.

        Closed tabs are dropped and the newest remaining tab becomes active;
        this repeats until an open tab is found. When no tab is left the
        session is unregistered, its context is closed in the background and
        BrowserSessionError is raised.
        """
        while True:
            tab = session.tabs.get(session.active_tab_id)
            if tab is not None and not tab.page.is_closed():
                return
            # Current tab closed? Try to cleanup and find another one
            session.tabs.pop(session.active_tab_id, None)
            if not session.tabs:
                self._last_event_at.pop(session.id, None)
                discarded = self._discard_session(session.id)
                if discarded is not None:
                    # This method is synchronous, so the context is released
                    # from a tracked background task instead of being left
                    # running with no registry entry.
                    self._spawn(self._dispose(discarded))
                raise BrowserSessionError("all browser pages are closed")
            # Pick the latest created tab
            latest = max(session.tabs.values(), key=lambda t: t.created_at)
            session.active_tab_id = latest.id
            session.tab_revision += 1
            logger.info("active tab closed, switched to %s", latest.id[:8])

    def _discard_session(self, session_id: str) -> Optional["BrowserSession"]:
        """Remove a session from the registries and return it (None if unknown).

        The profile mapping is only removed when it still points at this session.
        """
        session = self._sessions.pop(session_id, None)
        if session and self._profiles.get(session.profile_key) == session_id:
            self._profiles.pop(session.profile_key, None)
        return session

    async def _evict_loop(self) -> None:
        """Background loop that runs every 5 minutes to evict idle sessions."""
        while True:
            await asyncio.sleep(300)  # 5 minutes
            try:
                await self._evict_idle_sessions()
            except Exception:
                logger.exception("idle eviction loop error")

    async def _evict_idle_sessions(self) -> None:
        """Close oldest idle sessions when over cap, or any past TTL."""
        now = self._now()
        # First: drop sessions past idle TTL. Ephemeral sessions are skipped
        # here, and so are sessions without a recorded activity timestamp.
        to_remove: list[str] = []
        for sid, session in self._sessions.items():
            if self._is_ephemeral(session.profile_key):
                continue
            last_active = self._last_event_at.get(sid, 0.0)
            if last_active > 0 and (now - last_active) > self.IDLE_TTL_SECONDS:
                to_remove.append(sid)
        for sid in to_remove:
            logger.info("evicting idle session %s (no activity for >%ds)", sid[:12], self.IDLE_TTL_SECONDS)
            await self.close(sid)

        # Second: if still over cap, evict oldest by last_event_at
        persistent = [sid for sid, s in self._sessions.items() if not self._is_ephemeral(s.profile_key)]
        if len(persistent) > self.MAX_SESSIONS:
            persistent.sort(key=lambda sid: self._last_event_at.get(sid, 0.0))
            excess = len(persistent) - self.MAX_SESSIONS
            for sid in persistent[:excess]:
                logger.info("evicting session %s (over cap of %d)", sid[:12], self.MAX_SESSIONS)
                await self.close(sid)

    # ------------------------------------------------------------------
    # Profiles on disk
    # ------------------------------------------------------------------

    async def clear_profile(self, custom_page_id: int, url: str) -> None:
        """Close session for the page if active, then delete profile directory.

        Raises RuntimeError if the directory cannot be fully removed.
        """
        import shutil

        # Close active session and use its profile_key (precise match)
        profile_key: Optional[str] = None
        try:
            session = self.find_by_page_id(custom_page_id)
            profile_key = session.profile_key
            await self.close(session.id)
        except KeyError:
            pass

        # Fallback: compute from URL (may be wrong if URL changed since session was created)
        if not profile_key:
            profile_key = self._profile_key(custom_page_id, url)

        profile_dir = self._profile_dir(profile_key)
        if profile_dir.exists():
            shutil.rmtree(profile_dir)  # no ignore_errors — let failure surface
        if profile_dir.exists():
            raise RuntimeError(
                f"Failed to fully remove browser profile directory: {profile_dir}"
            )
        logger.info("cleared browser profile for page %d: %s", custom_page_id, profile_dir)

    def _profile_dir(self, profile_key: str) -> Path:
        """Return the profile directory for *profile_key*, creating it if absent."""
        root = Path(get_settings().browser_profiles_dir)
        root.mkdir(parents=True, exist_ok=True)
        profile = root / profile_key
        profile.mkdir(parents=True, exist_ok=True)
        return profile

    def _cookies_path(self, profile_key: str) -> Path:
        """Return the path of the saved-state JSON file inside the profile dir."""
        return self._profile_dir(profile_key) / "session-cookies.json"

    def _profile_key(self, custom_page_id: int, url: str) -> str:
        """Derive a filesystem-safe profile key from the page id and URL origin."""
        parsed = urlparse(url)
        origin = f"{parsed.scheme}-{parsed.netloc}".lower()
        # Anything outside [a-z0-9_.-] (e.g. the ':' before a port) becomes '_'.
        safe_origin = re.sub(r"[^a-z0-9_.-]+", "_", origin).strip("_") or "page"
        return f"page-{custom_page_id}-{safe_origin[:80]}"

    # ------------------------------------------------------------------
    # Auth capture and state persistence
    # ------------------------------------------------------------------

    async def _start_cdp_capture(self, session: "BrowserSession") -> None:
        """Enable CDP Network domain and capture Authorization headers."""
        # Sessions built without an explicit list default to None.
        if session.captured_headers is None:
            session.captured_headers = []
        try:
            cdp = await session.context.new_cdp_session(session.page)
            await cdp.send("Network.enable")

            def on_request(params: dict) -> None:
                request = params.get("request", {})
                # Match header names case-insensitively.
                headers = {str(name).lower(): value for name, value in request.get("headers", {}).items()}
                url = request.get("url", "")
                for header_name, kind in (("authorization", "authorization"), ("x-api-key", "api_key")):
                    value = headers.get(header_name)
                    if value:
                        session.captured_headers.append({
                            "type": kind,
                            "value": value,
                            "url": url,
                        })

            cdp.on("Network.requestWillBeSent", on_request)
            session.cdp_session = cdp
        except Exception as exc:
            logger.debug("CDP capture not available: %s", exc)

    async def _save_session_state(self, session: "BrowserSession") -> None:
        """Write the context's storage state to the profile's JSON file.

        No-op for ephemeral sessions. The file is written to a temporary
        file and then moved into place, so a failed write does not leave a
        truncated file behind. Failures are logged, never raised.
        """
        if not self._is_persistent(session.profile_key):
            return
        try:
            import json
            import os
            import tempfile

            state = await session.context.storage_state()
            cookies_path = self._cookies_path(session.profile_key)
            # Ensure parent directories exist
            cookies_path.parent.mkdir(parents=True, exist_ok=True)
            temp_fd, temp_path = tempfile.mkstemp(dir=str(cookies_path.parent))
            try:
                with os.fdopen(temp_fd, "w", encoding="utf-8") as f:
                    json.dump(state, f, ensure_ascii=False, indent=2)
                os.replace(temp_path, cookies_path)
            except Exception:
                try:
                    os.unlink(temp_path)
                except Exception:
                    pass
                raise
        except Exception as exc:
            logger.debug("failed to save session state for %s: %s", session.profile_key, exc)

    async def _restore_session_state(self, context: Any, profile_key: str) -> None:
        """Re-apply saved cookies and localStorage to a freshly launched context.

        No-op for ephemeral profiles or when no saved state exists. Failures
        are logged as warnings, never raised.
        """
        if self._is_ephemeral(profile_key):
            return
        cookies_path = self._cookies_path(profile_key)
        if not cookies_path.exists() or cookies_path.stat().st_size == 0:
            return
        try:
            import json

            with open(cookies_path, "r", encoding="utf-8") as f:
                state = json.load(f)
            cookies = state.get("cookies", [])
            if cookies:
                now = time.time()
                valid_cookies = []
                for c in cookies:
                    expires = c.get("expires")
                    # Drop cookies that have already expired.
                    if expires is not None and expires > 0 and expires <= now:
                        continue
                    # A non-positive expiry is removed before the cookie is re-added.
                    if expires is not None and expires <= 0:
                        c.pop("expires", None)
                    valid_cookies.append(c)
                if valid_cookies:
                    await context.add_cookies(valid_cookies)
                    logger.info("restored %d cookies for profile %s", len(valid_cookies), profile_key)

            # Restore localStorage
            origins = state.get("origins", [])
            if origins:
                origins_json = json.dumps(origins)
                # NOTE(review): this script is registered as a context init
                # script and rewrites the saved keys from the startup
                # snapshot; confirm that overwriting values changed later in
                # the session is intended.
                init_script = f"""
                (() => {{
                    try {{
                        const origins = {origins_json};
                        const currentOrigin = window.location.origin;
                        const target = origins.find(o => o.origin === currentOrigin);
                        if (target && target.localStorage) {{
                            for (const item of target.localStorage) {{
                                try {{
                                    window.localStorage.setItem(item.name, item.value);
                                }} catch (e) {{
                                    console.error('Failed to restore localStorage key', item.name, e);
                                }}
                            }}
                        }}
                    }} catch (err) {{
                        console.error('LocalStorage restore initialization script failed', err);
                    }}
                }})();
                """
                await context.add_init_script(init_script)
                logger.info("registered LocalStorage init script for profile %s (origins: %d)", profile_key, len(origins))
        except Exception as exc:
            logger.warning("failed to restore cookies/state for profile %s: %s", profile_key, exc)
|
|
|
|
|
|
# Service instance created once, when this module is imported.
browser_sessions = BrowserSessionService()
|