Remove server remote browser support
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
"""Auth credential extraction from remote browser sessions."""
|
||||
"""Credential candidate curation for real-browser auth imports."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
@@ -23,116 +23,6 @@ SESSION_COOKIE_NAMES = frozenset({
|
||||
})
|
||||
|
||||
|
||||
async def extract_cookies(session: Any) -> list[dict[str, Any]]:
    """Return a trimmed copy of every cookie in the session's browser context.

    Only ``name``, ``value``, ``domain``, ``httpOnly`` and ``secure`` are kept.
    A missing key falls back to ``""`` for the text fields and ``False`` for
    the two flags.
    """
    # (key, fallback) pairs, in the order they appear in each output dict.
    kept_fields = (
        ("name", ""),
        ("value", ""),
        ("domain", ""),
        ("httpOnly", False),
        ("secure", False),
    )
    trimmed: list[dict[str, Any]] = []
    for raw_cookie in await session.context.cookies():
        trimmed.append(
            {key: raw_cookie.get(key, fallback) for key, fallback in kept_fields}
        )
    return trimmed
|
||||
|
||||
|
||||
async def extract_local_storage(page: Any) -> dict[str, str]:
    """Read ``window.localStorage`` from *page* as a dict.

    The page serialises the storage to JSON; a string result is decoded here,
    any other result is returned as-is (or ``{}`` when falsy). Any failure is
    logged at debug level and yields ``{}``.
    """
    script = "() => JSON.stringify(window.localStorage)"
    try:
        payload = await page.evaluate(script)
        if not isinstance(payload, str):
            return payload or {}
        return json.loads(payload)
    except Exception as exc:
        logger.debug("localStorage extraction failed: %s", exc)
        return {}
|
||||
|
||||
|
||||
async def extract_session_storage(page: Any) -> dict[str, str]:
    """Read ``window.sessionStorage`` from *page* as a dict.

    The page serialises the storage to JSON; a string result is decoded here,
    any other result is returned as-is (or ``{}`` when falsy). Any failure is
    logged at debug level and yields ``{}``.
    """
    script = "() => JSON.stringify(window.sessionStorage)"
    try:
        payload = await page.evaluate(script)
        if not isinstance(payload, str):
            return payload or {}
        return json.loads(payload)
    except Exception as exc:
        logger.debug("sessionStorage extraction failed: %s", exc)
        return {}
|
||||
|
||||
|
||||
async def extract_new_api_user_id(page: Any) -> str:
    """Best-effort lookup of the logged-in New-API user id.

    The in-page script tries, in order: ``localStorage['uid']``, the ``id``
    field of the JSON stored under ``localStorage['user']``, and finally the
    ``id`` returned by ``GET /api/user/self`` (sent with credentials).

    Returns the id as a stripped string, or ``""`` when nothing is found or
    the evaluation fails (failures are logged at debug level).
    """
    lookup_script = """
        async () => {
            const uid = localStorage.getItem('uid')
            if (uid) return uid
            const userRaw = localStorage.getItem('user')
            if (userRaw) {
                try {
                    const user = JSON.parse(userRaw)
                    if (user?.id) return String(user.id)
                } catch {}
            }
            const response = await fetch('/api/user/self', { credentials: 'include' })
            if (!response.ok) return ''
            const payload = await response.json()
            const data = payload?.data || payload
            return data?.id ? String(data.id) : ''
        }
    """
    try:
        found = await page.evaluate(lookup_script)
        return str(found or "").strip()
    except Exception as exc:
        logger.debug("New-API user id extraction failed: %s", exc)
        return ""
|
||||
|
||||
|
||||
async def extract_request_headers(session: Any) -> list[dict[str, str]]:
    """Return a copy of the auth headers already captured on *session*.

    Per the original note, a CDP Network listener is started when the
    ephemeral session is created (in ``BrowserSessionService.create_ephemeral``)
    and keeps ``session.captured_headers`` up to date, so no fresh CDP attach
    is needed here. Returns ``[]`` when the attribute is missing or empty.
    """
    captured = getattr(session, "captured_headers", None)
    if not captured:
        return []
    logger.debug("auth-capture: returning %d cached headers", len(captured))
    # Hand back a copy so callers cannot mutate the live capture list.
    return list(captured)
|
||||
|
||||
|
||||
async def extract_all(session: Any) -> dict[str, Any]:
    """Collect every auth artefact from a browser session in one pass.

    Returns:
        A dict with the keys ``cookies``, ``storage`` (localStorage),
        ``session_storage``, ``auth_headers`` and ``candidates`` (the curated
        credential candidates built from all of the above).
    """
    page = session.page
    cookie_jar = await extract_cookies(session)
    local_store = await extract_local_storage(page)
    session_store = await extract_session_storage(page)
    captured_headers = await extract_request_headers(session)

    # Prefer an id already present in storage; only query the page otherwise.
    user_id = _find_new_api_user(local_store, session_store)
    if not user_id:
        user_id = await extract_new_api_user_id(page)

    # Use the page's live URL (more accurate than session.url).
    current_url = ""
    try:
        current_url = page.url or ""
    except Exception:
        pass

    curated = _curate_candidates(
        cookie_jar,
        local_store,
        session_store,
        captured_headers,
        user_id,
        page_url=current_url,
    )

    return {
        "cookies": cookie_jar,
        "storage": local_store,
        "session_storage": session_store,
        "auth_headers": captured_headers,
        "candidates": curated,
    }
|
||||
|
||||
|
||||
def _cookie_matches_hostname(cookie_domain: str, hostname: str) -> bool:
|
||||
"""判断 cookie domain 是否适用于给定 hostname。
|
||||
|
||||
|
||||
@@ -1,951 +0,0 @@
|
||||
"""Managed Playwright browser sessions for custom pages."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
from urllib.parse import urlparse
|
||||
from uuid import uuid4
|
||||
|
||||
from app.config import get_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BrowserDependencyError(RuntimeError):
    """Playwright, or the browser runtime it drives, could not be loaded or started."""
|
||||
|
||||
|
||||
class BrowserSessionError(RuntimeError):
    """A previously created browser session is no longer usable."""
|
||||
|
||||
|
||||
@dataclass
class BrowserTab:
    """One open page (tab) tracked inside a managed browser session."""

    # Identifier the session uses as the key for this tab.
    id: str
    # The page object backing the tab.
    page: Any
    # Timestamp recorded when the tab was registered; used to pick the
    # "latest" tab and to order tabs in state snapshots.
    created_at: float
|
||||
|
||||
|
||||
@dataclass
class BrowserSession:
    """A managed browser context together with the tabs opened inside it."""

    # Unique id of this session.
    id: str
    # Id of the custom page the session was created for.
    custom_page_id: int
    # Key identifying the on-disk browser profile backing the context.
    profile_key: str
    # Browser context that owns every page of this session.
    context: Any
    # All tracked tabs, keyed by tab id.
    tabs: dict[str, BrowserTab]
    # Key into ``tabs`` of the tab that currently receives input.
    active_tab_id: str
    # Serialises operations that touch the session's pages.
    lock: asyncio.Lock
    # Bumped whenever tabs are added/removed/switched or a page navigates.
    tab_revision: int = 0
    # CDP session attached for header capture, if any.
    cdp_session: Any = None
    # Auth headers captured via CDP. Stays ``None`` until something assigns a
    # list; the default is kept as ``None`` (not ``[]``) for backward
    # compatibility, only the annotation now admits it.
    captured_headers: Optional[list[dict]] = None
    # ``time.monotonic()`` value of the last persisted state save.
    last_saved_state_at: float = 0.0

    @property
    def active_tab(self) -> BrowserTab:
        """The tab selected by ``active_tab_id`` (KeyError if it is gone)."""
        return self.tabs[self.active_tab_id]

    @property
    def page(self) -> Any:
        """The page of the currently active tab."""
        return self.active_tab.page
|
||||
|
||||
|
||||
class BrowserSessionService:
|
||||
# Idle TTL: close sessions that haven't had activity for this long
|
||||
IDLE_TTL_SECONDS = 1800 # 30 minutes
|
||||
# Cap: max concurrent persistent sessions (excludes auth-capture)
|
||||
MAX_SESSIONS = 10
|
||||
|
||||
def __init__(self) -> None:
    """Start with no Playwright driver, no sessions and no eviction task."""
    # Guards session creation.
    self._lock = asyncio.Lock()
    # Playwright driver; started on first use by _ensure_playwright().
    self._playwright: Optional[Any] = None
    # Background idle-eviction task, created together with the driver.
    self._evict_task: Optional[asyncio.Task[None]] = None
    # session id -> live session.
    self._sessions: dict[str, BrowserSession] = {}
    # profile key -> id of the session currently using that profile.
    self._profiles: dict[str, str] = {}
    # session id -> event-loop time of the last recorded activity.
    self._last_event_at: dict[str, float] = {}
|
||||
|
||||
def _browser_launch_kwargs(self, width: int, height: int) -> dict[str, Any]:
    """Build the keyword arguments used to launch a persistent browser context.

    ``width``/``height`` set both the viewport and the ``--window-size`` flag;
    headless mode and timezone come from the application settings.
    """
    chromium_flags = [
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-blink-features=AutomationControlled",
        "--window-size=%d,%d" % (width, height),
    ]
    launch_options: dict[str, Any] = {}
    launch_options["headless"] = get_settings().browser_headless
    launch_options["viewport"] = {"width": width, "height": height}
    launch_options["color_scheme"] = "dark"
    launch_options["locale"] = "zh-CN"
    launch_options["timezone_id"] = get_settings().tz
    # Drop the default automation flag from the launch arguments.
    launch_options["ignore_default_args"] = ["--enable-automation"]
    launch_options["args"] = chromium_flags
    return launch_options
|
||||
|
||||
async def _install_browser_init_scripts(self, context: Any) -> None:
    """Register the init script that runs in every page of *context*.

    The script overrides ``navigator.webdriver``, ``navigator.languages`` and
    ``navigator.plugins`` and makes sure ``window.chrome`` exists; errors
    inside the script are swallowed in the page.
    """
    init_script = """
        (() => {
            try {
                Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
                Object.defineProperty(navigator, 'languages', { get: () => ['zh-CN', 'zh', 'en-US', 'en'] });
                Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
                window.chrome = window.chrome || { runtime: {} };
            } catch (_) {}
        })();
    """
    await context.add_init_script(init_script)
|
||||
|
||||
async def create(
    self,
    custom_page_id: int,
    url: str,
    width: int = 1280,
    height: int = 720,
    login_config: Optional[dict[str, Any]] = None,
) -> BrowserSession:
    """Return a usable browser session for a custom page, reusing one if possible.

    Args:
        custom_page_id: Id of the custom page the session belongs to.
        url: Page to open; must be an ``http://`` or ``https://`` URL.
        width: Viewport width, clamped to 320..2560.
        height: Viewport height, clamped to 240..1600.
        login_config: Optional autofill configuration passed to
            ``_autofill_login``.

    Returns:
        The existing session for the same profile when it passes a health
        check, otherwise a newly launched one.

    Raises:
        ValueError: If *url* is not http/https.
        BrowserDependencyError: If Playwright cannot be started.
        Exception: Whatever launching, preparing or navigating the new
            context raises; the context is closed before the error propagates.
    """
    if not url.startswith(("http://", "https://")):
        raise ValueError("Only http/https URLs are allowed")
    width = max(320, min(width, 2560))
    height = max(240, min(height, 1600))
    async with self._lock:
        await self._ensure_playwright()
        profile_key = self._profile_key(custom_page_id, url)
        existing_id = self._profiles.get(profile_key)
        existing = self._sessions.get(existing_id or "")
        if existing and not existing.page.is_closed():
            # Health check: verify the session can actually serve content.
            healthy = True
            try:
                async with existing.lock:
                    url_before = existing.page.url
                    await existing.page.evaluate("1")  # ping
                    await existing.page.screenshot(type="jpeg", quality=10, timeout=5000)
                    await existing.page.set_viewport_size({"width": width, "height": height})
                    if url_before == "about:blank":
                        await existing.page.goto(url, wait_until="domcontentloaded", timeout=45000)
                        await self._autofill_login(existing.page, login_config)
                    await self._reset_page_zoom(existing)
                self._touch(existing.id)
            except Exception:
                logger.info("existing session %s unhealthy, recreating", existing.id[:12])
                healthy = False
            if healthy:
                return existing
            # Close the unhealthy session (its profile stays on disk).
            await self.close(existing.id)
        if existing_id:
            self._profiles.pop(profile_key, None)
        # Idle cleanup: close stale sessions before spawning a new one.
        await self._evict_idle_sessions()

        context = await self._playwright.chromium.launch_persistent_context(
            str(self._profile_dir(profile_key)),
            **self._browser_launch_kwargs(width, height),
        )
        try:
            await self._install_browser_init_scripts(context)
            await self._restore_session_state(context, profile_key)
            # Grant clipboard access for the page origin (best effort).
            try:
                parsed = urlparse(url)
                origin = f"{parsed.scheme}://{parsed.netloc}"
                await context.grant_permissions(["clipboard-read", "clipboard-write"], origin=origin)
            except Exception:
                logger.debug("clipboard permission grant failed (non-fatal)")
            page = context.pages[0] if context.pages else await context.new_page()
        except Exception:
            # The context is not tracked in self._sessions yet, so self.close()
            # cannot reach it: close it here or the browser process leaks.
            try:
                await context.close()
            except Exception as close_exc:
                logger.debug("failed to close half-initialised context: %s", close_exc)
            raise

        tab_id = uuid4().hex
        tab = BrowserTab(id=tab_id, page=page, created_at=asyncio.get_event_loop().time())
        session = BrowserSession(
            id=uuid4().hex,
            custom_page_id=custom_page_id,
            profile_key=profile_key,
            context=context,
            tabs={tab_id: tab},
            active_tab_id=tab_id,
            lock=asyncio.Lock(),
        )
        self._sessions[session.id] = session
        self._profiles[profile_key] = session.id
        self._touch(session.id)
        # Register listeners for the initial tab.
        self._setup_tab_listeners(session, page)
        # Capture pages the browser opens later (multi-tab support).
        context.on("page", lambda p: self._handle_new_page(session, p))
        # Evict again after adding the new session so the cap is enforced immediately.
        await self._evict_idle_sessions()
        try:
            await page.goto(url, wait_until="domcontentloaded", timeout=45000)
            await self._autofill_login(page, login_config)
            await self._reset_page_zoom(session)
        except Exception:
            await self.close(session.id)
            raise
        logger.info("session created: %s (page=%s, profile=%s)", session.id[:12], custom_page_id, profile_key)
        return session
|
||||
|
||||
def _touch(self, session_id: str) -> None:
    """Record the current event-loop time as the session's last activity.

    This resets the idle timer used by the eviction logic.
    """
    loop_clock = asyncio.get_event_loop()
    self._last_event_at[session_id] = loop_clock.time()
|
||||
|
||||
def _handle_new_page(self, session: BrowserSession, page: Any) -> None:
    """Schedule registration of a page the browser opened on its own.

    Used as the synchronous ``"page"`` event callback (e.g. for
    ``target="_blank"`` links); the actual work runs as a background task.
    """
    registration = self._register_new_page(session, page)
    asyncio.create_task(registration)
|
||||
|
||||
def _setup_tab_listeners(self, session: BrowserSession, page: Any) -> None:
    """Make navigation and lifecycle events of *page* bump ``session.tab_revision``."""

    def bump_revision(_=None):
        session.tab_revision += 1

    # One shared handler for every event that changes what the tab shows.
    for event_name in ("domcontentloaded", "load", "framenavigated", "close"):
        page.on(event_name, bump_revision)
|
||||
|
||||
async def _register_new_page(self, session: BrowserSession, page: Any) -> None:
    """Track a newly opened page as a tab and make it the active one."""
    new_id = uuid4().hex
    new_tab = BrowserTab(id=new_id, page=page, created_at=asyncio.get_event_loop().time())

    async with session.lock:
        session.tabs[new_id] = new_tab
        session.active_tab_id = new_id
        session.tab_revision += 1
        logger.info("session %s: captured new tab %s (total: %d)", session.id[:12], new_id[:8], len(session.tabs))

    self._setup_tab_listeners(session, page)
    # Best-effort: bring the tab to the front and reset its zoom.
    await self._init_new_tab(session, new_tab)
|
||||
|
||||
async def _init_new_tab(self, session: BrowserSession, tab: BrowserTab) -> None:
    """Best-effort setup of a freshly captured tab.

    Brings the tab to the front, resets the page zoom and, for http(s) pages,
    grants clipboard read/write to the page's origin. Nothing is raised:
    a clipboard-grant failure is ignored and any other failure is logged at
    debug level.
    """
    try:
        await tab.page.bring_to_front()
        await self._reset_page_zoom(session)
        # Grant clipboard permission for the new page's origin if possible.
        try:
            current_url = tab.page.url
            if current_url.startswith("http"):
                parts = urlparse(current_url)
                await session.context.grant_permissions(
                    ["clipboard-read", "clipboard-write"],
                    origin=f"{parts.scheme}://{parts.netloc}",
                )
        except Exception:
            pass
    except Exception as exc:
        logger.debug("new tab %s init failed: %s", tab.id[:8], exc)
|
||||
|
||||
async def screenshot(self, session_id: str) -> bytes:
    """Return a JPEG screenshot (quality 65, viewport only) of the active page.

    For sessions whose profile key is not an ``auth-capture-`` one, the
    session state is also persisted if the last save was more than 10 seconds
    ago.

    Raises:
        KeyError: If the session does not exist.
        BrowserSessionError: If every page of the session is closed.
    """
    session = self._get(session_id)
    self._touch(session_id)
    async with session.lock:
        self._ensure_open(session)
        key = session.profile_key
        if key and not key.startswith("auth-capture-"):
            current = time.monotonic()
            # Throttle state saves so frequent screenshots stay cheap.
            if current - session.last_saved_state_at > 10.0:
                await self._save_session_state(session)
                session.last_saved_state_at = current
        return await session.page.screenshot(type="jpeg", quality=65, full_page=False)
|
||||
|
||||
async def event(
    self,
    session_id: str,
    event_type: str,
    payload: dict[str, Any],
    *,
    include_state: bool = True,
) -> dict[str, Any] | None:
    """Replay one input or navigation event on the session's active page.

    Supported ``event_type`` values: ``click``, ``dblclick``, ``mousemove``,
    ``mousedown``, ``mouseup``, ``type``, ``key``, ``scroll``, ``reload``,
    ``back``, ``forward`` and ``resize``. Mouse events read ``x``/``y`` (and
    optionally ``button``) from *payload*; ``type`` reads ``text``; ``key``
    reads ``key``; ``scroll`` reads ``delta_x``/``delta_y``; ``resize`` reads
    ``width``/``height`` (clamped to 320..2560 and 240..1600).

    For sessions whose profile key is not an ``auth-capture-`` one, the
    session state is persisted if the last save was more than 5 seconds ago.

    Returns:
        The session state snapshot, or ``None`` when *include_state* is false.

    Raises:
        KeyError: If the session does not exist.
        BrowserSessionError: If every page of the session is closed.
        ValueError: If *event_type* is not supported.
    """
    session = self._get(session_id)
    # Same idle-timer reset every other public method uses.
    self._touch(session_id)
    async with session.lock:
        self._ensure_open(session)
        page = session.page
        if event_type == "click":
            await page.mouse.click(float(payload["x"]), float(payload["y"]), button=payload.get("button", "left"))
        elif event_type == "dblclick":
            await page.mouse.dblclick(float(payload["x"]), float(payload["y"]), button=payload.get("button", "left"))
        elif event_type == "mousemove":
            await page.mouse.move(float(payload["x"]), float(payload["y"]))
        elif event_type == "mousedown":
            await page.mouse.move(float(payload["x"]), float(payload["y"]))
            await page.mouse.down(button=payload.get("button", "left"))
        elif event_type == "mouseup":
            await page.mouse.move(float(payload["x"]), float(payload["y"]))
            await page.mouse.up(button=payload.get("button", "left"))
        elif event_type == "type":
            text = str(payload.get("text", ""))
            if text:
                await page.keyboard.insert_text(text)
        elif event_type == "key":
            key = str(payload.get("key", ""))
            if key:
                await page.keyboard.press(key)
        elif event_type == "scroll":
            # Position the pointer first so the wheel event targets that spot.
            if payload.get("x") is not None and payload.get("y") is not None:
                await page.mouse.move(float(payload["x"]), float(payload["y"]))
            await page.mouse.wheel(float(payload.get("delta_x", 0)), float(payload.get("delta_y", 0)))
        elif event_type == "reload":
            await page.reload(wait_until="domcontentloaded", timeout=45000)
        elif event_type == "back":
            await page.go_back(wait_until="domcontentloaded", timeout=45000)
        elif event_type == "forward":
            await page.go_forward(wait_until="domcontentloaded", timeout=45000)
        elif event_type == "resize":
            width = max(320, min(int(payload.get("width", 1280)), 2560))
            height = max(240, min(int(payload.get("height", 720)), 1600))
            await page.set_viewport_size({"width": width, "height": height})
        else:
            raise ValueError("Unsupported browser event")
        if session.profile_key and not session.profile_key.startswith("auth-capture-"):
            now = time.monotonic()
            # Throttle state saves to at most one every 5 seconds.
            if now - session.last_saved_state_at > 5.0:
                await self._save_session_state(session)
                session.last_saved_state_at = now

        if not include_state:
            return None
        return await self._session_state(session)
|
||||
|
||||
async def selected_text(self, session_id: str) -> str:
    """Return the text currently selected in the active page (``""`` if none).

    Raises:
        KeyError: If the session does not exist.
        BrowserSessionError: If every page of the session is closed.
    """
    session = self._get(session_id)
    self._touch(session_id)
    selection_script = "() => window.getSelection()?.toString() || ''"
    async with session.lock:
        self._ensure_open(session)
        selection = await session.page.evaluate(selection_script)
        return str(selection or "")
|
||||
|
||||
async def read_clipboard(self, session_id: str) -> tuple[Optional[str], Optional[str]]:
    """Read the remote browser's clipboard text.

    Returns:
        ``(text, error_reason)``. ``text`` is ``None`` when the clipboard is
        empty or unreadable. ``error_reason`` is ``None`` on success and for an
        empty clipboard; otherwise it is ``"denied"``, the in-page error
        message, or ``"read_failed"`` when the evaluation itself failed.

    Raises:
        KeyError: If the session does not exist.
        BrowserSessionError: If every page of the session is closed.
    """
    session = self._get(session_id)
    self._touch(session_id)
    # In-page errors come back as strings prefixed with "ERROR:".
    clipboard_script = """
        async () => {
            try {
                const text = await navigator.clipboard.readText();
                return text || null;
            } catch (e) {
                if (e instanceof DOMException) {
                    if (e.name === 'NotAllowedError') return 'ERROR:denied';
                    if (e.name === 'NotFoundError') return null;
                }
                return 'ERROR:' + (e.message || String(e));
            }
        }
    """
    async with session.lock:
        self._ensure_open(session)
        try:
            outcome = await session.page.evaluate(clipboard_script)
            if outcome is None:
                return None, None  # empty clipboard
            if isinstance(outcome, str) and outcome.startswith("ERROR:"):
                reason = outcome[6:]
                logger.debug("clipboard read error for %s: %s", session_id[:12], reason)
                return None, reason
            return str(outcome), None
        except Exception as exc:
            logger.warning("clipboard read failed for %s: %s", session_id[:12], exc)
            return None, "read_failed"
|
||||
|
||||
async def close(self, session_id: str) -> None:
    """Forget a session and tear down its browser context.

    Unknown session ids are ignored. For non ``auth-capture-`` profiles the
    session state is saved first; ``auth-capture-`` profile directories are
    deleted afterwards. Teardown failures are logged, never raised.
    """
    self._last_event_at.pop(session_id, None)
    session = self._discard_session(session_id)
    if not session:
        return
    short_id = session_id[:12]
    logger.info("session closing: %s (page=%s, profile=%s)", short_id, session.custom_page_id, session.profile_key)

    profile_key = session.profile_key
    is_ephemeral = bool(profile_key) and profile_key.startswith("auth-capture-")
    is_persistent = bool(profile_key) and not is_ephemeral

    # Persist the latest state before the context is closed for good.
    if is_persistent:
        try:
            if not session.page.is_closed():
                await self._save_session_state(session)
        except Exception as exc:
            logger.debug("failed to save state during close: %s", exc)

    # Detach the CDP session if one is active.
    if session.cdp_session:
        try:
            await session.cdp_session.detach()
        except Exception:
            pass

    closed_cleanly = True
    # Close the context with a timeout so a hung browser cannot block forever.
    try:
        await asyncio.wait_for(session.context.close(), timeout=10.0)
        logger.info("session context closed: %s", short_id)
    except asyncio.TimeoutError:
        closed_cleanly = False
        logger.warning("session close timeout: %s (falling back to browser.close)", short_id)
        try:
            owning_browser = getattr(session.context, "browser", None)
            if owning_browser is None:
                logger.warning("session context.browser is None, cannot fallback: %s", short_id)
            else:
                await asyncio.wait_for(owning_browser.close(), timeout=5.0)
                closed_cleanly = True
                logger.info("session browser fallback closed: %s", short_id)
        except Exception as exc:
            logger.warning("session browser fallback failed: %s: %s", short_id, exc)
    except Exception as exc:
        closed_cleanly = False
        logger.warning("session close error: %s: %s", short_id, exc)

    # Clean up ephemeral (auth-capture) profile directories.
    if is_ephemeral:
        profile_dir = self._profile_dir(profile_key)
        import shutil
        try:
            shutil.rmtree(profile_dir, ignore_errors=True)
        except Exception:
            pass

    if closed_cleanly:
        logger.info("session closed: %s", short_id)
    else:
        logger.warning("session close_failed: %s", short_id)
|
||||
|
||||
|
||||
async def shutdown(self) -> None:
    """Stop the eviction task, close every session, then stop Playwright.

    Each session close is limited to 15 seconds and the Playwright stop to
    10 seconds; failures are logged and do not abort the shutdown.
    """
    # Cancel the background eviction loop and wait for it to finish.
    evictor = self._evict_task
    if evictor is not None and not evictor.done():
        evictor.cancel()
        try:
            await evictor
        except asyncio.CancelledError:
            pass
        self._evict_task = None

    # Snapshot the ids: close() mutates self._sessions while we iterate.
    pending_ids = list(self._sessions)
    if pending_ids:
        logger.info("shutdown: closing %d browser sessions", len(pending_ids))
    for session_id in pending_ids:
        try:
            await asyncio.wait_for(self.close(session_id), timeout=15.0)
        except Exception as exc:
            logger.warning("shutdown close failed for %s: %s", session_id[:12], exc)

    if self._playwright:
        logger.info("shutdown: stopping playwright")
        try:
            await asyncio.wait_for(self._playwright.stop(), timeout=10.0)
        except Exception as exc:
            logger.warning("shutdown playwright stop failed: %s", exc)
        self._playwright = None
|
||||
|
||||
async def state(self, session_id: str) -> dict[str, Any]:
    """Return the current state snapshot of a session.

    Raises:
        KeyError: If the session does not exist.
        BrowserSessionError: If every page of the session is closed.
    """
    target = self._get(session_id)
    self._touch(session_id)
    async with target.lock:
        self._ensure_open(target)
        snapshot = await self._session_state(target)
        return snapshot
|
||||
|
||||
async def activate_tab(self, session_id: str, tab_id: str) -> dict[str, Any]:
    """Make *tab_id* the active tab, bring it to the front and return the new state.

    Raises:
        KeyError: If the session or the tab does not exist.
        BrowserSessionError: If every page of the session is closed.
    """
    session = self._get(session_id)
    self._touch(session_id)
    async with session.lock:
        self._ensure_open(session)
        known_tabs = session.tabs
        if tab_id not in known_tabs:
            raise KeyError("tab not found")
        session.active_tab_id = tab_id
        session.tab_revision += 1
        # session.page now resolves to the newly selected tab.
        await session.page.bring_to_front()
        return await self._session_state(session)
|
||||
|
||||
async def close_tab(self, session_id: str, tab_id: str) -> dict[str, Any]:
    """Close one tab of a session and return the updated state.

    If the closed tab was active, the most recently created remaining tab
    becomes active and is brought to the front.

    Raises:
        KeyError: If the session or the tab does not exist.
        ValueError: If *tab_id* is the only remaining tab.
        BrowserSessionError: If every page of the session is closed.
    """
    session = self._get(session_id)
    self._touch(session_id)
    async with session.lock:
        self._ensure_open(session)
        if tab_id not in session.tabs:
            raise KeyError("tab not found")
        if len(session.tabs) <= 1:
            raise ValueError("cannot close the last tab")

        doomed = session.tabs.pop(tab_id)
        try:
            await doomed.page.close()
        except Exception:
            pass

        was_active = session.active_tab_id == tab_id
        if was_active:
            # Fall back to the newest tab that is still open.
            newest = max(session.tabs.values(), key=lambda candidate: candidate.created_at)
            session.active_tab_id = newest.id
            await session.page.bring_to_front()

        session.tab_revision += 1
        return await self._session_state(session)
|
||||
|
||||
async def _session_state(self, session: BrowserSession) -> dict[str, Any]:
    """Build the state snapshot of a session, pruning closed tabs on the way.

    Returns:
        A dict with ``id``, ``custom_page_id``, the active page's ``url`` and
        ``title``, ``active_tab_id``, ``tabs`` (open tabs sorted by creation
        time) and ``tab_revision``.

    Raises:
        BrowserSessionError: If pruning leaves the session without any tab.
    """
    open_tabs: list[dict[str, Any]] = []
    stale_ids: list[str] = []
    # Iterate over a copy: the tabs dict may change while we await below.
    for tab_id, tab in list(session.tabs.items()):
        if tab.page.is_closed():
            stale_ids.append(tab_id)
            continue
        try:
            title = await tab.page.title()
            url = tab.page.url
        except Exception:
            title, url = "Loading...", "about:blank"
        open_tabs.append(
            {"id": tab_id, "title": title, "url": url, "created_at": tab.created_at}
        )

    if stale_ids:
        for stale_id in stale_ids:
            session.tabs.pop(stale_id, None)
        if not session.tabs:
            raise BrowserSessionError("all browser pages are closed")
        if session.active_tab_id in stale_ids:
            # The active tab vanished: fall back to the newest remaining one.
            newest = max(session.tabs.values(), key=lambda candidate: candidate.created_at)
            session.active_tab_id = newest.id
            session.tab_revision += 1

    open_tabs.sort(key=lambda entry: entry["created_at"])
    return {
        "id": session.id,
        "custom_page_id": session.custom_page_id,
        "url": session.page.url,
        "title": await session.page.title(),
        "active_tab_id": session.active_tab_id,
        "tabs": open_tabs,
        "tab_revision": session.tab_revision,
    }
|
||||
|
||||
async def _ensure_playwright(self) -> None:
    """Start the Playwright driver and the eviction loop if not yet running.

    Does nothing when a driver is already held.

    Raises:
        BrowserDependencyError: If Playwright is not installed or fails to
            start.
    """
    if self._playwright:
        return
    try:
        from playwright.async_api import async_playwright
    except ImportError as exc:
        raise BrowserDependencyError("Playwright is not installed. Run `pip install -r requirements.txt`.") from exc
    try:
        driver = await async_playwright().start()
    except Exception as exc:
        raise BrowserDependencyError(f"Unable to start Playwright: {exc}") from exc
    self._playwright = driver
    # Start the background eviction loop unless one is still running.
    evictor = self._evict_task
    if evictor is None or evictor.done():
        self._evict_task = asyncio.create_task(self._evict_loop())
|
||||
|
||||
async def _reset_page_zoom(self, session: BrowserSession) -> None:
    """Best-effort reset of the active page's scale factor to 1 via CDP.

    A temporary CDP session is attached and always detached again; every
    failure is silently ignored.
    """
    scale_reset = {"pageScaleFactor": 1}
    try:
        cdp_channel = await session.context.new_cdp_session(session.page)
        try:
            await cdp_channel.send("Emulation.setPageScaleFactor", scale_reset)
        finally:
            await cdp_channel.detach()
    except Exception:
        pass
|
||||
|
||||
async def autofill_login(
    self,
    session_id: str,
    login_config: Optional[dict[str, Any]],
) -> bool:
    """Manually trigger login autofill on the active page of a session.

    Only the username and password fields are filled; the form is never
    submitted, and no password data is returned to the caller.

    Returns:
        ``True`` if both fields were found and filled, ``False`` otherwise.

    Raises:
        KeyError: If the session does not exist.
        BrowserSessionError: If every page of the session is closed.
    """
    session = self._get(session_id)
    self._touch(session_id)
    async with session.lock:
        self._ensure_open(session)
        filled = await self._autofill_login(
            session.page,
            login_config,
            max_wait_seconds=3.0,
            skip_submit=True,
        )
        return filled
|
||||
|
||||
async def _autofill_login(
    self,
    page: Any,
    config: Optional[dict[str, Any]],
    *,
    max_wait_seconds: float = 2.0,
    poll_interval_seconds: float = 0.25,
    skip_submit: bool = False,
) -> bool:
    """Fill the login form on *page* from *config*, optionally submitting it.

    *config* must be enabled and carry a non-empty ``username`` and
    ``password``. ``username_selector`` / ``password_selector`` are tried
    first, then a list of common selectors. The form is submitted only when
    *skip_submit* is false and ``submit_selector`` resolves to a visible
    element.

    Returns:
        ``True`` once both fields were filled; ``False`` when autofill is
        disabled, credentials are missing, the fields are not found within
        *max_wait_seconds*, or any step fails (failures are logged at info
        level, never raised).
    """
    if not config or not config.get("enabled"):
        return False
    login_name = str(config.get("username") or "")
    secret = str(config.get("password") or "")
    if not (login_name and secret):
        return False
    try:
        # Explicit selector first, then progressively more generic guesses.
        name_candidates = [
            config.get("username_selector"),
            "input[type='email']",
            "input[name*='user' i]",
            "input[id*='user' i]",
            "input[name*='email' i]",
            "input[id*='email' i]",
            "input[name*='login' i]",
            "input[id*='login' i]",
            "input[autocomplete='username']",
            "input:not([type]), input[type='text']",
        ]
        secret_candidates = [
            config.get("password_selector"),
            "input[type='password']",
            "input[autocomplete='current-password']",
        ]
        name_field, secret_field = await self._wait_for_login_locators(
            page,
            name_candidates,
            secret_candidates,
            max_wait_seconds=max_wait_seconds,
            poll_interval_seconds=poll_interval_seconds,
        )
        if not name_field or not secret_field:
            logger.info("Login autofill skipped: login fields not found")
            return False
        await name_field.fill(login_name, timeout=3000)
        await secret_field.fill(secret, timeout=3000)
        if skip_submit:
            return True
        submit_selector = str(config.get("submit_selector") or "").strip()
        if submit_selector:
            submit_button = await self._first_visible_locator(page, [submit_selector], timeout=500)
            if submit_button:
                await submit_button.click(timeout=3000)
        return True
    except Exception as exc:
        logger.info("Login autofill skipped: %s", exc)
        return False
|
||||
|
||||
async def _wait_for_login_locators(
    self,
    page: Any,
    username_selectors: list[Optional[str]],
    password_selectors: list[Optional[str]],
    *,
    max_wait_seconds: float,
    poll_interval_seconds: float,
) -> tuple[Optional[Any], Optional[Any]]:
    """Poll *page* until both a username and a password field are visible.

    The page is always probed at least once, even with a zero wait.

    Returns:
        ``(username_locator, password_locator)``, or ``(None, None)`` if both
        were not visible together before *max_wait_seconds* elapsed.
    """
    give_up_at = time.monotonic() + max_wait_seconds
    while True:
        user_field = await self._first_visible_locator(page, username_selectors, timeout=150)
        pass_field = await self._first_visible_locator(page, password_selectors, timeout=150)
        if user_field and pass_field:
            return user_field, pass_field
        if time.monotonic() >= give_up_at:
            return None, None
        await asyncio.sleep(poll_interval_seconds)
|
||||
|
||||
async def _first_visible_locator(
    self,
    page: Any,
    selectors: list[Optional[str]],
    *,
    timeout: float = 1500,
) -> Optional[Any]:
    """Return the first locator among *selectors* that matches a visible element.

    Blank or ``None`` selectors are skipped, and a selector whose lookup
    raises is treated as "no match". Returns ``None`` when nothing qualifies.
    """
    for raw_selector in selectors:
        cleaned = str(raw_selector or "").strip()
        if not cleaned:
            continue
        try:
            candidate = page.locator(cleaned).first
            if not await candidate.count():
                continue
            if await candidate.is_visible(timeout=timeout):
                return candidate
        except Exception:
            continue
    return None
|
||||
|
||||
def get_session(self, session_id: str) -> BrowserSession:
    """Look up a tracked session by id.

    Raises:
        KeyError: If no session (or a falsy entry) is stored under that id.
    """
    found = self._sessions.get(session_id)
    if found:
        return found
    raise KeyError("browser session not found")
|
||||
|
||||
def find_by_page_id(self, custom_page_id: int) -> BrowserSession:
    """Return the first tracked session for *custom_page_id* whose page is open.

    Raises:
        KeyError: If no such session exists.
    """
    for candidate in self._sessions.values():
        if candidate.custom_page_id != custom_page_id:
            continue
        if candidate.page.is_closed():
            continue
        return candidate
    raise KeyError(f"no active browser session for page {custom_page_id}")
|
||||
|
||||
_get = get_session # alias for internal use
|
||||
|
||||
def _ensure_open(self, session: BrowserSession) -> None:
    """Make sure the session's active tab points at an open page.

    If the active tab's page is closed, the tab is dropped and the most
    recently created remaining tab becomes active.

    Raises:
        BrowserSessionError: If no tab is left; the session is discarded first.
    """
    if not session.active_tab.page.is_closed():
        return
    # The active tab is gone: forget it and look for a replacement.
    session.tabs.pop(session.active_tab_id, None)
    if not session.tabs:
        self._discard_session(session.id)
        raise BrowserSessionError("all browser pages are closed")
    newest = max(session.tabs.values(), key=lambda candidate: candidate.created_at)
    session.active_tab_id = newest.id
    session.tab_revision += 1
    logger.info("active tab closed, switched to %s", newest.id[:8])
|
||||
|
||||
def _discard_session(self, session_id: str) -> BrowserSession | None:
    """Stop tracking a session and return it (``None`` if it was unknown).

    The profile mapping is removed only when it still points at this session,
    so a newer session using the same profile is left untouched.
    """
    removed = self._sessions.pop(session_id, None)
    if not removed:
        return removed
    key = removed.profile_key
    if self._profiles.get(key) == session_id:
        self._profiles.pop(key, None)
    return removed
|
||||
|
||||
async def _evict_loop(self) -> None:
    """Run idle-session eviction every 5 minutes until the task is cancelled.

    An error in one eviction pass is logged and does not stop the loop.
    """
    interval_seconds = 300  # 5 minutes
    while True:
        await asyncio.sleep(interval_seconds)
        try:
            await self._evict_idle_sessions()
        except Exception:
            logger.exception("idle eviction loop error")
|
||||
|
||||
async def _evict_idle_sessions(self) -> None:
|
||||
"""Close oldest idle sessions when over cap, or any past TTL.
|
||||
|
||||
- Auth-capture sessions: max 10 minutes lifetime.
|
||||
- Remote browser sessions: close after IDLE_TTL_SECONDS of no WebSocket activity.
|
||||
"""
|
||||
now = asyncio.get_event_loop().time()
|
||||
to_remove: list[str] = []
|
||||
for sid, session in self._sessions.items():
|
||||
if session.profile_key and session.profile_key.startswith("auth-capture-"):
|
||||
# auth-capture: max 10 minute TTL from creation
|
||||
created = session.tabs.get(session.active_tab_id)
|
||||
if created:
|
||||
age = now - created.created_at
|
||||
if age > 600:
|
||||
to_remove.append(sid)
|
||||
logger.info("evicting auth-capture session %s (age=%ds > 600s)", sid[:12], int(age))
|
||||
else:
|
||||
# remote browser sessions: idle TTL
|
||||
last_active = self._last_event_at.get(sid, 0.0)
|
||||
if last_active > 0 and (now - last_active) > self.IDLE_TTL_SECONDS:
|
||||
to_remove.append(sid)
|
||||
logger.info("evicting idle session %s (no activity for >%ds)", sid[:12], self.IDLE_TTL_SECONDS)
|
||||
for sid in to_remove:
|
||||
await self.close(sid)
|
||||
|
||||
# Second: if still over cap, evict oldest by last_event_at
|
||||
persistent = [(sid, s) for sid, s in self._sessions.items()
|
||||
if not (s.profile_key or "").startswith("auth-capture-")]
|
||||
if len(persistent) > self.MAX_SESSIONS:
|
||||
persistent.sort(key=lambda x: self._last_event_at.get(x[0], 0.0))
|
||||
excess = len(persistent) - self.MAX_SESSIONS
|
||||
for sid, _ in persistent[:excess]:
|
||||
logger.info("evicting session %s (over cap of %d)", sid[:12], self.MAX_SESSIONS)
|
||||
await self.close(sid)
|
||||
|
||||
async def clear_profile(self, custom_page_id: int, url: str) -> None:
    """Close the page's active session (if any) and delete its profile directory.

    The profile key of the active session is used when one exists;
    otherwise the key is derived from ``custom_page_id`` and ``url``.

    Raises:
        RuntimeError: If the directory still exists after removal.
        OSError: Propagated from ``shutil.rmtree`` when deletion fails.
    """
    import shutil

    key: Optional[str] = None
    try:
        active = self.find_by_page_id(custom_page_id)
        key = active.profile_key
        await self.close(active.id)
    except KeyError:
        # No active session (or a KeyError raised while closing): fall through.
        pass

    # Without a live session the key is recomputed from the URL, which may
    # not match the original if the URL changed since the session was created.
    key = key or self._profile_key(custom_page_id, url)

    target = self._profile_dir(key)
    if target.exists():
        # Errors are intentionally not ignored so a failed delete surfaces.
        shutil.rmtree(target)
    if target.exists():
        raise RuntimeError(
            f"Failed to fully remove browser profile directory: {target}"
        )
    logger.info("cleared browser profile for page %d: %s", custom_page_id, target)
|
||||
|
||||
def _profile_dir(self, profile_key: str) -> Path:
    """Return the directory for ``profile_key``, creating it if needed.

    The directory lives under the configured ``browser_profiles_dir``; both
    that root and the per-profile directory are created when missing.
    """
    base = Path(get_settings().browser_profiles_dir)
    base.mkdir(parents=True, exist_ok=True)
    target = base / profile_key
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
def _cookies_path(self, profile_key: str) -> Path:
|
||||
return self._profile_dir(profile_key) / "session-cookies.json"
|
||||
|
||||
def _profile_key(self, custom_page_id: int, url: str) -> str:
|
||||
parsed = urlparse(url)
|
||||
origin = f"{parsed.scheme}-{parsed.netloc}".lower()
|
||||
safe_origin = re.sub(r"[^a-z0-9_.-]+", "_", origin).strip("_") or "page"
|
||||
return f"page-{custom_page_id}-{safe_origin[:80]}"
|
||||
|
||||
async def create_ephemeral(
    self,
    url: str,
    width: int = 1280,
    height: int = 720,
) -> BrowserSession:
    """Create a temporary browser session without a CustomPage record.

    The session uses an isolated random-named profile so it never collides
    with persistent custom-page profiles. Caller MUST close() when done.

    Args:
        url: Page to open; must start with ``http://`` or ``https://``.
        width: Viewport width, clamped to 320..2560.
        height: Viewport height, clamped to 240..1600.

    Returns:
        The registered session, after the initial navigation reached
        ``domcontentloaded``.

    Raises:
        ValueError: If ``url`` is not an http/https URL.
        Exception: Any setup or navigation error is re-raised after the
            partially created browser context/session has been closed.
    """
    if not url.startswith(("http://", "https://")):
        raise ValueError("Only http/https URLs are allowed")
    width = max(320, min(width, 2560))
    height = max(240, min(height, 1600))
    async with self._lock:
        await self._ensure_playwright()
        session_id = uuid4().hex
        profile_key = f"auth-capture-{session_id[:12]}"
        context = await self._playwright.chromium.launch_persistent_context(
            str(self._profile_dir(profile_key)),
            **self._browser_launch_kwargs(width, height),
        )
        try:
            await self._install_browser_init_scripts(context)
            # Grant clipboard access for the page origin (best effort).
            try:
                parsed = urlparse(url)
                origin = f"{parsed.scheme}://{parsed.netloc}"
                await context.grant_permissions(["clipboard-read", "clipboard-write"], origin=origin)
            except Exception:
                logger.debug("clipboard permission grant failed (non-fatal)")
            page = context.pages[0] if context.pages else await context.new_page()
            tab_id = uuid4().hex
            tab = BrowserTab(id=tab_id, page=page, created_at=asyncio.get_event_loop().time())
            session = BrowserSession(
                id=session_id,
                custom_page_id=0,
                profile_key=profile_key,
                context=context,
                tabs={tab_id: tab},
                active_tab_id=tab_id,
                lock=asyncio.Lock(),
                captured_headers=[],
            )
        except BaseException:
            # The context is not registered anywhere yet, so nothing else
            # could ever close it: release the browser before re-raising.
            try:
                await context.close()
            except Exception:
                logger.debug("failed to close browser context after setup error", exc_info=True)
            raise
        self._sessions[session.id] = session
        self._touch(session.id)
        # Register listeners for the initial tab
        self._setup_tab_listeners(session, page)
        # Register page capture
        context.on("page", lambda p: self._handle_new_page(session, p))
        # Start CDP network capture BEFORE the initial page load,
        # so we capture login redirects and auth headers from the start.
        await self._start_cdp_capture(session)
        try:
            await page.goto(url, wait_until="domcontentloaded", timeout=45000)
        except Exception:
            await self.close(session.id)
            raise
        return session
|
||||
|
||||
async def _start_cdp_capture(self, session: BrowserSession) -> None:
|
||||
"""Enable CDP Network domain and capture Authorization headers."""
|
||||
try:
|
||||
cdp = await session.context.new_cdp_session(session.page)
|
||||
await cdp.send("Network.enable")
|
||||
|
||||
def on_request(params: dict) -> None:
|
||||
headers = params.get("request", {}).get("headers", {})
|
||||
auth = headers.get("authorization") or headers.get("Authorization")
|
||||
api_key = headers.get("x-api-key") or headers.get("X-API-Key")
|
||||
url = params.get("request", {}).get("url", "")
|
||||
if auth:
|
||||
session.captured_headers.append({
|
||||
"type": "authorization",
|
||||
"value": auth,
|
||||
"url": url,
|
||||
})
|
||||
if api_key:
|
||||
session.captured_headers.append({
|
||||
"type": "api_key",
|
||||
"value": api_key,
|
||||
"url": url,
|
||||
})
|
||||
|
||||
cdp.on("Network.requestWillBeSent", on_request)
|
||||
session.cdp_session = cdp
|
||||
except Exception as exc:
|
||||
logger.debug("CDP capture not available: %s", exc)
|
||||
|
||||
async def _save_session_state(self, session: BrowserSession) -> None:
|
||||
if not session.profile_key or session.profile_key.startswith("auth-capture-"):
|
||||
return
|
||||
try:
|
||||
state = await session.context.storage_state()
|
||||
cookies_path = self._cookies_path(session.profile_key)
|
||||
import json
|
||||
import tempfile
|
||||
import os
|
||||
# Ensure parent directories exist
|
||||
cookies_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
temp_fd, temp_path = tempfile.mkstemp(dir=str(cookies_path.parent))
|
||||
try:
|
||||
with os.fdopen(temp_fd, 'w', encoding='utf-8') as f:
|
||||
json.dump(state, f, ensure_ascii=False, indent=2)
|
||||
os.replace(temp_path, cookies_path)
|
||||
except Exception:
|
||||
try:
|
||||
os.unlink(temp_path)
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.debug("failed to save session state for %s: %s", session.profile_key, exc)
|
||||
|
||||
async def _restore_session_state(self, context: Any, profile_key: str) -> None:
|
||||
if profile_key.startswith("auth-capture-"):
|
||||
return
|
||||
cookies_path = self._cookies_path(profile_key)
|
||||
if not cookies_path.exists() or cookies_path.stat().st_size == 0:
|
||||
return
|
||||
try:
|
||||
import json
|
||||
import time
|
||||
with open(cookies_path, 'r', encoding='utf-8') as f:
|
||||
state = json.load(f)
|
||||
cookies = state.get("cookies", [])
|
||||
if cookies:
|
||||
now = time.time()
|
||||
valid_cookies = []
|
||||
for c in cookies:
|
||||
expires = c.get("expires")
|
||||
if expires is not None and expires > 0 and expires <= now:
|
||||
continue
|
||||
if expires is not None and expires <= 0:
|
||||
c.pop("expires", None)
|
||||
valid_cookies.append(c)
|
||||
if valid_cookies:
|
||||
await context.add_cookies(valid_cookies)
|
||||
logger.info("restored %d cookies for profile %s", len(valid_cookies), profile_key)
|
||||
|
||||
# 还原 LocalStorage
|
||||
origins = state.get("origins", [])
|
||||
if origins:
|
||||
origins_json = json.dumps(origins)
|
||||
init_script = f"""
|
||||
(() => {{
|
||||
try {{
|
||||
const origins = {origins_json};
|
||||
const currentOrigin = window.location.origin;
|
||||
const target = origins.find(o => o.origin === currentOrigin);
|
||||
if (target && target.localStorage) {{
|
||||
for (const item of target.localStorage) {{
|
||||
try {{
|
||||
window.localStorage.setItem(item.name, item.value);
|
||||
}} catch (e) {{
|
||||
console.error('Failed to restore localStorage key', item.name, e);
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
}} catch (err) {{
|
||||
console.error('LocalStorage restore initialization script failed', err);
|
||||
}}
|
||||
}})();
|
||||
"""
|
||||
await context.add_init_script(init_script)
|
||||
logger.info("registered LocalStorage init script for profile %s (origins: %d)", profile_key, len(origins))
|
||||
except Exception as exc:
|
||||
logger.warning("failed to restore cookies/state for profile %s: %s", profile_key, exc)
|
||||
|
||||
|
||||
# Shared BrowserSessionService instance, constructed when this statement runs.
browser_sessions = BrowserSessionService()
|
||||
Reference in New Issue
Block a user