fix: object URL leak, CDP before goto, limit raw secrets in extract

- AuthCaptureDialog: revoke the previous frame's object URL before creating the next one, to prevent a memory leak
- CDP Network capture starts before initial page.goto, not after
- /extract returns only curated candidates by default; pass ?include_raw=true to also get full cookies, localStorage, and headers
This commit is contained in:
SmartUp Developer
2026-05-18 14:25:56 +08:00
parent c7b33983d6
commit 7cb0ff1608
3 changed files with 15 additions and 9 deletions
+7 -3
View File
@@ -89,12 +89,13 @@ async def create_capture_session(
@router.get("/sessions/{session_id}/extract", response_model=CaptureExtractResponse) @router.get("/sessions/{session_id}/extract", response_model=CaptureExtractResponse)
async def extract_credentials( async def extract_credentials(
session_id: str, session_id: str,
include_raw: bool = Query(default=False, description="Include full cookies/storage/headers in response"),
_=Depends(get_current_user), _=Depends(get_current_user),
): ):
"""Extract all auth credentials from the browser session. """Extract auth credentials from the browser session.
Returns cookies, localStorage, sessionStorage, and curated candidates. By default only returns curated candidates (typed, scored, with masked preview).
Candidate values are masked in logs. Pass include_raw=true to also get full cookies, localStorage, and headers.
""" """
try: try:
session = browser_sessions.get_session(session_id) session = browser_sessions.get_session(session_id)
@@ -106,6 +107,9 @@ async def extract_credentials(
except Exception as exc: except Exception as exc:
raise _browser_error(exc) raise _browser_error(exc)
if not include_raw:
# Strip raw data — only keep curated candidates
return CaptureExtractResponse(candidates=result.get("candidates", []))
return CaptureExtractResponse(**result) return CaptureExtractResponse(**result)
@@ -387,10 +387,11 @@ class BrowserSessionService:
captured_headers=[], captured_headers=[],
) )
self._sessions[session.id] = session self._sessions[session.id] = session
# Start CDP network capture BEFORE the initial page load,
# so we capture login redirects and auth headers from the start.
await self._start_cdp_capture(session)
try: try:
await page.goto(url, wait_until="domcontentloaded", timeout=45000) await page.goto(url, wait_until="domcontentloaded", timeout=45000)
# Start CDP network capture immediately — so we don't miss login requests
await self._start_cdp_capture(session)
except Exception: except Exception:
await self.close(session.id) await self.close(session.id)
raise raise
@@ -180,6 +180,7 @@ const frameRef = ref<HTMLElement | null>(null)
let ws: WebSocket | null = null let ws: WebSocket | null = null
let pointerDown = false let pointerDown = false
let frameW = 1; let frameH = 1 // natural dimensions of the frame let frameW = 1; let frameH = 1 // natural dimensions of the frame
let prevFrameUrl = '' // previous blob URL to revoke
// ——— Launch ——— // ——— Launch ———
@@ -212,11 +213,11 @@ function connectWs() {
ws.onmessage = (evt) => { ws.onmessage = (evt) => {
if (evt.data instanceof ArrayBuffer) { if (evt.data instanceof ArrayBuffer) {
// Binary JPEG frame // Binary JPEG frame — revoke previous to avoid memory leak
if (prevFrameUrl) URL.revokeObjectURL(prevFrameUrl)
const blob = new Blob([evt.data], { type: 'image/jpeg' }) const blob = new Blob([evt.data], { type: 'image/jpeg' })
frameUrl.value = URL.createObjectURL(blob) prevFrameUrl = URL.createObjectURL(blob)
// Revoke previous URL after a tick to free memory frameUrl.value = prevFrameUrl
setTimeout(() => { /* old URL auto-revoked */ }, 100)
} else { } else {
// JSON message (init, error, etc.) // JSON message (init, error, etc.)
try { try {