feat: auth capture — remote browser credential extraction

- BrowserSessionService: add create_ephemeral() for temp sessions
- New auth_capture_service.py: extract cookies, localStorage, sessionStorage from page
- New auth_capture router: POST /sessions, GET /sessions/{id}/extract, DELETE /sessions/{id}
- Frontend AuthCaptureDialog: URL input → browser view → extract → pick candidate
- Upstreams.vue: '提取' button next to Bearer Token field
- No sensitive values logged
This commit is contained in:
SmartUp Developer
2026-05-17 21:04:36 +08:00
parent c809139470
commit 4d1237c58f
7 changed files with 659 additions and 4 deletions
+2 -1
View File
@@ -14,7 +14,7 @@ from app.models.admin_user import AdminUser
from app.database import SessionLocal
from app.utils.auth import hash_password, verify_password, validate_password_supported
from app.services.scheduler import start_scheduler, stop_scheduler
from app.routers import auth, upstreams, webhooks, logs, custom_pages, browser_sessions, websites
from app.routers import auth, upstreams, webhooks, logs, custom_pages, browser_sessions, websites, auth_capture
from app.services.browser_session_service import browser_sessions as browser_session_service
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s")
@@ -97,6 +97,7 @@ app.include_router(logs.router)
app.include_router(custom_pages.router)
app.include_router(browser_sessions.router)
app.include_router(websites.router)
app.include_router(auth_capture.router)
@app.get("/healthz")
+120
View File
@@ -0,0 +1,120 @@
"""Auth capture API — remote browser for manual login + credential extraction."""
from __future__ import annotations
import logging
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
from app.database import get_db
from app.services.auth_capture_service import extract_all
from app.services.browser_session_service import (
BrowserDependencyError,
BrowserSessionError,
browser_sessions,
)
from app.utils.auth import get_current_user, get_user_from_token_param
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/auth-capture", tags=["auth-capture"])
class CaptureSessionCreate(BaseModel):
    """Request body for creating an auth-capture browser session."""

    # Login page to open. This model only requires a string; it does not
    # check the scheme itself.
    url: str = Field(..., description="Target login page URL to open in browser")
    # Requested browser width in pixels; values outside 320..2560 are rejected.
    width: int = Field(default=1280, ge=320, le=2560)
    # Requested browser height in pixels; values outside 240..1600 are rejected.
    height: int = Field(default=720, ge=240, le=1600)
class CaptureSessionResponse(BaseModel):
    """Response body returned once an auth-capture session has been created."""

    # Identifier of the browser session; used in the extract/close routes.
    session_id: str
    # Server-relative WebSocket path for viewing the session.
    ws_url: str
class CaptureExtractResponse(BaseModel):
    """Response body carrying everything extracted from a capture session.

    All fields default to empty containers, so a partial result still
    validates.
    """

    # Cookie records taken from the browser context.
    cookies: list[dict] = []
    # localStorage key/value pairs of the current page.
    storage: dict[str, str] = {}
    # sessionStorage key/value pairs of the current page.
    session_storage: dict[str, str] = {}
    # Curated subset of the above that looks like usable credentials.
    candidates: list[dict] = []
def _browser_error(exc: Exception) -> HTTPException:
    """Translate an exception raised by the browser layer into an HTTPException.

    The rules are evaluated in order and the first matching type wins:
    dependency errors -> 503, session errors -> 409, ``KeyError`` -> 404,
    ``ValueError`` -> 400. Anything else is logged with its traceback and
    reported as a generic 500 so internal details are not sent to the client.

    The exception is *returned*, not raised; callers are expected to
    ``raise`` the result from inside their ``except`` block.
    """
    # (exception type, HTTP status, fixed detail or None to use str(exc))
    rules = (
        (BrowserDependencyError, 503, None),
        (BrowserSessionError, 409, None),
        (KeyError, 404, "session not found"),
        (ValueError, 400, None),
    )
    for exc_type, status, fixed_detail in rules:
        if isinstance(exc, exc_type):
            detail = str(exc) if fixed_detail is None else fixed_detail
            return HTTPException(status, detail)

    logger.exception("auth-capture error")
    return HTTPException(500, "internal error")
def _ws_url(session_id: str, token: str) -> str:
    """Build the WebSocket URL for the remote browser viewer.

    Both the session id (a path segment) and the token (a query value) are
    percent-encoded, so a token containing characters such as ``+``, ``/``,
    ``=`` or ``&`` cannot corrupt the query string. Values made only of
    unreserved characters come out unchanged.

    Args:
        session_id: Identifier of the browser session.
        token: Credential to pass in the ``token`` query parameter.

    Returns:
        A server-relative URL of the form
        ``/api/browser-sessions/<id>/ws?token=<token>``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    safe_id = quote(session_id, safe="")
    safe_token = quote(token, safe="")
    return f"/api/browser-sessions/{safe_id}/ws?token={safe_token}"
@router.post("/sessions", response_model=CaptureSessionResponse, status_code=201)
async def create_capture_session(
    body: CaptureSessionCreate,
    _=Depends(get_current_user),
):
    """Create a temporary browser session pointing at the given URL.

    Returns a ``session_id`` and a ``ws_url`` for the frontend to view and
    interact with the browser. The user should log in manually and then call
    the extract endpoint for the same session.

    Raises:
        HTTPException: any failure while creating the session, mapped to a
            status code by ``_browser_error``.
    """
    try:
        session = await browser_sessions.create_ephemeral(
            url=body.url,
            width=body.width,
            height=body.height,
        )
    except Exception as exc:
        # Chain explicitly so the original failure stays attached as the cause.
        raise _browser_error(exc) from exc

    # No token is embedded in ws_url: the frontend already holds the user's
    # Bearer token and is expected to append it as a query parameter itself.
    return CaptureSessionResponse(
        session_id=session.id,
        ws_url=f"/api/browser-sessions/{session.id}/ws",
    )
@router.get("/sessions/{session_id}/extract", response_model=CaptureExtractResponse)
async def extract_credentials(
    session_id: str,
    _=Depends(get_current_user),
):
    """Extract all auth credentials from the browser session.

    Returns cookies, localStorage, sessionStorage, and curated candidates.
    The response body contains the raw credential values, so treat it as
    sensitive.

    Raises:
        HTTPException: 404 when the session id is unknown; otherwise whatever
            status ``_browser_error`` assigns to an extraction failure.
    """
    try:
        session = browser_sessions.get_session(session_id)
    except KeyError:
        # An unknown id is an expected client error; the KeyError adds
        # nothing, so suppress it as context.
        raise HTTPException(404, "session not found") from None

    try:
        result = await extract_all(session)
    except Exception as exc:
        # Chain explicitly so the original failure stays attached as the cause.
        raise _browser_error(exc) from exc
    return CaptureExtractResponse(**result)
@router.delete("/sessions/{session_id}", status_code=204)
async def close_capture_session(
    session_id: str,
    _=Depends(get_current_user),
):
    """Close and release the auth-capture browser session.

    Responds with 204 and no body on success.

    Raises:
        HTTPException: any failure while closing, mapped to a status code by
            ``_browser_error``.
    """
    try:
        await browser_sessions.close(session_id)
    except Exception as exc:
        # Chain explicitly so the original failure stays attached as the cause.
        raise _browser_error(exc) from exc
@@ -0,0 +1,141 @@
"""Auth credential extraction from remote browser sessions."""
from __future__ import annotations
import json
import logging
from typing import Any
logger = logging.getLogger(__name__)
async def extract_cookies(session: Any) -> list[dict[str, Any]]:
    """Return the context's cookies reduced to a fixed set of fields.

    Every returned dict has exactly the keys ``name``, ``value``, ``domain``,
    ``httpOnly`` and ``secure``, in that order. A field missing from the
    browser's cookie record falls back to ``""`` for the text fields and
    ``False`` for the flags; any other cookie attribute is dropped.
    """
    # (field name, fallback used when the cookie record lacks the field)
    field_defaults = (
        ("name", ""),
        ("value", ""),
        ("domain", ""),
        ("httpOnly", False),
        ("secure", False),
    )
    raw_cookies = await session.context.cookies()
    return [
        {field: cookie.get(field, fallback) for field, fallback in field_defaults}
        for cookie in raw_cookies
    ]
async def extract_local_storage(page: Any) -> dict[str, str]:
    """Return the page's localStorage as a dict, or ``{}`` on any failure.

    The page is asked to serialise ``window.localStorage`` to JSON. A string
    result is parsed; a non-string result is returned as-is, or as ``{}``
    when it is falsy. Extraction is best-effort: any exception, including a
    JSON parse error, is logged at debug level and yields an empty dict.
    """
    try:
        payload = await page.evaluate("() => JSON.stringify(window.localStorage)")
        items = json.loads(payload) if isinstance(payload, str) else (payload or {})
    except Exception as exc:
        logger.debug("localStorage extraction failed: %s", exc)
        items = {}
    return items
async def extract_session_storage(page: Any) -> dict[str, str]:
    """Return the page's sessionStorage as a dict, or ``{}`` on any failure.

    The page is asked to serialise ``window.sessionStorage`` to JSON. A
    string result is parsed; a non-string result is returned as-is, or as
    ``{}`` when it is falsy. Extraction is best-effort: any exception,
    including a JSON parse error, is logged at debug level and yields an
    empty dict.
    """
    try:
        payload = await page.evaluate("() => JSON.stringify(window.sessionStorage)")
        if not isinstance(payload, str):
            return payload or {}
        return json.loads(payload)
    except Exception as exc:
        logger.debug("sessionStorage extraction failed: %s", exc)
    return {}
async def extract_all(session: Any) -> dict[str, Any]:
    """Collect every credential source from a browser session.

    The three sources are read one after another (cookies, localStorage,
    sessionStorage) and then scanned for likely credentials.

    Returns a dict with the keys:
    - cookies: list of cookie dicts
    - storage: dict of localStorage key-values
    - session_storage: dict of sessionStorage key-values
    - candidates: curated list of likely auth tokens/credentials
    """
    browser_page = session.page
    result: dict[str, Any] = {"cookies": await extract_cookies(session)}
    result["storage"] = await extract_local_storage(browser_page)
    result["session_storage"] = await extract_session_storage(browser_page)
    result["candidates"] = _curate_candidates(
        result["cookies"],
        result["storage"],
        result["session_storage"],
    )
    return result
def _curate_candidates(
    cookies: list[dict[str, Any]],
    local_storage: dict[str, str],
    session_storage: dict[str, str],
) -> list[dict[str, Any]]:
    """Pick out the extracted entries that look like usable credentials.

    Three passes run in a fixed order, and the order decides which label
    survives de-duplication:

    1. Storage entries whose key contains an auth-related word, or whose
       value is dot-separated like a JWT (at least two dots, length between
       21 and 4999).
    2. Cookies whose name contains a session/token-related word.
    3. Storage entries whose value starts with ``sk-`` and is longer than
       10 characters.

    The result is de-duplicated by candidate value; the first candidate seen
    for a given value is kept.
    """
    auth_key_hints = ("token", "jwt", "auth", "access", "secret", "api_key")
    cookie_name_hints = (
        "session", "token", "jwt", "sid", "auth",
        "connect.sid", "gin_session", "tdc_itoken",
    )
    stores = (("localStorage", local_storage), ("sessionStorage", session_storage))
    found: list[dict[str, Any]] = []

    # Pass 1: storage items with an auth-like key or a JWT-shaped value.
    for store_name, items in stores:
        for key, val in items.items():
            if not (isinstance(val, str) and val):
                continue
            source = f"{store_name}.{key}"
            lowered_key = key.lower()
            if any(hint in lowered_key for hint in auth_key_hints):
                _add_candidate(found, "bearer_token", source, val,
                               f"{store_name}.{key}")
            elif val.count(".") >= 2 and 20 < len(val) < 5000:
                _add_candidate(found, "bearer_token", source, val,
                               f"{store_name}.{key} (JWT)")

    # Pass 2: cookies whose name suggests a session or token.
    for c in cookies:
        lowered_name = c["name"].lower()
        if not any(hint in lowered_name for hint in cookie_name_hints):
            continue
        _add_candidate(
            found,
            "cookie",
            f"cookie:{c['name']}",
            f"{c['name']}={c['value']}",
            f"🍪 {c['name']} ({c['domain']})",
            extra={"cookie_name": c["name"], "cookie_value": c["value"]},
        )

    # Pass 3: storage values that look like an sk-xxx API key.
    for store_name, items in stores:
        for key, val in items.items():
            if isinstance(val, str) and val.startswith("sk-") and len(val) > 10:
                _add_candidate(found, "bearer_token", f"{store_name}.{key}", val,
                               f"{store_name}.{key} (sk-key)")

    # De-duplicate by value; dicts keep insertion order, so the first wins.
    first_by_value: dict[Any, dict[str, Any]] = {}
    for candidate in found:
        first_by_value.setdefault(candidate["value"], candidate)
    return list(first_by_value.values())
def _add_candidate(
    candidates: list[dict[str, Any]],
    ctype: str,
    source: str,
    value: str,
    label: str,
    extra: dict | None = None,
) -> None:
    """Append one candidate record to *candidates* in place.

    The record always carries ``type``, ``source``, ``value`` and ``label``;
    keys from *extra*, when given, are merged on top. Only the type, source
    and label are written to the debug log; the credential value is never
    logged.
    """
    logger.debug("auth-capture candidate: type=%s source=%s label=%s", ctype, source, label)
    candidates.append(
        {
            "type": ctype,
            "source": source,
            "value": value,
            "label": label,
            **(extra or {}),
        }
    )
@@ -302,12 +302,15 @@ class BrowserSessionService:
continue
return None
def _get(self, session_id: str) -> BrowserSession:
def get_session(self, session_id: str) -> BrowserSession:
    """Retrieve a session by id.

    Args:
        session_id: Identifier the session was registered under.

    Returns:
        The session stored for ``session_id``.

    Raises:
        KeyError: if no session is registered under ``session_id``.
    """
    session = self._sessions.get(session_id)
    # Compare with None explicitly: "missing" must depend on registry
    # membership only, not on the truthiness of the stored object.
    if session is None:
        raise KeyError("browser session not found")
    return session


_get = get_session  # alias for internal use
def _ensure_open(self, session: BrowserSession) -> None:
if session.page.is_closed():
self._discard_session(session.id)
@@ -332,5 +335,47 @@ class BrowserSessionService:
safe_origin = re.sub(r"[^a-z0-9_.-]+", "_", origin).strip("_") or "page"
return f"page-{custom_page_id}-{safe_origin[:80]}"
async def create_ephemeral(
    self,
    url: str,
    width: int = 1280,
    height: int = 720,
) -> BrowserSession:
    """Create a temporary browser session without a CustomPage record.

    The session uses an isolated random-named profile so it never collides
    with persistent custom-page profiles. Caller MUST close() when done.

    Args:
        url: Page to open; must use the http or https scheme.
        width: Viewport width, clamped to 320..2560.
        height: Viewport height, clamped to 240..1600.

    Returns:
        The registered session, already navigated to ``url``.

    Raises:
        ValueError: if ``url`` is not an http/https URL.
        Exception: whatever the browser launch or the navigation raises; a
            session whose navigation failed is closed before re-raising.
    """
    # URL schemes are case-insensitive, so compare in lowercase.
    if not url.lower().startswith(("http://", "https://")):
        raise ValueError("Only http/https URLs are allowed")
    width = max(320, min(width, 2560))
    height = max(240, min(height, 1600))

    async with self._lock:
        await self._ensure_playwright()
        session_id = uuid4().hex
        profile_key = f"auth-capture-{session_id[:12]}"
        context = await self._playwright.chromium.launch_persistent_context(
            str(self._profile_dir(profile_key)),
            headless=get_settings().browser_headless,
            viewport={"width": width, "height": height},
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )
        try:
            page = context.pages[0] if context.pages else await context.new_page()
            session = BrowserSession(
                id=session_id,
                custom_page_id=0,  # no CustomPage backs an ephemeral session
                profile_key=profile_key,
                context=context,
                page=page,
                lock=asyncio.Lock(),
            )
        except Exception:
            # The context is not registered yet, so close() cannot reach it;
            # shut it down here to avoid leaking the browser process.
            await context.close()
            raise
        self._sessions[session.id] = session

    # Navigate after releasing the service lock: goto may take up to 45 s.
    # NOTE(review): close() is not visible here; if it acquires self._lock,
    # calling it while still holding the lock would deadlock. Confirm.
    try:
        await page.goto(url, wait_until="domcontentloaded", timeout=45000)
    except Exception:
        await self.close(session.id)
        raise
    return session
browser_sessions = BrowserSessionService()