feat(auth-capture): full cookie bundle extraction + richer refresh-auth

Problem: Meow upstream uses Cloudflare, which sets cf_clearance + session
cookies that must all be sent together. The old code only captured a single
session-named cookie via a whitelist, discarding cf_clearance entirely, and
wrote back only 'name=value' instead of the full cookie string.

Changes:

auth_capture_service.py:
  - Add _cookie_matches_hostname(): hostname suffix matching supporting
    dot-prefixed domains (.saki.lat matches api.saki.lat)
  - Add _build_cookie_bundle(): collects ALL cookies matching the current
    page's hostname, returns complete 'name1=v1; name2=v2' string
  - _curate_candidates(): new 'cookie_bundle' candidate type (type=0 in sort,
    highest priority), carries cookie_count + cookie_names in extra fields
  - extract_all(): obtain real-time page URL from session.page.url and pass
    to _curate_candidates so cookie domain filtering is accurate
  - Sort order: cookie_bundle > cookie > bearer_token/api_key > credential
  - Fix bug in original JWT dedup check (was assigning instead of checking)

custom_pages.py:
  - Add logging import + logger
  - _pick_best_candidate(): cookie preferred_auth_type now tries cookie_bundle
    first, then single cookie; bearer/api_key use existing type_map logic
  - RefreshAuthResponse: add optional 'warning' field
  - refresh_auth(): handle ctype='cookie_bundle' same as 'cookie'; always
    write full candidate.value as cookie_string (works for both types)
  - Post-write validation: attempt get_available_groups with new credentials;
    on failure, still commit (lenient mode) but set warning message explaining
    cf_clearance IP-binding as the likely cause; success logs at INFO level

Tests (test_auth_capture.py, 19 cases):
  - _cookie_matches_hostname: exact, dot-prefix subdomain, empty domain,
    different domain, evil-subdomain partial match rejection
  - _build_cookie_bundle: cf_clearance included, cross-domain excluded,
    single cookie, empty value excluded, no cookies
  - _curate_candidates: bundle ranks first, value is full string, bundle
    beats single session cookie, bearer wins when no cookies, empty case,
    cookie_count/cookie_names in extra, session fallback preserved,
    new_api_user propagation to bundle

All 46 tests pass.
This commit is contained in:
liumangmang
2026-06-02 09:32:23 +08:00
parent f17317b13c
commit 4f9acdc99c
3 changed files with 352 additions and 17 deletions
+56 -12
View File
@@ -1,6 +1,7 @@
"""Custom pages CRUD router + authenticated iframe proxy.""" """Custom pages CRUD router + authenticated iframe proxy."""
from __future__ import annotations from __future__ import annotations
import logging
import re import re
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any, List, Literal, Optional from typing import Any, List, Literal, Optional
@@ -21,6 +22,8 @@ from app.services.auth_capture_service import extract_all
from app.services.browser_session_service import browser_sessions from app.services.browser_session_service import browser_sessions
from app.utils.auth import decode_token, get_current_user, get_user_from_token_param from app.utils.auth import decode_token, get_current_user, get_user_from_token_param
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/custom-pages", tags=["custom-pages"]) router = APIRouter(prefix="/api/custom-pages", tags=["custom-pages"])
# Headers that prevent iframe embedding — strip them from proxied responses # Headers that prevent iframe embedding — strip them from proxied responses
@@ -215,17 +218,29 @@ import json as _json
class RefreshAuthResponse(BaseModel):
    """Result returned to the caller by the ``refresh_auth`` endpoint."""

    # False when the extracted credential is rejected (e.g. a token that cannot
    # be encoded as an HTTP header); True once the credential has been stored.
    success: bool
    # Human-readable summary of the outcome.
    message: str
    # Set only when the credential was stored but the follow-up verification
    # call against the upstream failed; None otherwise.
    warning: Optional[str] = None
def _pick_best_candidate(candidates: list[dict], preferred_auth_type: str) -> Optional[dict]:
    """Pick the candidate that best fits the upstream's preferred auth type.

    ``candidates`` is searched in the order given, so the caller's ranking
    decides between several candidates of the same type and also determines
    the fallback.

    Args:
        candidates: Credential candidates, each a dict carrying a ``"type"``
            entry (``"cookie_bundle"``, ``"cookie"``, ``"bearer_token"`` or
            ``"api_key"``). Entries without a ``"type"`` are never selected as
            a preferred match.
        preferred_auth_type: ``"cookie"``, ``"bearer"`` or ``"api_key"``. Any
            other value skips the preference step.

    Returns:
        The first candidate of a preferred type, otherwise the first candidate
        in the list, or ``None`` when ``candidates`` is empty.
    """
    if not candidates:
        return None

    # Candidate types to try, in priority order, for each preferred auth type.
    # For cookies the full bundle wins over a single session cookie.
    type_priority = {
        "cookie": ("cookie_bundle", "cookie"),
        "bearer": ("bearer_token",),
        "api_key": ("api_key",),
    }
    for wanted in type_priority.get(preferred_auth_type, ()):
        for candidate in candidates:
            if candidate.get("type") == wanted:
                return candidate

    # Nothing of the preferred kind: fall back to the top-ranked candidate.
    return candidates[0]
@@ -260,12 +275,10 @@ async def refresh_auth(pid: int, db: Session = Depends(get_db), _=Depends(get_cu
existing_config = _json.loads(upstream.auth_config_json or "{}") existing_config = _json.loads(upstream.auth_config_json or "{}")
ctype = candidate["type"] ctype = candidate["type"]
if ctype == "cookie": if ctype in ("cookie_bundle", "cookie"):
upstream.auth_type = "cookie" upstream.auth_type = "cookie"
if candidate.get("cookie_name") and candidate.get("cookie_value"): # cookie_bundle.value 已是完整 cookie_stringcookie.value 是 "name=value" 格式
existing_config["cookie_string"] = f"{candidate['cookie_name']}={candidate['cookie_value']}" existing_config["cookie_string"] = candidate.get("value", "")
else:
existing_config["cookie_string"] = candidate.get("value", "")
if candidate.get("new_api_user"): if candidate.get("new_api_user"):
existing_config["new_api_user"] = candidate["new_api_user"] existing_config["new_api_user"] = candidate["new_api_user"]
elif ctype == "bearer_token": elif ctype == "bearer_token":
@@ -281,7 +294,7 @@ async def refresh_auth(pid: int, db: Session = Depends(get_db), _=Depends(get_cu
except UnicodeEncodeError: except UnicodeEncodeError:
return RefreshAuthResponse( return RefreshAuthResponse(
success=False, success=False,
message=f"提取到的 Token 含有非 HTTP 标头字符,请确认已在远程浏览器中正确登录并重试", message="提取到的 Token 含有非 HTTP 标头字符,请确认已在远程浏览器中正确登录并重试",
) )
existing_config["token"] = token existing_config["token"] = token
elif ctype == "api_key": elif ctype == "api_key":
@@ -291,9 +304,40 @@ async def refresh_auth(pid: int, db: Session = Depends(get_db), _=Depends(get_cu
upstream.auth_config_json = _json.dumps(existing_config, ensure_ascii=False) upstream.auth_config_json = _json.dumps(existing_config, ensure_ascii=False)
upstream.updated_at = datetime.now(timezone.utc) upstream.updated_at = datetime.now(timezone.utc)
db.commit()
return RefreshAuthResponse(success=True, message=f"凭证已刷新 ({upstream.auth_type})") # ── 宽松验证:写回后尝试调用 get_available_groups 验证凭证可用性 ──
# 失败时仍然 commit(凭证已写入),但在 warning 字段里说明验证失败
# 这样用户仍能看到新凭证已写入,便于 debug(cf_clearance 绑 IP 时验证必然失败)
warning_msg: Optional[str] = None
try:
from app.services.upstream_client import UpstreamClient
groups_endpoint = upstream.groups_endpoint or "/groups/available"
new_auth_config = _json.loads(upstream.auth_config_json)
with UpstreamClient(
base_url=upstream.base_url,
api_prefix=upstream.api_prefix or "",
auth_type=upstream.auth_type,
auth_config=new_auth_config,
timeout=float(upstream.timeout_seconds or 30),
) as uc:
uc.get_available_groups(groups_endpoint)
logger.info("refresh_auth: upstream %s credential verification passed", upstream.id)
except Exception as exc:
warning_msg = (
f"凭证已写入但 API 验证失败:{exc}"
"若 SmartUp 与远程浏览器不在同一 IPcf_clearance 可能无法复用,请手动测试连接。"
)
logger.warning(
"refresh_auth: upstream %s credential verification failed (written anyway): %s",
upstream.id, exc,
)
db.commit()
auth_type_label = upstream.auth_type
cookie_count = candidate.get("cookie_count", "")
count_str = f"{cookie_count} 个 cookie" if cookie_count else ""
success_msg = f"凭证已刷新 ({auth_type_label}{count_str})"
return RefreshAuthResponse(success=True, message=success_msg, warning=warning_msg)
# ---- Frame Proxy (simple: strip X-Frame-Options / CSP, pass through content) ---- # ---- Frame Proxy (simple: strip X-Frame-Options / CSP, pass through content) ----
+84 -5
View File
@@ -4,6 +4,7 @@ from __future__ import annotations
import json import json
import logging import logging
from typing import Any from typing import Any
from urllib.parse import urlparse
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -110,7 +111,18 @@ async def extract_all(session: Any) -> dict[str, Any]:
session_storage = await extract_session_storage(page) session_storage = await extract_session_storage(page)
auth_headers = await extract_request_headers(session) auth_headers = await extract_request_headers(session)
new_api_user = _find_new_api_user(local_storage, session_storage) or await extract_new_api_user_id(page) new_api_user = _find_new_api_user(local_storage, session_storage) or await extract_new_api_user_id(page)
candidates = _curate_candidates(cookies, local_storage, session_storage, auth_headers, new_api_user)
# 获取当前浏览器页面的真实 URL(比 session.url 更准确)
page_url = ""
try:
page_url = page.url or ""
except Exception:
pass
candidates = _curate_candidates(
cookies, local_storage, session_storage, auth_headers, new_api_user,
page_url=page_url,
)
return { return {
"cookies": cookies, "cookies": cookies,
@@ -121,17 +133,83 @@ async def extract_all(session: Any) -> dict[str, Any]:
} }
def _cookie_matches_hostname(cookie_domain: str, hostname: str) -> bool:
    """Return whether a cookie scoped to ``cookie_domain`` applies to ``hostname``.

    A leading dot on the cookie domain is ignored, so ``.saki.lat`` matches
    both ``saki.lat`` and ``api.saki.lat``. Matching is done on whole labels:
    ``evil-saki.lat`` does not match ``.saki.lat``. Host names are compared
    case-insensitively and a trailing root dot is ignored on either side.

    Args:
        cookie_domain: The cookie's ``domain`` attribute; may be empty.
        hostname: Host name of the page the cookies are collected for.

    Returns:
        ``True`` when the cookie applies to ``hostname``. An empty
        ``cookie_domain`` or ``hostname`` is treated as "no restriction" and
        also yields ``True``.
    """
    if not cookie_domain or not hostname:
        return True  # nothing to restrict against

    # DNS names are case-insensitive; strip the optional leading dot and any
    # trailing root dot before comparing.
    domain = cookie_domain.strip(".").lower()
    host = hostname.rstrip(".").lower()
    return host == domain or host.endswith("." + domain)
def _build_cookie_bundle(
    cookies: list[dict[str, Any]],
    page_url: str,
) -> tuple[str, list[str]]:
    """Assemble one cookie string from the cookies that apply to ``page_url``.

    Args:
        cookies: Cookie dicts; the ``"name"``, ``"value"`` and ``"domain"``
            entries are read.
        page_url: URL of the current page. Its host name is used to filter
            cookies by domain. When it is empty, has no host, or cannot be
            parsed, no domain filtering is applied.

    Returns:
        ``(cookie_string, cookie_names)`` where ``cookie_string`` has the form
        ``name1=value1; name2=value2`` and ``cookie_names`` lists the included
        names in the same order. Cookies with an empty name or value are
        skipped. Both are empty when nothing qualifies.
    """
    hostname = ""
    if page_url:
        try:
            hostname = urlparse(page_url).hostname or ""
        except ValueError:
            # Malformed URL (e.g. unbalanced IPv6 brackets): keep the capture
            # going and fall back to "no host filter" instead of failing.
            hostname = ""

    parts: list[str] = []
    names: list[str] = []
    for cookie in cookies:
        name = cookie.get("name", "")
        value = cookie.get("value", "")
        if not name or not value:
            continue
        if hostname and not _cookie_matches_hostname(cookie.get("domain", ""), hostname):
            continue
        parts.append(f"{name}={value}")
        names.append(name)

    return "; ".join(parts), names
def _curate_candidates( def _curate_candidates(
cookies: list[dict[str, Any]], cookies: list[dict[str, Any]],
local_storage: dict[str, str], local_storage: dict[str, str],
session_storage: dict[str, str], session_storage: dict[str, str],
auth_headers: list[dict[str, str]], auth_headers: list[dict[str, str]],
new_api_user: str = "", new_api_user: str = "",
page_url: str = "",
) -> list[dict[str, Any]]: ) -> list[dict[str, Any]]:
"""Scan extracted data for likely credentials with confidence scoring.""" """Scan extracted data for likely credentials with confidence scoring."""
candidates: list[dict[str, Any]] = [] candidates: list[dict[str, Any]] = []
# 1. CDP-captured network headers (highest confidence) # 0. 完整 Cookie Bundle(最高优先级)
# 按页面 origin 收集所有相关 cookie,包含 cf_clearance 等 Cloudflare cookie
cookie_string, cookie_names = _build_cookie_bundle(cookies, page_url)
if cookie_string:
bundle_extra: dict[str, Any] = {
"cookie_count": len(cookie_names),
"cookie_names": cookie_names,
}
if new_api_user:
bundle_extra["new_api_user"] = new_api_user
_add(
candidates, "cookie_bundle",
f"bundle:{page_url[:60]}",
cookie_string,
f"[{len(cookie_names)} cookies: {', '.join(cookie_names[:5])}{'' if len(cookie_names) > 5 else ''}]",
f"完整 Cookie 组({len(cookie_names)} 个)",
98,
extra=bundle_extra,
)
# 1. CDP-captured network headers (high confidence)
seen = set() seen = set()
for h in auth_headers: for h in auth_headers:
dedup_key = h["value"] dedup_key = h["value"]
@@ -166,7 +244,7 @@ def _curate_candidates(
# Looks like a JWT (xx.yy.zz format) # Looks like a JWT (xx.yy.zz format)
if val.count(".") >= 2 and 20 < len(val) < 5000: if val.count(".") >= 2 and 20 < len(val) < 5000:
if dedup_key := val not in seen: if val not in seen:
seen.add(val) seen.add(val)
_add(candidates, "bearer_token", f"{store_name}.{key}", val, _preview(val), _add(candidates, "bearer_token", f"{store_name}.{key}", val, _preview(val),
f"{store_name}.{key} (JWT)", 80) f"{store_name}.{key} (JWT)", 80)
@@ -179,7 +257,7 @@ def _curate_candidates(
if not new_api_user: if not new_api_user:
new_api_user = _find_new_api_user(local_storage, session_storage) new_api_user = _find_new_api_user(local_storage, session_storage)
# 3. Session cookies # 3. 单个 Session cookie(保留,供独立 fallback / bearer 降级使用)
for c in cookies: for c in cookies:
cname = c["name"].lower() cname = c["name"].lower()
if any(k in cname for k in SESSION_COOKIE_NAMES): if any(k in cname for k in SESSION_COOKIE_NAMES):
@@ -193,8 +271,9 @@ def _curate_candidates(
f"Cookie {c['name']} ({c['domain']})", confidence, f"Cookie {c['name']} ({c['domain']})", confidence,
extra=extra) extra=extra)
# 排序:cookie_bundle 最高 → cookie → bearer/api_key → 其他
candidates.sort(key=lambda item: ( candidates.sort(key=lambda item: (
0 if item.get("type") == "cookie" and item.get("cookie_name") == "session" else 0 if item.get("type") == "cookie_bundle" else
1 if item.get("type") == "cookie" else 1 if item.get("type") == "cookie" else
2, 2,
-int(item.get("confidence") or 0), -int(item.get("confidence") or 0),
+212
View File
@@ -0,0 +1,212 @@
"""
auth_capture_service 单元测试
覆盖场景:
- 完整 cookie bundle(含 cf_clearance、session)
- domain 后缀匹配(.saki.lat → api.saki.lat)
- 仅 1 个 cookie 也生成 bundle
- 无 cookies 时 bearer_token 排第一
- bundle 优先级高于单 session cookie
- 现有 bearer / api_key 提取路径不受影响
"""
from __future__ import annotations
import pytest
from app.services.auth_capture_service import (
_build_cookie_bundle,
_cookie_matches_hostname,
_curate_candidates,
)
# ── _cookie_matches_hostname ──────────────────────────────────────────────────
def test_exact_hostname_match():
    """A cookie domain identical to the hostname matches."""
    matched = _cookie_matches_hostname("saki.lat", "saki.lat")
    assert matched
def test_dot_prefix_subdomain_match():
    """`.saki.lat` should match the subdomain `api.saki.lat`."""
    matched = _cookie_matches_hostname(".saki.lat", "api.saki.lat")
    assert matched
def test_dot_prefix_exact_match():
    """The dot-prefixed domain `.saki.lat` should still match the bare host `saki.lat`."""
    matched = _cookie_matches_hostname(".saki.lat", "saki.lat")
    assert matched
def test_no_domain_matches_all():
    """An empty cookie domain is treated as unrestricted."""
    matched = _cookie_matches_hostname("", "anything.example.com")
    assert matched
def test_different_domain_no_match():
    """A cookie for an unrelated domain does not match."""
    matched = _cookie_matches_hostname(".example.com", "saki.lat")
    assert not matched
def test_partial_suffix_no_match():
    """`evil-saki.lat` must not match `.saki.lat` (a shared suffix is not a shared label)."""
    matched = _cookie_matches_hostname(".saki.lat", "evil-saki.lat")
    assert not matched
# ── _build_cookie_bundle ──────────────────────────────────────────────────────
def _make_cookies(*items):
    """Build a cookie list from ``(name, value, domain)`` tuples."""
    cookies = []
    for name, value, domain in items:
        cookies.append(
            {"name": name, "value": value, "domain": domain, "httpOnly": False, "secure": True}
        )
    return cookies
def test_bundle_includes_cf_clearance():
    """Every cookie on the matching domain, cf_clearance included, ends up in the bundle."""
    triples = [
        ("cf_clearance", "abc123", ".saki.lat"),
        ("session", "sess456", ".saki.lat"),
        ("csrfToken", "csrf789", ".saki.lat"),
    ]
    cookie_str, names = _build_cookie_bundle(
        _make_cookies(*triples), "https://api.saki.lat/login"
    )
    for pair in ("cf_clearance=abc123", "session=sess456", "csrfToken=csrf789"):
        assert pair in cookie_str
    for expected_name in ("cf_clearance", "session"):
        assert expected_name in names
def test_bundle_excludes_unrelated_domain():
    """Cookies belonging to another domain are left out of the bundle."""
    own = ("session", "mine", ".saki.lat")
    foreign = ("other_session", "theirs", ".example.com")
    cookie_str, names = _build_cookie_bundle(
        _make_cookies(own, foreign), "https://api.saki.lat/login"
    )
    assert "mine" in cookie_str
    assert "theirs" not in cookie_str
    assert "other_session" not in names
def test_single_cookie_generates_bundle():
    """A single matching cookie still produces a bundle."""
    only_cookie = _make_cookies(("session", "onlyme", ".saki.lat"))
    joined, included = _build_cookie_bundle(only_cookie, "https://saki.lat/login")
    assert joined == "session=onlyme"
    assert included == ["session"]
def test_empty_value_cookie_excluded():
    """A cookie whose value is empty is dropped from the bundle."""
    filled = ("session", "valid", ".saki.lat")
    blank = ("empty_cookie", "", ".saki.lat")
    joined, included = _build_cookie_bundle(_make_cookies(filled, blank), "https://saki.lat/")
    assert "empty_cookie" not in included
    assert "empty_cookie" not in joined
def test_no_cookies_returns_empty_bundle():
    """No cookies in, empty string and empty name list out."""
    joined, included = _build_cookie_bundle([], "https://saki.lat/")
    assert joined == ""
    assert included == []
# ── _curate_candidates ────────────────────────────────────────────────────────
def _base_candidates(cookies, page_url="", auth_headers=None):
    """Run ``_curate_candidates`` with empty storages and no ``new_api_user``."""
    headers = auth_headers if auth_headers else []
    return _curate_candidates(cookies, {}, {}, headers, "", page_url=page_url)
def test_cookie_bundle_ranks_first():
    """When cookies exist, the cookie_bundle candidate is ranked first."""
    jar = _make_cookies(
        ("cf_clearance", "cf_val", ".saki.lat"),
        ("session", "sess_val", ".saki.lat"),
    )
    ranked = _base_candidates(jar, page_url="https://api.saki.lat/login")
    assert len(ranked) > 0
    top = ranked[0]
    assert top["type"] == "cookie_bundle"
def test_cookie_bundle_value_is_full_string():
    """The cookie_bundle candidate's value is the complete cookie string."""
    jar = _make_cookies(
        ("cf_clearance", "cf_val", ".saki.lat"),
        ("session", "sess_val", ".saki.lat"),
    )
    ranked = _base_candidates(jar, page_url="https://api.saki.lat/")
    bundle = next(entry for entry in ranked if entry["type"] == "cookie_bundle")
    bundle_value = bundle["value"]
    assert "cf_clearance=cf_val" in bundle_value
    assert "session=sess_val" in bundle_value
def test_bundle_prioritized_over_single_session_cookie():
    """The cookie_bundle candidate is ranked ahead of any single cookie candidate."""
    jar = _make_cookies(
        ("session", "sess_val", ".saki.lat"),
        ("cf_clearance", "cf_val", ".saki.lat"),
    )
    ranked = _base_candidates(jar, page_url="https://api.saki.lat/")
    kinds = [entry["type"] for entry in ranked]
    bundle_pos = kinds.index("cookie_bundle")
    # With no single-cookie candidate, treat it as sitting past the end.
    if "cookie" in kinds:
        cookie_pos = kinds.index("cookie")
    else:
        cookie_pos = len(kinds)
    assert bundle_pos < cookie_pos
def test_bearer_token_wins_when_no_cookies():
    """Without cookies, a captured auth header puts bearer_token first."""
    captured_header = {
        "type": "authorization",
        "value": "eyJ.abc.def",
        "url": "https://api.saki.lat/api/v1/groups",
    }
    ranked = _base_candidates(
        [], page_url="https://api.saki.lat/", auth_headers=[captured_header]
    )
    assert len(ranked) > 0
    top = ranked[0]
    assert top["type"] == "bearer_token"
def test_no_cookies_no_headers_returns_empty():
    """No cookies, no storage and no headers yield an empty candidate list."""
    ranked = _base_candidates([], page_url="https://api.saki.lat/")
    assert ranked == []
def test_bundle_extra_contains_cookie_count_and_names():
    """The cookie_bundle candidate carries cookie_count and cookie_names."""
    jar = _make_cookies(
        ("cf_clearance", "cf_val", ".saki.lat"),
        ("session", "sess_val", ".saki.lat"),
    )
    ranked = _base_candidates(jar, page_url="https://api.saki.lat/")
    bundle = next(entry for entry in ranked if entry["type"] == "cookie_bundle")
    assert bundle.get("cookie_count") == 2
    reported_names = set(bundle.get("cookie_names", []))
    assert reported_names == {"cf_clearance", "session"}
def test_single_session_cookie_still_present_as_fallback():
    """The single session cookie candidate is kept alongside the bundle as a fallback."""
    jar = _make_cookies(
        ("session", "sess_val", ".saki.lat"),
        ("cf_clearance", "cf_val", ".saki.lat"),
    )
    ranked = _base_candidates(jar, page_url="https://api.saki.lat/")
    kinds = [entry["type"] for entry in ranked]
    assert "cookie_bundle" in kinds
    assert "cookie" in kinds
def test_new_api_user_propagated_to_bundle():
    """new_api_user is copied onto the cookie_bundle candidate."""
    jar = _make_cookies(("session", "s", ".saki.lat"))
    ranked = _curate_candidates(
        cookies=jar,
        local_storage={"uid": "42"},
        session_storage={},
        auth_headers=[],
        new_api_user="42",
        page_url="https://saki.lat/",
    )
    bundle = next(entry for entry in ranked if entry["type"] == "cookie_bundle")
    propagated = bundle.get("new_api_user")
    assert propagated == "42"