feat: add multi-tab support to remote browser

This commit is contained in:
liumangmang
2026-05-30 09:51:51 +08:00
parent 5c20ddc8e6
commit 3ab3a5e26f
7 changed files with 440 additions and 16 deletions
+34
View File
@@ -34,11 +34,21 @@ class BrowserSessionCreate(BaseModel):
height: int = Field(default=720)
class BrowserTabResponse(BaseModel):
    # Response schema for a single tab of a remote browser session.
    id: str  # tab identifier
    title: str  # page title of the tab
    url: str  # URL currently loaded in the tab
    created_at: float  # creation timestamp of the tab
class BrowserSessionResponse(BaseModel):
    # Response schema describing a browser session and, when present, its tabs.
    id: str
    custom_page_id: int
    url: str
    title: str
    # The tab-related fields all carry defaults, so a payload that omits them
    # still validates.
    active_tab_id: Optional[str] = None  # id of the tab currently shown
    tabs: Optional[list[BrowserTabResponse]] = None
    # NOTE(review): presumably mirrors the service-side tab_revision counter
    # that changes whenever the tab set or the active tab changes — confirm.
    tab_revision: Optional[int] = 0
class BrowserSelectionResponse(BaseModel):
@@ -123,6 +133,22 @@ async def send_event(session_id: str, body: BrowserEvent, _=Depends(get_current_
raise _error_from_browser(exc)
@router.post("/{session_id}/tabs/{tab_id}/activate", response_model=BrowserSessionResponse)
async def activate_tab(session_id: str, tab_id: str, _=Depends(get_current_user)):
    # Make `tab_id` the active tab of the session and reply with the refreshed
    # session state. (Plain comments on purpose: a docstring would be published
    # as the endpoint description.)
    try:
        refreshed = await browser_sessions.activate_tab(session_id, tab_id)
    except Exception as exc:
        # Any service failure is converted by the shared error helper.
        raise _error_from_browser(exc)
    else:
        return refreshed
@router.delete("/{session_id}/tabs/{tab_id}", response_model=BrowserSessionResponse)
async def close_tab(session_id: str, tab_id: str, _=Depends(get_current_user)):
    # Close `tab_id` in the session and reply with the refreshed session state.
    # (Plain comments on purpose: a docstring would be published as the
    # endpoint description.)
    try:
        refreshed = await browser_sessions.close_tab(session_id, tab_id)
    except Exception as exc:
        # Any service failure is converted by the shared error helper.
        raise _error_from_browser(exc)
    else:
        return refreshed
@router.get("/{session_id}/selection", response_model=BrowserSelectionResponse)
async def get_selection(session_id: str, _=Depends(get_current_user)):
try:
@@ -314,6 +340,7 @@ async def session_ws(
# Task: push screenshots
async def push_loop():
nonlocal last_frame_hash, unchanged_count
last_tab_revision = -1
try:
while True:
now = asyncio.get_event_loop().time()
@@ -327,6 +354,13 @@ async def session_ws(
interval = _WS_IDLE_INTERVAL
try:
# Check for tab state changes
session_obj = browser_sessions.get_session(session_id)
if session_obj.tab_revision != last_tab_revision:
last_tab_revision = session_obj.tab_revision
state = await browser_sessions.state(session_id)
await websocket.send_json({"type": "state", "session": state})
frame = await browser_sessions.screenshot(session_id)
except KeyError:
await websocket.send_json({"error": "session_not_found"})
+139 -6
View File
@@ -24,18 +24,35 @@ class BrowserSessionError(RuntimeError):
"""Raised when an existing browser session can no longer be used."""
@dataclass
class BrowserTab:
    """A single tab tracked inside a browser session."""

    id: str  # tab identifier (the service in this file generates it with uuid4().hex)
    page: Any  # underlying page object; its concrete type is not declared here
    created_at: float  # creation timestamp; the service uses it to pick the latest tab
@dataclass
class BrowserSession:
    """State of one remote browser session and the tabs it tracks.

    The page currently in use is not stored as a field. It is derived from
    ``active_tab_id`` through the ``page`` property, so code written against
    ``session.page`` keeps working after a tab switch.

    A ``page`` dataclass field must not coexist with that property: the
    property object would become the field's default value and ``@dataclass``
    would reject the class ("non-default argument 'tabs' follows default
    argument").
    """

    id: str
    custom_page_id: int
    profile_key: str
    context: Any
    tabs: dict[str, BrowserTab]  # tab id -> tab
    active_tab_id: str  # key into ``tabs``
    lock: asyncio.Lock
    tab_revision: int = 0  # incremented whenever the tab set or active tab changes
    cdp_session: Any = None
    captured_headers: list[dict] | None = None  # auth headers from CDP
    last_saved_state_at: float = 0.0

    @property
    def active_tab(self) -> BrowserTab:
        """Return the tab selected by ``active_tab_id``.

        Raises:
            KeyError: if ``active_tab_id`` is not a key of ``tabs``.
        """
        return self.tabs[self.active_tab_id]

    @property
    def page(self) -> Any:
        """Return the page of the active tab."""
        return self.active_tab.page
class BrowserSessionService:
# Idle TTL: close sessions that haven't had activity for this long
@@ -110,17 +127,22 @@ class BrowserSessionService:
except Exception:
logger.debug("clipboard permission grant failed (non-fatal)")
page = context.pages[0] if context.pages else await context.new_page()
tab_id = uuid4().hex
tab = BrowserTab(id=tab_id, page=page, created_at=asyncio.get_event_loop().time())
session = BrowserSession(
id=uuid4().hex,
custom_page_id=custom_page_id,
profile_key=profile_key,
context=context,
page=page,
tabs={tab_id: tab},
active_tab_id=tab_id,
lock=asyncio.Lock(),
)
self._sessions[session.id] = session
self._profiles[profile_key] = session.id
self._touch(session.id)
# Register page capture for multi-tab support
context.on("page", lambda p: self._handle_new_page(session, p))
# Evict again after adding the new session so cap is enforced immediately
await self._evict_idle_sessions()
try:
@@ -136,6 +158,33 @@ class BrowserSessionService:
"""Mark a session as recently active (reset idle timer)."""
self._last_event_at[session_id] = asyncio.get_event_loop().time()
def _handle_new_page(self, session: BrowserSession, page: Any) -> None:
    """Capture a new page opened by the remote browser (e.g. target="_blank").

    The page is registered as a new tab, made the active tab, and the
    session's ``tab_revision`` is incremented. Best-effort initialisation of
    the tab is then scheduled as a background task.

    Args:
        session: Session whose browser context reported the page.
        page: The newly opened page object.
    """
    tab_id = uuid4().hex
    tab = BrowserTab(id=tab_id, page=page, created_at=asyncio.get_event_loop().time())
    session.tabs[tab_id] = tab
    session.active_tab_id = tab_id
    session.tab_revision += 1
    logger.info("session %s: captured new tab %s (total: %d)", session.id[:12], tab_id[:8], len(session.tabs))

    # Best-effort: bring to front and reset zoom.
    # The event loop only keeps weak references to tasks, so an unreferenced
    # task may be garbage-collected before it finishes. Hold a strong
    # reference until the task is done.
    pending = getattr(self, "_tab_init_tasks", None)
    if pending is None:
        pending = set()
        self._tab_init_tasks = pending
    task = asyncio.create_task(self._init_new_tab(session, tab))
    pending.add(task)
    task.add_done_callback(pending.discard)
async def _init_new_tab(self, session: BrowserSession, tab: BrowserTab) -> None:
    """Best-effort initialisation of a freshly captured tab.

    Brings the tab's page to the front, resets the page zoom and, for
    http(s) pages, grants clipboard permissions for the page's origin.
    Failures are logged at debug level and never propagate.

    Args:
        session: Session that owns the tab.
        tab: The tab to initialise.
    """
    try:
        if tab.page.is_closed():
            # The page went away before this background task got to run.
            return
        await tab.page.bring_to_front()
        # NOTE(review): the helper receives the session, not the tab, so it
        # presumably acts on whichever tab is active when it runs — confirm.
        await self._reset_page_zoom(session)

        # Grant clipboard permission for the new page's origin if possible.
        url = tab.page.url
        if url.startswith(("http://", "https://")):
            parsed = urlparse(url)
            origin = f"{parsed.scheme}://{parsed.netloc}"
            try:
                await session.context.grant_permissions(["clipboard-read", "clipboard-write"], origin=origin)
            except Exception as exc:
                logger.debug("clipboard permission grant failed for %s (non-fatal): %s", origin, exc)
    except Exception as exc:
        logger.debug("new tab %s init failed: %s", tab.id[:8], exc)
async def screenshot(self, session_id: str) -> bytes:
session = self._get(session_id)
self._touch(session_id)
@@ -309,12 +358,82 @@ class BrowserSessionService:
self._ensure_open(session)
return await self._session_state(session)
async def activate_tab(self, session_id: str, tab_id: str) -> dict[str, Any]:
    """Make ``tab_id`` the active tab of a session and return the session state.

    The switch is committed only after the target page has been brought to
    the front, so a failure leaves ``active_tab_id`` and ``tab_revision``
    untouched.

    Args:
        session_id: Identifier of the session.
        tab_id: Identifier of the tab to activate.

    Returns:
        The session state produced by ``_session_state``.

    Raises:
        KeyError: if the tab is unknown, or its page is already closed (the
            stale tab is dropped from the session in that case).
    """
    session = self._get(session_id)
    self._touch(session_id)
    async with session.lock:
        self._ensure_open(session)
        target = session.tabs.get(tab_id)
        if target is None:
            raise KeyError("tab not found")
        if target.page.is_closed():
            # Stale entry: forget it and signal the change to state watchers.
            session.tabs.pop(tab_id, None)
            session.tab_revision += 1
            raise KeyError("tab not found")
        await target.page.bring_to_front()
        session.active_tab_id = tab_id
        session.tab_revision += 1
        return await self._session_state(session)
async def close_tab(self, session_id: str, tab_id: str) -> dict[str, Any]:
    """Close one tab of a session and return the resulting session state.

    If the closed tab was active, the most recently created remaining tab
    becomes active. Tabs whose page is still open are preferred.

    Args:
        session_id: Identifier of the session.
        tab_id: Identifier of the tab to close.

    Returns:
        The session state produced by ``_session_state``.

    Raises:
        KeyError: if the tab is unknown.
        ValueError: if the tab is the only one left in the session.
    """
    session = self._get(session_id)
    self._touch(session_id)
    async with session.lock:
        self._ensure_open(session)
        if tab_id not in session.tabs:
            raise KeyError("tab not found")
        if len(session.tabs) <= 1:
            raise ValueError("cannot close the last tab")

        tab = session.tabs.pop(tab_id)
        was_active = session.active_tab_id == tab_id
        if was_active:
            # Re-point the active tab before the first await, so code that
            # reads session.page while the page is closing never sees an id
            # that is missing from session.tabs.
            remaining = list(session.tabs.values())
            still_open = [t for t in remaining if not t.page.is_closed()]
            successor = max(still_open or remaining, key=lambda t: t.created_at)
            session.active_tab_id = successor.id
        # The tab set has changed regardless of how the browser calls go.
        session.tab_revision += 1

        try:
            await tab.page.close()
        except Exception as exc:
            logger.debug("closing tab %s failed (ignored): %s", tab_id[:8], exc)

        if was_active:
            try:
                await session.tabs[session.active_tab_id].page.bring_to_front()
            except Exception as exc:
                # Best-effort: the tab was closed, so do not fail the request.
                logger.debug("bring_to_front after closing tab %s failed: %s", tab_id[:8], exc)
        return await self._session_state(session)
async def _session_state(self, session: BrowserSession) -> dict[str, Any]:
    """Build the serialisable state of a session, pruning closed tabs.

    Tabs whose page reports ``is_closed()`` are removed from the session.
    If the active tab is gone, the most recently created remaining tab
    becomes active and ``tab_revision`` is incremented.

    Args:
        session: Session to describe.

    Returns:
        A dict with the session id, custom page id, url/title of the active
        tab, the active tab id, the tab list sorted by ``created_at`` and the
        current ``tab_revision``.

    Raises:
        BrowserSessionError: if no tab is left after pruning.
    """
    described: dict[str, dict[str, Any]] = {}
    closed_ids: set[str] = set()

    # The new-page handler is a plain callback that can insert into
    # session.tabs while this coroutine is suspended on title(). Iterate
    # over snapshots, and loop until every tab present has been visited.
    while True:
        pending = [
            (tid, tab)
            for tid, tab in list(session.tabs.items())
            if tid not in described and tid not in closed_ids
        ]
        if not pending:
            break
        for tid, tab in pending:
            if tab.page.is_closed():
                closed_ids.add(tid)
                continue
            try:
                title = await tab.page.title()
                url = tab.page.url
            except Exception:
                title, url = "Loading...", "about:blank"
            described[tid] = {
                "id": tid,
                "title": title,
                "url": url,
                "created_at": tab.created_at,
            }

    for cid in closed_ids:
        session.tabs.pop(cid, None)
    if not session.tabs:
        raise BrowserSessionError("all browser pages are closed")
    if session.active_tab_id not in session.tabs:
        latest = max(session.tabs.values(), key=lambda t: t.created_at)
        session.active_tab_id = latest.id
        session.tab_revision += 1

    tabs = sorted(
        (entry for tid, entry in described.items() if tid in session.tabs),
        key=lambda entry: entry["created_at"],
    )
    # Reuse the values gathered above rather than querying the active page
    # again: one title() call per tab, and the same fallback on failure.
    active = described[session.active_tab_id]
    return {
        "id": session.id,
        "custom_page_id": session.custom_page_id,
        "url": active["url"],
        "title": active["title"],
        "active_tab_id": session.active_tab_id,
        "tabs": tabs,
        "tab_revision": session.tab_revision,
    }
async def _ensure_playwright(self) -> None:
@@ -470,9 +589,18 @@ class BrowserSessionService:
_get = get_session # alias for internal use
def _ensure_open(self, session: BrowserSession) -> None:
    """Make sure the session's active tab points at an open page.

    Nothing happens while the active tab's page is open. Otherwise every
    tab whose page is closed is dropped and the most recently created
    surviving tab becomes active (incrementing ``tab_revision``). If no tab
    survives, the session is discarded.

    Args:
        session: Session to check.

    Raises:
        BrowserSessionError: if every page of the session is closed.
    """
    if not session.active_tab.page.is_closed():
        return

    # Drop all closed tabs, not only the active one. Otherwise the fallback
    # below could select another page that is already closed.
    for stale_id in [tid for tid, tab in session.tabs.items() if tab.page.is_closed()]:
        session.tabs.pop(stale_id, None)

    if not session.tabs:
        self._discard_session(session.id)
        raise BrowserSessionError("all browser pages are closed")

    # Pick the latest created tab.
    latest = max(session.tabs.values(), key=lambda t: t.created_at)
    session.active_tab_id = latest.id
    session.tab_revision += 1
    logger.info("active tab closed, switched to %s", latest.id[:8])
def _discard_session(self, session_id: str) -> BrowserSession | None:
session = self._sessions.pop(session_id, None)
@@ -592,17 +720,22 @@ class BrowserSessionService:
except Exception:
logger.debug("clipboard permission grant failed (non-fatal)")
page = context.pages[0] if context.pages else await context.new_page()
tab_id = uuid4().hex
tab = BrowserTab(id=tab_id, page=page, created_at=asyncio.get_event_loop().time())
session = BrowserSession(
id=session_id,
custom_page_id=0,
profile_key=profile_key,
context=context,
page=page,
tabs={tab_id: tab},
active_tab_id=tab_id,
lock=asyncio.Lock(),
captured_headers=[],
)
self._sessions[session.id] = session
self._touch(session.id)
# Register page capture
context.on("page", lambda p: self._handle_new_page(session, p))
# Start CDP network capture BEFORE the initial page load,
# so we capture login redirects and auth headers from the start.
await self._start_cdp_capture(session)
+13 -8
View File
@@ -159,7 +159,7 @@ def test_screenshot_throttled_save():
try:
from unittest.mock import AsyncMock, MagicMock
from app.services.browser_session_service import BrowserSession
from app.services.browser_session_service import BrowserSession, BrowserTab
# Mock Context & Page
fake_context = AsyncMock()
@@ -172,7 +172,8 @@ def test_screenshot_throttled_save():
custom_page_id=1,
profile_key="page-1-test",
context=fake_context,
page=fake_page,
tabs={"main": BrowserTab(id="main", page=fake_page, created_at=0.0)},
active_tab_id="main",
lock=asyncio.Lock(),
last_saved_state_at=0.0
)
@@ -204,7 +205,8 @@ def test_screenshot_throttled_save():
custom_page_id=0,
profile_key="auth-capture-xyz",
context=fake_context,
page=fake_page,
tabs={"main": BrowserTab(id="main", page=fake_page, created_at=0.0)},
active_tab_id="main",
lock=asyncio.Lock(),
last_saved_state_at=0.0
)
@@ -228,7 +230,7 @@ def test_close_saves_state_and_cleans_up():
try:
from unittest.mock import AsyncMock, MagicMock
from app.services.browser_session_service import BrowserSession
from app.services.browser_session_service import BrowserSession, BrowserTab
fake_context = AsyncMock()
fake_page = MagicMock()
@@ -240,7 +242,8 @@ def test_close_saves_state_and_cleans_up():
custom_page_id=2,
profile_key="page-2-test",
context=fake_context,
page=fake_page,
tabs={"main": BrowserTab(id="main", page=fake_page, created_at=0.0)},
active_tab_id="main",
lock=asyncio.Lock(),
last_saved_state_at=time.monotonic() # 此时在限流内
)
@@ -260,7 +263,8 @@ def test_close_saves_state_and_cleans_up():
custom_page_id=0,
profile_key="auth-capture-abc",
context=eph_context,
page=eph_page,
tabs={"main": BrowserTab(id="main", page=eph_page, created_at=0.0)},
active_tab_id="main",
lock=asyncio.Lock(),
last_saved_state_at=0.0
)
@@ -382,7 +386,7 @@ def test_websocket_event_saves_state():
try:
from unittest.mock import AsyncMock, MagicMock
from app.services.browser_session_service import BrowserSession
from app.services.browser_session_service import BrowserSession, BrowserTab
fake_context = AsyncMock()
fake_page = MagicMock()
@@ -395,7 +399,8 @@ def test_websocket_event_saves_state():
custom_page_id=3,
profile_key="page-3-ws-test",
context=fake_context,
page=fake_page,
tabs={"main": BrowserTab(id="main", page=fake_page, created_at=0.0)},
active_tab_id="main",
lock=asyncio.Lock(),
last_saved_state_at=0.0
)
+141
View File
@@ -0,0 +1,141 @@
import asyncio
import json
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from app.services.browser_session_service import BrowserSessionService, BrowserSession, BrowserTab, BrowserSessionError
@pytest.fixture
def service():
    """Provide a fresh BrowserSessionService instance for each test."""
    return BrowserSessionService()
@pytest.fixture
def session(service):
    """Register a session with a single open tab ("tab1") on the service.

    The tab's page is a mock reporting an open page with a fixed URL and
    title. The registered session object is returned to the test.
    """
    initial_page = AsyncMock()
    initial_page.url = "https://initial.test"
    initial_page.title = AsyncMock(return_value="Initial Tab")
    initial_page.is_closed = MagicMock(return_value=False)

    first_tab = BrowserTab(id="tab1", page=initial_page, created_at=100.0)
    registered = BrowserSession(
        id="sess123",
        custom_page_id=1,
        profile_key="profile1",
        context=AsyncMock(),
        tabs={first_tab.id: first_tab},
        active_tab_id=first_tab.id,
        lock=asyncio.Lock(),
    )
    service._sessions[registered.id] = registered
    return registered
@pytest.mark.asyncio
async def test_tab_capture(service, session):
    """A page reported by the browser becomes a new, active, initialised tab."""
    new_page = AsyncMock()
    new_page.is_closed = MagicMock(return_value=False)
    new_page.url = "https://new.test"
    new_page.title = AsyncMock(return_value="New Tab")
    new_page.bring_to_front = AsyncMock()

    # Simulate page capture
    service._handle_new_page(session, new_page)

    assert len(session.tabs) == 2
    assert session.active_tab_id != "tab1"
    new_tab_id = session.active_tab_id
    assert session.tabs[new_tab_id].page == new_page
    assert session.tab_revision == 1

    # Wait for the background init task itself rather than sleeping for a
    # fixed interval, then check that it really ran.
    background = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
    if background:
        await asyncio.wait(background, timeout=1.0)
    new_page.bring_to_front.assert_awaited_once()
@pytest.mark.asyncio
async def test_activate_tab(service, session):
    """Activating a second tab selects it, fronts its page and bumps the revision."""
    second_page = AsyncMock()
    second_page.bring_to_front = AsyncMock()
    second_page.is_closed = MagicMock(return_value=False)
    second_tab = BrowserTab(id="tab2", page=second_page, created_at=200.0)
    session.tabs[second_tab.id] = second_tab

    await service.activate_tab(session.id, second_tab.id)

    assert session.active_tab_id == "tab2"
    assert second_page.bring_to_front.call_count == 1
    assert session.tab_revision == 1
@pytest.mark.asyncio
async def test_close_tab_safety(service, session):
    """Closing the only tab is refused and leaves the session untouched."""
    # Cannot close last tab
    with pytest.raises(ValueError, match="cannot close the last tab"):
        await service.close_tab(session.id, "tab1")

    # The refusal must not have side effects on the session.
    assert "tab1" in session.tabs
    assert session.active_tab_id == "tab1"
    assert session.tab_revision == 0
@pytest.mark.asyncio
async def test_close_active_tab_fallback(service, session):
    """Closing the active tab closes its page and falls back to the remaining tab."""
    # Setup tab2
    page2 = AsyncMock()
    page2.is_closed = MagicMock(return_value=False)
    page2.bring_to_front = AsyncMock()
    tab2_id = "tab2"
    session.tabs[tab2_id] = BrowserTab(id=tab2_id, page=page2, created_at=200.0)

    # Active is tab2
    session.active_tab_id = tab2_id

    # Close active tab2
    await service.close_tab(session.id, tab2_id)

    assert len(session.tabs) == 1
    assert session.active_tab_id == "tab1"
    assert tab2_id not in session.tabs
    assert session.tabs["tab1"].page.bring_to_front.call_count == 1
    # The page must really have been closed and watchers notified.
    page2.close.assert_awaited_once()
    assert session.tab_revision == 1
@pytest.mark.asyncio
async def test_session_state_includes_tabs(service, session):
    """The reported state lists every tab, ordered by creation time."""
    # Add a second, newer tab next to the fixture's "tab1".
    second_page = AsyncMock()
    second_page.title = AsyncMock(return_value="Tab 2")
    second_page.url = "https://tab2.test"
    second_page.is_closed = MagicMock(return_value=False)
    session.tabs["tab2"] = BrowserTab(id="tab2", page=second_page, created_at=200.0)

    state = await service.state(session.id)
    listed = state["tabs"]

    assert state["id"] == session.id
    assert state["active_tab_id"] == "tab1"
    assert len(listed) == 2
    # Tabs should be sorted by created_at
    first, second = listed[0], listed[1]
    assert first["id"] == "tab1"
    assert second["id"] == "tab2"
    assert first["title"] == "Initial Tab"
    assert second["url"] == "https://tab2.test"
@pytest.mark.asyncio
async def test_ensure_open_prunes_closed_tab(service, session):
    """A closed active tab is dropped and the session falls back to tab1."""
    # Second tab whose page is already closed, marked as the active one.
    dead_page = AsyncMock()
    dead_page.is_closed = MagicMock(return_value=True)
    dead_tab = BrowserTab(id="tab2", page=dead_page, created_at=200.0)
    session.tabs[dead_tab.id] = dead_tab
    session.active_tab_id = dead_tab.id

    # Any interaction method is expected to run _ensure_open first,
    # which falls back to tab1.
    await service.screenshot(session.id)

    assert session.active_tab_id == "tab1"
    assert "tab2" not in session.tabs
    assert session.tab_revision == 1
@pytest.mark.asyncio
async def test_ensure_open_discards_session_if_all_tabs_closed(service, session):
    """When the only tab's page is closed, the session is discarded with an error."""
    only_tab = session.tabs["tab1"]
    only_tab.page.is_closed = MagicMock(return_value=True)

    with pytest.raises(BrowserSessionError, match="all browser pages are closed"):
        await service.screenshot(session.id)

    registered_ids = service._sessions
    assert session.id not in registered_ids