refactor(browser_manager): 实现页面池机制以提升性能 refactor(image_manager): 添加模板缓存并集成页面池 refactor(bili_parser): 迁移到异步HTTP请求并实现会话复用 docs: 新增性能优化、架构设计和最佳实践文档 chore: 更新requirements.txt添加新依赖
152 lines
5.1 KiB
Python
152 lines
5.1 KiB
Python
"""
|
||
浏览器管理器模块
|
||
|
||
负责管理全局唯一的 Playwright 浏览器实例,避免频繁启动/关闭浏览器的开销。
|
||
"""
|
||
import asyncio
|
||
from typing import Optional
|
||
from playwright.async_api import async_playwright, Browser, Playwright, Page
|
||
from ..utils.logger import logger
|
||
|
||
class BrowserManager:
    """
    Manager for one shared Playwright browser (async singleton).

    ``__new__`` always returns the same object, so every ``BrowserManager()``
    call shares a single Playwright driver, a single headless Chromium browser
    and an optional pool of reusable pages.
    """

    _instance = None
    _playwright: Optional[Playwright] = None
    _browser: Optional[Browser] = None
    # Queue of idle pages ready for reuse; None while pooling is not enabled.
    _page_pool: Optional[asyncio.Queue] = None
    _pool_size: int = 3

    def __new__(cls):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    async def initialize(self):
        """
        Start Playwright and launch headless Chromium if no browser is held.

        Failures are logged rather than raised; after a failure
        ``self._browser`` is None and any driver that was started is stopped
        again so that a later retry does not leave an orphaned driver behind.
        """
        if self._browser is not None:
            return

        try:
            logger.info("正在启动无头浏览器...")
            if self._playwright is None:
                self._playwright = await async_playwright().start()
            # headless=True runs Chromium without a visible window.
            self._browser = await self._playwright.chromium.launch(headless=True)
            logger.success("无头浏览器启动成功!")
        except Exception as e:
            logger.exception(f"无头浏览器启动失败: {e}")
            self._browser = None
            # Do not keep a driver around that has no browser attached to it.
            driver, self._playwright = self._playwright, None
            if driver is not None:
                try:
                    await driver.stop()
                except Exception as stop_error:
                    logger.warning(f"停止 Playwright 失败: {stop_error}")

    async def _close_pooled_pages(self, pool: asyncio.Queue):
        """Close every page currently idle in *pool*, logging any failure."""
        while True:
            try:
                page = pool.get_nowait()
            except asyncio.QueueEmpty:
                return
            try:
                await page.close()
            except Exception as e:
                logger.warning(f"关闭页面池页面失败: {e}")

    async def init_pool(self, size: int = 3):
        """
        Create the page pool and pre-fill it with up to *size* pages.

        The browser is initialized first when necessary. Idle pages of a
        previously created pool are closed before the new pool replaces it.
        A *size* below 1 disables pooling, because ``asyncio.Queue`` treats a
        non-positive ``maxsize`` as "unbounded". Page-creation failures are
        logged and leave the pool with fewer pages than requested.

        Args:
            size: Maximum number of idle pages kept in the pool.
        """
        if not self._browser:
            await self.initialize()

        browser = self._browser
        if not browser:
            logger.error("浏览器初始化失败,无法创建页面池")
            return

        # Retire the previous pool (if any) so its idle pages are not leaked.
        previous_pool, self._page_pool = self._page_pool, None
        if previous_pool is not None:
            await self._close_pooled_pages(previous_pool)

        if size < 1:
            logger.warning(f"页面池大小无效 ({size}),已禁用页面池")
            return

        self._pool_size = size
        # Fill a local queue and publish it afterwards: nothing else can put
        # pages into it meanwhile, so put_nowait() below can never overflow.
        pool = asyncio.Queue(maxsize=size)

        logger.info(f"正在初始化页面池 (大小: {size})...")
        for i in range(size):
            try:
                page = await browser.new_page()
                pool.put_nowait(page)
            except Exception as e:
                logger.error(f"创建页面池页面 {i+1} 失败: {e}")

        self._page_pool = pool
        logger.success(f"页面池初始化完成,当前可用页面: {pool.qsize()}")

    async def get_page(self) -> Optional[Page]:
        """
        Take a page from the pool, or create a temporary one.

        A pooled page that turns out to be closed is discarded and replaced
        by a freshly created page. When the pool is empty or not initialized,
        a temporary page is created via ``get_new_page``.

        Returns:
            A usable page, or None if no page could be created.
        """
        pool = self._page_pool
        if pool is not None:
            try:
                page = pool.get_nowait()
            except asyncio.QueueEmpty:
                page = None

            if page is not None:
                # Basic health check before handing the page out.
                if not page.is_closed():
                    return page
                logger.warning("检测到池中页面已关闭,重新创建一个...")
                return await self.get_new_page()

        # Pool empty or pooling disabled: fall back to a temporary page.
        logger.debug("页面池为空或未初始化,创建临时页面")
        return await self.get_new_page()

    async def release_page(self, page: Page):
        """
        Return *page* to the pool, or close it when it cannot be pooled.

        The page is navigated to ``about:blank`` before being pooled so that
        content from the previous use does not carry over. The page is closed
        instead when pooling is disabled, the pool is full, the reset fails,
        or the pool was replaced or shut down while the reset was running.

        Args:
            page: The page to give back; None and closed pages are ignored.
        """
        if not page or page.is_closed():
            return

        pool = self._page_pool
        if pool is not None and not pool.full():
            try:
                await page.goto("about:blank")
            except Exception as e:
                logger.warning(f"重置页面失败,将关闭该页面: {e}")
            else:
                # Re-check after the await: only pool the page if the same
                # pool is still active and the page survived the reset.
                if self._page_pool is pool and not page.is_closed():
                    try:
                        pool.put_nowait(page)
                        return
                    except asyncio.QueueFull:
                        pass

        # Pool full, pooling disabled, or reset failed: close the page.
        if not page.is_closed():
            await page.close()

    async def get_new_page(self) -> Optional[Page]:
        """
        Create a brand-new page outside the pool.

        The browser is (re)initialized first when none is held. The caller
        owns the returned page and should close it (``await page.close()``)
        when done.

        Returns:
            The new page, or None if the browser is unavailable or page
            creation fails (the failure is logged).
        """
        if self._browser is None:
            logger.warning("浏览器尚未初始化,尝试重新初始化...")
            await self.initialize()

        browser = self._browser
        if not browser:
            return None

        try:
            return await browser.new_page()
        except Exception as e:
            logger.error(f"创建新页面失败: {e}")
            return None

    async def shutdown(self):
        """
        Close pooled pages, the browser and the Playwright driver.

        The references held by the manager are cleared before anything is
        awaited, and the driver is stopped even if closing the browser
        raises, so a failed shutdown cannot leave a driver running.
        """
        pool, self._page_pool = self._page_pool, None
        browser, self._browser = self._browser, None
        playwright, self._playwright = self._playwright, None

        if pool is not None:
            await self._close_pooled_pages(pool)

        try:
            if browser:
                await browser.close()
                logger.info("浏览器已关闭")
        finally:
            if playwright:
                await playwright.stop()
                logger.info("Playwright 已停止")
|
||
|
||
# 全局浏览器管理器实例
|
||
# Shared module-level instance; BrowserManager.__new__ returns this same
# object for any later BrowserManager() call.
browser_manager = BrowserManager()
|