feat: 添加性能优化和架构文档,更新依赖和核心模块

refactor(browser_manager): 实现页面池机制以提升性能
refactor(image_manager): 添加模板缓存并集成页面池
refactor(bili_parser): 迁移到异步HTTP请求并实现会话复用
docs: 新增性能优化、架构设计和最佳实践文档
chore: 更新requirements.txt添加新依赖
This commit is contained in:
2026-01-13 03:56:31 +08:00
parent 5996f6eeaf
commit 24af862924
18 changed files with 589 additions and 489 deletions

View File

@@ -15,6 +15,8 @@ class BrowserManager:
_instance = None
_playwright: Optional[Playwright] = None
_browser: Optional[Browser] = None
_page_pool: Optional[asyncio.Queue] = None
_pool_size: int = 3
def __new__(cls):
if cls._instance is None:
@@ -36,6 +38,73 @@ class BrowserManager:
logger.exception(f"无头浏览器启动失败: {e}")
self._browser = None
async def init_pool(self, size: int = 3):
"""
初始化页面池
"""
if not self._browser:
await self.initialize()
if not self._browser:
logger.error("浏览器初始化失败,无法创建页面池")
return
self._pool_size = size
self._page_pool = asyncio.Queue(maxsize=size)
logger.info(f"正在初始化页面池 (大小: {size})...")
for i in range(size):
try:
page = await self._browser.new_page()
await self._page_pool.put(page)
except Exception as e:
logger.error(f"创建页面池页面 {i+1} 失败: {e}")
logger.success(f"页面池初始化完成,当前可用页面: {self._page_pool.qsize()}")
async def get_page(self) -> Optional[Page]:
"""
从池中获取一个页面。如果池未初始化或为空,则尝试创建一个新页面(不入池)。
"""
if self._page_pool and not self._page_pool.empty():
try:
page = self._page_pool.get_nowait()
# 简单的健康检查
if page.is_closed():
logger.warning("检测到池中页面已关闭,重新创建一个...")
if self._browser:
page = await self._browser.new_page()
else:
return None
return page
except asyncio.QueueEmpty:
pass
# 如果池空了或者没初始化,回退到临时创建
logger.debug("页面池为空或未初始化,创建临时页面")
return await self.get_new_page()
async def release_page(self, page: Page):
"""
归还页面到池中。如果池已满或未初始化,则关闭页面。
"""
if not page or page.is_closed():
return
if self._page_pool:
try:
# 重置页面状态 (例如清空内容),防止数据污染
# 注意: goto('about:blank') 比 close() 快得多
await page.goto("about:blank")
self._page_pool.put_nowait(page)
return
except asyncio.QueueFull:
pass
# 池满或未启用池,直接关闭
await page.close()
async def get_new_page(self) -> Optional[Page]:
"""
获取一个新的页面 (Page)
@@ -58,6 +127,16 @@ class BrowserManager:
"""
关闭浏览器和 Playwright
"""
# 清空页面池
if self._page_pool:
while not self._page_pool.empty():
try:
page = self._page_pool.get_nowait()
await page.close()
except Exception:
pass
self._page_pool = None
if self._browser:
await self._browser.close()
self._browser = None

View File

@@ -29,6 +29,8 @@ class ImageManager:
# core/managers/image_manager.py -> core/managers -> core -> core/data/temp
self.temp_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "temp")
os.makedirs(self.temp_dir, exist_ok=True)
# 模板缓存
self._template_cache: Dict[str, Template] = {}
async def render_template(self, template_name: str, data: Dict[str, Any], output_name: str = "output.png", quality: int = 80, image_type: str = "png") -> Optional[str]:
"""
@@ -50,15 +52,20 @@ class ImageManager:
return None
try:
# 1. 渲染 HTML
with open(template_path, "r", encoding="utf-8") as f:
template_str = f.read()
# 1. 渲染 HTML (使用缓存)
if template_name in self._template_cache:
template = self._template_cache[template_name]
else:
with open(template_path, "r", encoding="utf-8") as f:
template_str = f.read()
template = Template(template_str)
self._template_cache[template_name] = template
template = Template(template_str)
html_content = template.render(**data)
# 2. 使用浏览器截图
page = await browser_manager.get_new_page()
# 改为从池中获取页面
page = await browser_manager.get_page()
if not page:
logger.error("无法获取浏览器页面")
return None
@@ -76,10 +83,11 @@ class ImageManager:
if image_type == 'jpeg':
screenshot_args['quality'] = quality
screenshot_bytes = await page.screenshot(**screenshot_args)
screenshot_bytes = await page.screenshot(**screenshot_args) # type: ignore
finally:
await page.close()
# 归还页面到池中,而不是直接关闭
await browser_manager.release_page(page)
# 3. 保存文件
output_path = os.path.join(self.temp_dir, output_name)