feat: 添加性能优化和架构文档,更新依赖和核心模块
refactor(browser_manager): 实现页面池机制以提升性能
refactor(image_manager): 添加模板缓存并集成页面池
refactor(bili_parser): 迁移到异步HTTP请求并实现会话复用
docs: 新增性能优化、架构设计和最佳实践文档
chore: 更新requirements.txt添加新依赖
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
import json
|
||||
import requests
|
||||
import aiohttp
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import Optional, Dict, Any, Union
|
||||
from cachetools import TTLCache
|
||||
@@ -23,6 +23,15 @@ HEADERS = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
||||
}
|
||||
|
||||
# Module-wide shared aiohttp ClientSession; created lazily by get_session().
|
||||
_session: Optional[aiohttp.ClientSession] = None
|
||||
|
||||
async def get_session() -> aiohttp.ClientSession:
    """Return the module-level ``aiohttp.ClientSession``.

    A new session is created on first use, and again whenever the stored
    one has been closed, so callers always receive an open session.

    :return: The shared, open client session.
    """
    global _session
    needs_new_session = _session is None or _session.closed
    if needs_new_session:
        _session = aiohttp.ClientSession()
    return _session
|
||||
|
||||
|
||||
def format_count(num: int) -> str:
|
||||
if not isinstance(num, int):
|
||||
@@ -40,20 +49,23 @@ def format_duration(seconds: int) -> str:
|
||||
return f"{minutes:02d}:{seconds:02d}"
|
||||
|
||||
|
||||
async def get_real_url(short_url: str) -> Optional[str]:
    """Resolve a short link to the URL it redirects to.

    Sends a HEAD request through the shared session without following
    redirects and reads the ``Location`` header of the response.

    :param short_url: The short link to resolve (e.g. a b23.tv URL).
    :return: The redirect target, or ``None`` when the server did not
        answer with a redirect or the request failed.
    """
    # Any redirect status may carry the target, not only 302.
    redirect_statuses = (301, 302, 303, 307, 308)
    try:
        session = await get_session()
        # aiohttp documents ClientTimeout as the timeout type; plain numbers
        # are only kept for backward compatibility.
        timeout = aiohttp.ClientTimeout(total=5)
        async with session.head(
            short_url,
            headers=HEADERS,
            allow_redirects=False,
            timeout=timeout,
        ) as response:
            if response.status in redirect_statuses:
                return response.headers.get('Location')
    except Exception as e:
        # Best-effort lookup: any failure is logged and reported as None.
        logger.error(f"获取真实URL失败: {e}")
    return None
|
||||
|
||||
def parse_video_info(video_url: str) -> Optional[Dict[str, Any]]:
|
||||
async def parse_video_info(video_url: str) -> Optional[Dict[str, Any]]:
|
||||
try:
|
||||
response = requests.get(video_url, headers=HEADERS, timeout=5)
|
||||
response.raise_for_status()
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
session = await get_session()
|
||||
async with session.get(video_url, headers=HEADERS, timeout=5) as response:
|
||||
response.raise_for_status()
|
||||
text = await response.text()
|
||||
soup = BeautifulSoup(text, 'html.parser')
|
||||
|
||||
script_tag = soup.find('script', text=re.compile('window.__INITIAL_STATE__'))
|
||||
if not script_tag or not script_tag.string:
|
||||
@@ -98,12 +110,12 @@ def parse_video_info(video_url: str) -> Optional[Dict[str, Any]]:
|
||||
"followers": up_data.get('fans', 0),
|
||||
}
|
||||
|
||||
except (requests.RequestException, KeyError, AttributeError, json.JSONDecodeError) as e:
|
||||
print(f"解析视频信息失败: {e}")
|
||||
except (aiohttp.ClientError, KeyError, AttributeError, json.JSONDecodeError) as e:
|
||||
logger.error(f"解析视频信息失败: {e}")
|
||||
|
||||
return None
|
||||
|
||||
def get_direct_video_url(video_url: str) -> Optional[str]:
|
||||
async def get_direct_video_url(video_url: str) -> Optional[str]:
|
||||
"""
|
||||
调用第三方API解析B站视频直链
|
||||
:param video_url: B站视频的完整URL
|
||||
@@ -111,12 +123,13 @@ def get_direct_video_url(video_url: str) -> Optional[str]:
|
||||
"""
|
||||
api_url = f"https://api.mir6.com/api/bzjiexi?url={video_url}&type=json"
|
||||
try:
|
||||
response = requests.get(api_url, headers=HEADERS, timeout=10)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
if data.get("code") == 200 and data.get("data"):
|
||||
return data["data"][0].get("video_url")
|
||||
except (requests.RequestException, json.JSONDecodeError, KeyError, IndexError) as e:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(api_url, headers=HEADERS, timeout=10) as response:
|
||||
response.raise_for_status()
|
||||
data = await response.json()
|
||||
if data.get("code") == 200 and data.get("data"):
|
||||
return data["data"][0].get("video_url")
|
||||
except (aiohttp.ClientError, json.JSONDecodeError, KeyError, IndexError) as e:
|
||||
logger.error(f"[bili_parser] 调用第三方API解析视频失败: {e}")
|
||||
return None
|
||||
|
||||
@@ -178,7 +191,7 @@ async def process_bili_link(event: MessageEvent, url: str):
|
||||
:param url: 待处理的B站链接
|
||||
"""
|
||||
if "b23.tv" in url:
|
||||
real_url = get_real_url(url)
|
||||
real_url = await get_real_url(url)
|
||||
if not real_url:
|
||||
logger.error(f"[bili_parser] 无法从 {url} 获取真实URL。")
|
||||
await event.reply("无法解析B站短链接。")
|
||||
@@ -186,7 +199,7 @@ async def process_bili_link(event: MessageEvent, url: str):
|
||||
else:
|
||||
real_url = url.split('?')[0]
|
||||
|
||||
video_info = parse_video_info(real_url)
|
||||
video_info = await parse_video_info(real_url)
|
||||
if not video_info:
|
||||
logger.error(f"[bili_parser] 无法从 {real_url} 解析视频信息。")
|
||||
await event.reply("无法获取视频信息,可能是B站接口变动或视频不存在。")
|
||||
@@ -197,7 +210,7 @@ async def process_bili_link(event: MessageEvent, url: str):
|
||||
if video_info['duration'] > 300: # 5分钟 = 300秒
|
||||
video_message = "视频时长超过5分钟,不进行解析。"
|
||||
else:
|
||||
direct_url = get_direct_video_url(real_url)
|
||||
direct_url = await get_direct_video_url(real_url)
|
||||
if direct_url:
|
||||
video_message = MessageSegment.video(direct_url)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user