feat: 添加抖音视频解析插件并优化代码结构

添加抖音视频解析插件,支持自动解析抖音分享链接并提取视频信息。优化现有代码结构,包括:
- 重构单例模式实现
- 移除未使用的导入和文件
- 修复性能测试脚本中的异步调用
- 优化消息事件模型中的权限常量定义
- 改进编译脚本的错误处理
- 增强B站解析插件的稳定性

同时清理了多个废弃脚本和临时文件,提升代码可维护性。
This commit is contained in:
2026-01-19 01:16:22 +08:00
parent 067d81a07c
commit 9f54a98c17
17 changed files with 680 additions and 519 deletions

View File

@@ -30,7 +30,7 @@ HEADERS = {
# 全局共享的 ClientSession
_session: Optional[aiohttp.ClientSession] = None
async def get_session() -> aiohttp.ClientSession:
def get_session() -> aiohttp.ClientSession:
global _session
if _session is None or _session.closed:
_session = aiohttp.ClientSession(headers=HEADERS)
@@ -55,7 +55,7 @@ def format_duration(seconds: int) -> str:
async def get_real_url(short_url: str) -> Optional[str]:
try:
session = await get_session()
session = get_session()
async with session.head(short_url, headers=HEADERS, allow_redirects=False, timeout=5) as response:
if response.status == 302:
return response.headers.get('Location')
@@ -65,22 +65,71 @@ async def get_real_url(short_url: str) -> Optional[str]:
async def parse_video_info(video_url: str) -> Optional[Dict[str, Any]]:
try:
session = await get_session()
async with session.get(video_url, headers=HEADERS, timeout=5) as response:
# 清理URL:去掉不必要的查询参数,只保留基本的视频URL
clean_url = video_url.split('?')[0]
if '#/' in clean_url:
clean_url = clean_url.split('#/')[0]
session = get_session()
async with session.get(clean_url, headers=HEADERS, timeout=5) as response:
response.raise_for_status()
text = await response.text()
soup = BeautifulSoup(text, 'html.parser')
# 尝试多种方式获取视频数据
# 方式1: 尝试获取 __INITIAL_STATE__
script_tag = soup.find('script', text=re.compile('window.__INITIAL_STATE__'))
if not script_tag or not script_tag.string:
# 方式2: 尝试获取 __PLAYINFO__
script_tag = soup.find('script', text=re.compile('window.__PLAYINFO__'))
if not script_tag or not script_tag.string:
# 方式3: 尝试获取页面标题和其他信息
title_tag = soup.find('title')
if title_tag:
title = title_tag.get_text().strip()
# 提取BV号
bv_match = re.search(r'(BV\w{10})', clean_url)
bvid = bv_match.group(1) if bv_match else '未知BV号'
return {
"title": title.replace('_哔哩哔哩_bilibili', '').strip(),
"bvid": bvid,
"duration": 0,
"cover_url": '',
"play": 0,
"like": 0,
"coin": 0,
"favorite": 0,
"share": 0,
"owner_name": '未知UP主',
"owner_avatar": '',
"followers": 0,
}
return None
match = re.search(r'window\.__INITIAL_STATE__\s*=\s*(\{[^\}]*\});', script_tag.string)
# 原始解析逻辑
match = re.search(r'window\.__INITIAL_STATE__\s*=\s*(\{[^}]*\});', script_tag.string)
if not match:
# 尝试另一种正则表达式
match = re.search(r'window\.__INITIAL_STATE__\s*=\s*(\{.*?\});', script_tag.string, re.DOTALL)
if not match:
return None
json_str = match.group(1)
data = json.loads(json_str)
# 清理JSON字符串中的潜在问题字符
json_str = json_str.strip().rstrip(';')
try:
data = json.loads(json_str)
except json.JSONDecodeError:
# 如果直接解析失败,尝试清理JSON字符串
# 移除可能的注释或无效字符
cleaned_json = re.sub(r',\s*[}]', '}', json_str) # 移除末尾多余的逗号
cleaned_json = re.sub(r'/\*.*?\*/', '', cleaned_json) # 移除注释
cleaned_json = re.sub(r'//.*', '', cleaned_json) # 移除行注释
data = json.loads(cleaned_json)
video_data = data.get('videoData', {})
up_data = data.get('upData', {})
@@ -116,6 +165,10 @@ async def parse_video_info(video_url: str) -> Optional[Dict[str, Any]]:
except (aiohttp.ClientError, KeyError, AttributeError, json.JSONDecodeError) as e:
logger.error(f"解析视频信息失败: {e}")
logger.debug(f"失败的URL: {video_url}")
except Exception as e:
logger.error(f"解析视频信息时发生未知错误: {e}")
logger.debug(f"失败的URL: {video_url}")
return None
@@ -212,24 +265,32 @@ async def process_bili_link(event: MessageEvent, url: str):
:param event: 消息事件对象
:param url: 待处理的B站链接
"""
if "b23.tv" in url:
real_url = await get_real_url(url)
if not real_url:
logger.error(f"[bili_parser] 无法从 {url} 获取真实URL。")
await event.reply("无法解析B站短链接")
return
else:
real_url = url.split('?')[0]
try:
if "b23.tv" in url:
real_url = await get_real_url(url)
if not real_url:
logger.error(f"[bili_parser] 无法从 {url} 获取真实URL")
await event.reply("无法解析B站短链接。")
return
else:
# 清理URL:移除复杂查询参数,只保留基本的视频URL
real_url = url.split('?')[0]
if '#/' in real_url:
real_url = real_url.split('#/')[0]
video_info = await parse_video_info(real_url)
if not video_info:
logger.error(f"[bili_parser] 无法从 {real_url} 解析视频信息。")
await event.reply("无法获取视频信息可能是B站接口变动或视频不存在。")
video_info = await parse_video_info(real_url)
if not video_info:
logger.error(f"[bili_parser] 无法从 {real_url} 解析视频信息。")
await event.reply("无法获取视频信息可能是B站接口变动或视频不存在。")
return
except Exception as e:
logger.error(f"[bili_parser] 处理B站链接时发生错误: {e}")
await event.reply("处理B站链接时发生错误请稍后再试。")
return
# 检查视频时长
video_message: Union[str, MessageSegment]
if video_info['duration'] > 300: # 5分钟 = 300秒
if video_info['duration'] > 1200: # 20分钟 = 1200秒(注意:下方提示语仍写“5分钟”,需同步)
video_message = "视频时长超过5分钟不进行解析。"
else:
direct_url = await get_direct_video_url(real_url)