feat: 添加抖音视频解析插件并优化代码结构

添加抖音视频解析插件，支持自动解析抖音分享链接并提取视频信息。优化现有代码结构，包括： - 重构单例模式实现 - 移除未使用的导入和文件 - 修复性能测试脚本中的异步调用 - 优化消息事件模型中的权限常量定义 - 改进编译脚本的错误处理 - 增强B站解析插件的稳定性。同时清理了多个废弃脚本和临时文件，提升代码可维护性。
2026-01-19 01:16:22 +08:00
parent 067d81a07c
commit 9f54a98c17
17 changed files with 680 additions and 519 deletions
--- a/plugins/bili_parser.py
+++ b/plugins/bili_parser.py
@@ -30,7 +30,7 @@ HEADERS = {
 # 全局共享的 ClientSession
 _session: Optional[aiohttp.ClientSession] = None

-async def get_session() -> aiohttp.ClientSession:
+def get_session() -> aiohttp.ClientSession:
    global _session
    if _session is None or _session.closed:
        _session = aiohttp.ClientSession(headers=HEADERS)
@@ -55,7 +55,7 @@ def format_duration(seconds: int) -> str:

 async def get_real_url(short_url: str) -> Optional[str]:
    try:
-        session = await get_session()
+        session = get_session()
        async with session.head(short_url, headers=HEADERS, allow_redirects=False, timeout=5) as response:
            if response.status == 302:
                return response.headers.get('Location')
@@ -65,22 +65,71 @@ async def get_real_url(short_url: str) -> Optional[str]:

 async def parse_video_info(video_url: str) -> Optional[Dict[str, Any]]:
    try:
-        session = await get_session()
-        async with session.get(video_url, headers=HEADERS, timeout=5) as response:
+        # 清理URL，去掉不必要的查询参数，只保留基本的视频URL
+        clean_url = video_url.split('?')[0]
+        if '#/' in clean_url:
+            clean_url = clean_url.split('#/')[0]
+        
+        session = get_session()
+        async with session.get(clean_url, headers=HEADERS, timeout=5) as response:
            response.raise_for_status()
            text = await response.text()
            soup = BeautifulSoup(text, 'html.parser')

+        # 尝试多种方式获取视频数据
+        # 方式1: 尝试获取 __INITIAL_STATE__
        script_tag = soup.find('script', text=re.compile('window.__INITIAL_STATE__'))
        if not script_tag or not script_tag.string:
+            # 方式2: 尝试获取 __PLAYINFO__
+            script_tag = soup.find('script', text=re.compile('window.__PLAYINFO__'))
+        
+        if not script_tag or not script_tag.string:
+            # 方式3: 尝试获取页面标题和其他信息
+            title_tag = soup.find('title')
+            if title_tag:
+                title = title_tag.get_text().strip()
+                # 提取BV号
+                bv_match = re.search(r'(BV\w{10})', clean_url)
+                bvid = bv_match.group(1) if bv_match else '未知BV号'
+                
+                return {
+                    "title": title.replace('_哔哩哔哩_bilibili', '').strip(),
+                    "bvid": bvid,
+                    "duration": 0,
+                    "cover_url": '',
+                    "play": 0,
+                    "like": 0,
+                    "coin": 0,
+                    "favorite": 0,
+                    "share": 0,
+                    "owner_name": '未知UP主',
+                    "owner_avatar": '',
+                    "followers": 0,
+                }
            return None
            
-        match = re.search(r'window\.__INITIAL_STATE__\s*=\s*(\{[^\}]*\});', script_tag.string)
+        # 原始解析逻辑
+        match = re.search(r'window\.__INITIAL_STATE__\s*=\s*(\{[^}]*\});', script_tag.string)
+        if not match:
+            # 尝试另一种正则表达式
+            match = re.search(r'window\.__INITIAL_STATE__\s*=\s*(\{.*?\});', script_tag.string, re.DOTALL)
+        
        if not match:
            return None
            
        json_str = match.group(1)
-        data = json.loads(json_str)
+        # 清理JSON字符串中的潜在问题字符
+        json_str = json_str.strip().rstrip(';')
+        
+        try:
+            data = json.loads(json_str)
+        except json.JSONDecodeError:
+            # 如果直接解析失败，尝试清理JSON字符串
+            # 移除可能的注释或无效字符
+            cleaned_json = re.sub(r',\s*[}]', '}', json_str)  # 移除末尾多余的逗号
+            cleaned_json = re.sub(r'/\*.*?\*/', '', cleaned_json)  # 移除注释
+            cleaned_json = re.sub(r'//.*', '', cleaned_json)  # 移除行注释
+            data = json.loads(cleaned_json)
        
        video_data = data.get('videoData', {})
        up_data = data.get('upData', {})
@@ -116,6 +165,10 @@ async def parse_video_info(video_url: str) -> Optional[Dict[str, Any]]:

    except (aiohttp.ClientError, KeyError, AttributeError, json.JSONDecodeError) as e:
        logger.error(f"解析视频信息失败: {e}")
+        logger.debug(f"失败的URL: {video_url}")
+    except Exception as e:
+        logger.error(f"解析视频信息时发生未知错误: {e}")
+        logger.debug(f"失败的URL: {video_url}")
    
    return None

@@ -212,24 +265,32 @@ async def process_bili_link(event: MessageEvent, url: str):
    :param event: 消息事件对象
    :param url: 待处理的B站链接
    """
-    if "b23.tv" in url:
-        real_url = await get_real_url(url)
-        if not real_url:
-            logger.error(f"[bili_parser] 无法从 {url} 获取真实URL。")
-            await event.reply("无法解析B站短链接。")
-            return
-    else:
-        real_url = url.split('?')[0]
+    try:
+        if "b23.tv" in url:
+            real_url = await get_real_url(url)
+            if not real_url:
+                logger.error(f"[bili_parser] 无法从 {url} 获取真实URL。")
+                await event.reply("无法解析B站短链接。")
+                return
+        else:
+            # 清理URL，移除复杂查询参数，只保留基本的视频URL
+            real_url = url.split('?')[0]
+            if '#/' in real_url:
+                real_url = real_url.split('#/')[0]

-    video_info = await parse_video_info(real_url)
-    if not video_info:
-        logger.error(f"[bili_parser] 无法从 {real_url} 解析视频信息。")
-        await event.reply("无法获取视频信息，可能是B站接口变动或视频不存在。")
+        video_info = await parse_video_info(real_url)
+        if not video_info:
+            logger.error(f"[bili_parser] 无法从 {real_url} 解析视频信息。")
+            await event.reply("无法获取视频信息，可能是B站接口变动或视频不存在。")
+            return
+    except Exception as e:
+        logger.error(f"[bili_parser] 处理B站链接时发生错误: {e}")
+        await event.reply("处理B站链接时发生错误，请稍后再试。")
        return

    # 检查视频时长
    video_message: Union[str, MessageSegment]
-    if video_info['duration'] > 300:  # 5分钟 = 300秒
+    if video_info['duration'] > 1200:  # 5分钟 = 300秒
        video_message = "视频时长超过5分钟，不进行解析。"
    else:
        direct_url = await get_direct_video_url(real_url)