Files
NeoBot/plugins/web_parser/parsers/bili.py
K2cr2O1 ff4a4d92a5 feat: 添加多线程架构支持并优化性能
实现线程管理器以支持高并发场景,添加GIL-free模式提升Python 3.14下的多线程性能
新增B站API集成和本地文件服务器功能,改进镜像插件支持GIF处理
更新文档说明多线程架构和GIL-free模式的使用方法
2026-03-01 16:01:51 +08:00

385 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import re
from typing import Optional, Dict, Any, List, Union
from urllib.parse import urlparse, parse_qs
from core.utils.logger import logger
from models import MessageEvent, MessageSegment
from ..base import BaseParser
from ..utils import format_duration
from bilibili_api import video, select_client, Credential
from bilibili_api.exceptions import ResponseCodeException
from core.config_loader import global_config
from core.services.local_file_server import download_to_local
# Availability flag for bilibili_api-python. The library is imported
# unconditionally at the top of this module, so the flag is always True here;
# the page-scraping fallback is only taken as the primary path if something
# else sets it to False.
BILI_API_AVAILABLE = True

# Explicitly select the aiohttp backend to avoid conflicts with other HTTP
# client libraries. A failure is logged and otherwise ignored (best effort).
try:
    select_client("aiohttp")
except Exception as e:
    logger.warning(f"设置 bilibili_api 客户端失败: {e}")
class BiliParser(BaseParser):
    """
    Bilibili video parser built on the bilibili-api-python library.

    Resolves a video URL (or a b23.tv short link) to its BV id, fetches the
    video metadata, and renders it as a list of forward-message nodes.
    """

    # Videos longer than this many seconds are summarised but not downloaded.
    MAX_DOWNLOAD_DURATION = 1200
    # HTTP statuses whose response carries the redirect target in `Location`.
    _REDIRECT_STATUSES = (301, 302, 303, 307, 308)

    def __init__(self):
        """Set the parser's display name, URL pattern and forward-node nickname."""
        super().__init__()
        self.name = "B站解析器"
        self.url_pattern = re.compile(r"https?://(?:www\.)?(bilibili\.com/video/\w+|b23\.tv/[a-zA-Z0-9]+)")
        self.nickname = "B站视频解析"

    @staticmethod
    def _normalize_image_url(raw_url: str) -> str:
        """Drop the ``@...`` suffix and make a protocol-relative URL absolute."""
        if not raw_url:
            return ''
        cleaned = raw_url.split('@')[0]
        if cleaned.startswith('//'):
            cleaned = 'https:' + cleaned
        return cleaned

    @staticmethod
    def _extract_count(text: str, key: str) -> int:
        """Return the first integer stored under ``"key":<digits>`` in *text*, or 0."""
        match = re.search(rf'"{key}":(\d+)', text)
        return int(match.group(1)) if match else 0

    def _get_credential(self) -> Optional[Credential]:
        """
        Build a login credential from the global configuration.

        Returns:
            Optional[Credential]: A credential when sessdata, bili_jct and
            buvid3 are all set; otherwise None (anonymous access).
        """
        try:
            bili_config = global_config.bilibili
            if bili_config.sessdata and bili_config.bili_jct and bili_config.buvid3:
                return Credential(
                    sessdata=bili_config.sessdata,
                    bili_jct=bili_config.bili_jct,
                    buvid3=bili_config.buvid3,
                    dedeuserid=bili_config.dedeuserid,
                )
        except Exception as e:
            # Best effort: a missing or malformed config section just means
            # anonymous access, but leave a trace instead of hiding it.
            logger.debug(f"[{self.name}] 读取 B 站凭证失败: {e}")
        return None

    def _build_video_info(self, bvid: str, info: Dict[str, Any]) -> Dict[str, Any]:
        """Map the dict returned by ``Video.get_info()`` to the parser's result dict."""
        # `or {}` guards against the API returning an explicit null.
        owner = info.get('owner') or {}
        stat = info.get('stat') or {}
        return {
            "title": info.get('title', '未知标题'),
            "bvid": bvid,
            "aid": info.get('aid', 0),
            "duration": info.get('duration', 0),
            "cover_url": self._normalize_image_url(info.get('pic', '')),
            "play": stat.get('view', 0),
            "like": stat.get('like', 0),
            "coin": stat.get('coin', 0),
            "favorite": stat.get('favorite', 0),
            "share": stat.get('share', 0),
            "danmaku": stat.get('danmaku', 0),
            "owner_name": owner.get('name', '未知UP主'),
            "owner_avatar": self._normalize_image_url(owner.get('face', '')),
            "followers": owner.get('fans', 0),
            "description": info.get('desc', ''),
            "pubdate": info.get('pubdate', 0),
        }

    async def parse(self, url: str) -> Optional[Dict[str, Any]]:
        """
        Fetch metadata for a Bilibili video.

        Args:
            url (str): Video page URL or b23.tv short link.

        Returns:
            Optional[Dict[str, Any]]: Video information dict, or None on failure.
        """
        bvid = self.extract_bvid(url)
        if not bvid and 'b23.tv' in url:
            # Short links carry an opaque token rather than the BV id, so the
            # redirect target has to be looked up before extraction can work.
            real_url = await self.get_real_url(url)
            if real_url:
                bvid = self.extract_bvid(real_url)
                if bvid:
                    # Let the fallback scraper hit the real page directly.
                    url = real_url
        if not bvid:
            logger.error(f"[{self.name}] 无法从 URL 提取 BV 号: {url}")
            return None

        if not BILI_API_AVAILABLE:
            # Library unavailable: scrape the page instead.
            return await self._parse_fallback(url, bvid)

        try:
            credential = self._get_credential()
            v = video.Video(bvid=bvid, credential=credential)
            info = await v.get_info()
            return self._build_video_info(bvid, info)
        except ResponseCodeException as e:
            # The API answered with an error code; this path does not fall
            # back to page scraping.
            logger.error(f"[{self.name}] API 返回错误: {e.code} - {e.msg}")
            return None
        except Exception as e:
            logger.error(f"[{self.name}] 解析视频信息失败: {e}")
            logger.info(f"[{self.name}] 尝试备用解析方案")
            return await self._parse_fallback(url, bvid)

    async def _parse_fallback(self, url: str, bvid: str) -> Optional[Dict[str, Any]]:
        """
        Fallback parser that scrapes the video page without bilibili-api-python.

        Only the title, counters and uploader name/avatar are recovered; the
        remaining fields are filled with empty/zero placeholders.

        Args:
            url (str): Video page URL.
            bvid (str): BV id of the video.

        Returns:
            Optional[Dict[str, Any]]: Video information dict, or None on failure.
        """
        try:
            session = self.get_session()
            # Strip the query string and any hash-router suffix.
            clean_url = url.split('?')[0]
            if '#/' in clean_url:
                clean_url = clean_url.split('#/')[0]
            async with session.get(clean_url, headers=self.HEADERS, timeout=5) as response:
                response.raise_for_status()
                text = await response.text()

            title_match = re.search(r'<h1[^>]*>([^<]+)</h1>', text)
            title = title_match.group(1).strip() if title_match else '未知标题'

            # Best effort: takes the first "name"/"face" pair found in the page.
            owner_match = re.search(r'"name":"([^"]+)"', text)
            owner_name = owner_match.group(1) if owner_match else '未知UP主'
            face_match = re.search(r'"face":"([^"]+)"', text)
            owner_face = face_match.group(1) if face_match else ''
            # Inline page JSON may escape slashes as \u002F (assumption about
            # Bilibili's page format); undo that before normalising the URL.
            owner_face = owner_face.replace('\\u002F', '/')
            owner_face = self._normalize_image_url(owner_face)

            return {
                "title": title,
                "bvid": bvid,
                "aid": 0,
                "duration": 0,
                "cover_url": '',
                "play": self._extract_count(text, 'view'),
                "like": self._extract_count(text, 'like'),
                "coin": self._extract_count(text, 'coin'),
                "favorite": self._extract_count(text, 'favorite'),
                "share": self._extract_count(text, 'share'),
                "danmaku": 0,
                "owner_name": owner_name,
                "owner_avatar": owner_face,
                "followers": 0,
                "description": '',
                "pubdate": 0,
            }
        except Exception as e:
            logger.error(f"[{self.name}] 备用解析方案失败: {e}")
            return None

    def extract_bvid(self, url: str) -> Optional[str]:
        """
        Extract the BV id from a URL without any network access.

        Args:
            url (str): Video page URL or b23.tv short link.

        Returns:
            Optional[str]: The BV id, or None if the URL does not contain one.
            Short links that only carry an opaque token yield None; resolve
            them with `get_real_url` first.
        """
        # Case 1: regular video page, .../video/BVxxxxxxxxxx
        page_match = re.search(r'/video/(BV\w+)', url)
        if page_match:
            return page_match.group(1)
        # Case 2: short link that happens to embed the BV id literally.
        if 'b23.tv' in url:
            short_match = re.search(r'BV\w{10}', url)
            if short_match:
                return short_match.group(0)
        return None

    async def get_real_url(self, short_url: str) -> Optional[str]:
        """
        Resolve a short link to its redirect target.

        Args:
            short_url (str): b23.tv short link.

        Returns:
            Optional[str]: The `Location` header of the redirect response, or
            None if the request fails or the response is not a redirect.
        """
        try:
            session = self.get_session()
            # Redirects are not followed: only the Location header is wanted.
            async with session.head(short_url, headers=self.HEADERS, allow_redirects=False, timeout=5) as response:
                if response.status in self._REDIRECT_STATUSES:
                    return response.headers.get('Location')
        except Exception as e:
            logger.error(f"[{self.name}] 获取真实URL失败: {e}")
        return None

    async def get_direct_video_url(self, video_url: str, bvid: str) -> Optional[str]:
        """
        Download the video through the local file server and return its URL.

        Args:
            video_url (str): Full video page URL (currently unused; kept for
                interface compatibility).
            bvid (str): BV id of the video.

        Returns:
            Optional[str]: URL of the locally served copy, or None on failure.
        """
        if not BILI_API_AVAILABLE:
            return None
        try:
            credential = self._get_credential()
            v = video.Video(bvid=bvid, credential=credential)
            # The video info is needed first because it carries the cid.
            info = await v.get_info()
            cid = info.get('cid', 0)
            if not cid:
                return None
            download_url_data = await v.get_download_url(cid=cid)
            detecter = video.VideoDownloadURLDataDetecter(data=download_url_data)
            streams = detecter.detect_best_streams()
            if not streams:
                logger.warning(f"[{self.name}] 未检测到可用的视频流")
                return None
            # NOTE(review): only the first detected stream is downloaded. If
            # the detecter returns separate video/audio streams, the result
            # may lack audio — confirm against the bilibili-api docs.
            video_direct_url = streams[0].url
            logger.info(f"[{self.name}] 获取到视频直链,开始下载到本地...")
            local_url = await download_to_local(video_direct_url, timeout=120)
            if local_url:
                logger.success(f"[{self.name}] 视频已下载到本地: {local_url}")
                return local_url
            logger.error(f"[{self.name}] 下载到本地失败")
            return None
        except Exception as e:
            logger.error(f"[{self.name}] 获取视频直链失败: {e}")
        return None

    async def format_response(self, event: MessageEvent, data: Dict[str, Any]) -> List[Any]:
        """
        Render the video information as forward-message nodes.

        Besides building the nodes, this also replies to the event with the
        video itself when a local copy could be obtained.

        Args:
            event (MessageEvent): The triggering message event.
            data (Dict[str, Any]): Video information as returned by `parse`.

        Returns:
            List[Any]: Forward nodes: summary text, cover and uploader avatar
            (each only when its URL is known), then the video or a notice.
        """
        video_message: Union[str, MessageSegment]
        direct_url = None
        if data['duration'] > self.MAX_DOWNLOAD_DURATION:
            video_message = "视频时长超过20分钟不进行解析。"
        else:
            bvid = data.get('bvid', '')
            video_url = f"https://www.bilibili.com/video/{bvid}"
            direct_url = await self.get_direct_video_url(video_url, bvid)
            if direct_url:
                video_message = MessageSegment.video(direct_url)
            else:
                video_message = "视频解析失败,无法获取直链。"

        text_message = (
            f"BiliBili 视频解析\n"
            f"--------------------\n"
            f" UP主: {data['owner_name']}\n"
            f" 粉丝: {self.format_count(data['followers'])}\n"
            f"--------------------\n"
            f" 标题: {data['title']}\n"
            f" BV号: {data['bvid']}\n"
            f" 时长: {format_duration(data['duration'])}\n"
            f"--------------------\n"
            f" 数据:\n"
            f" 播放: {self.format_count(data['play'])}\n"
            f" 点赞: {self.format_count(data['like'])}\n"
            f" 投币: {self.format_count(data['coin'])}\n"
            f" 收藏: {self.format_count(data['favorite'])}\n"
            f" 转发: {self.format_count(data['share'])}\n"
            f" 弹幕: {self.format_count(data.get('danmaku', 0))}\n"
        )

        def build_node(message: Any) -> Any:
            # Every node is attributed to the bot itself under the parser's nickname.
            return event.bot.build_forward_node(
                user_id=event.self_id, nickname=self.nickname, message=message
            )

        nodes = [build_node(text_message)]
        # The fallback parser cannot recover the cover (and may miss the
        # avatar); skip the image node rather than send an empty image URL.
        cover_url = data.get('cover_url')
        if cover_url:
            nodes.append(build_node([
                MessageSegment.text("B站封面"),
                MessageSegment.image(cover_url),
            ]))
        avatar_url = data.get('owner_avatar')
        if avatar_url:
            nodes.append(build_node([
                MessageSegment.text("UP主头像"),
                MessageSegment.image(avatar_url),
            ]))
        nodes.append(build_node(video_message))

        # Also send the video directly, outside the forward message.
        if direct_url:
            try:
                await event.reply(MessageSegment.video(direct_url))
            except Exception as e:
                logger.error(f"[{self.name}] 直接发送视频失败: {e}")
        return nodes

    def should_handle_url(self, url: str) -> bool:
        """
        Tell whether this parser is responsible for a URL.

        Args:
            url (str): Candidate URL.

        Returns:
            bool: True if the URL matches the Bilibili video / b23.tv pattern.
        """
        return bool(self.url_pattern.search(url))