feat: 添加抖音视频解析插件并优化代码结构
添加抖音视频解析插件,支持自动解析抖音分享链接并提取视频信息。优化现有代码结构,包括:
- 重构单例模式实现
- 移除未使用的导入和文件
- 修复性能测试脚本中的异步调用
- 优化消息事件模型中的权限常量定义
- 改进编译脚本的错误处理
- 增强B站解析插件的稳定性

同时清理了多个废弃脚本和临时文件,提升代码可维护性。
This commit is contained in:
@@ -30,7 +30,7 @@ HEADERS = {
|
||||
# 全局共享的 ClientSession
|
||||
_session: Optional[aiohttp.ClientSession] = None
|
||||
|
||||
async def get_session() -> aiohttp.ClientSession:
|
||||
def get_session() -> aiohttp.ClientSession:
|
||||
global _session
|
||||
if _session is None or _session.closed:
|
||||
_session = aiohttp.ClientSession(headers=HEADERS)
|
||||
@@ -55,7 +55,7 @@ def format_duration(seconds: int) -> str:
|
||||
|
||||
async def get_real_url(short_url: str) -> Optional[str]:
|
||||
try:
|
||||
session = await get_session()
|
||||
session = get_session()
|
||||
async with session.head(short_url, headers=HEADERS, allow_redirects=False, timeout=5) as response:
|
||||
if response.status == 302:
|
||||
return response.headers.get('Location')
|
||||
@@ -65,22 +65,71 @@ async def get_real_url(short_url: str) -> Optional[str]:
|
||||
|
||||
async def parse_video_info(video_url: str) -> Optional[Dict[str, Any]]:
|
||||
try:
|
||||
session = await get_session()
|
||||
async with session.get(video_url, headers=HEADERS, timeout=5) as response:
|
||||
# 清理URL,去掉不必要的查询参数,只保留基本的视频URL
|
||||
clean_url = video_url.split('?')[0]
|
||||
if '#/' in clean_url:
|
||||
clean_url = clean_url.split('#/')[0]
|
||||
|
||||
session = get_session()
|
||||
async with session.get(clean_url, headers=HEADERS, timeout=5) as response:
|
||||
response.raise_for_status()
|
||||
text = await response.text()
|
||||
soup = BeautifulSoup(text, 'html.parser')
|
||||
|
||||
# 尝试多种方式获取视频数据
|
||||
# 方式1: 尝试获取 __INITIAL_STATE__
|
||||
script_tag = soup.find('script', text=re.compile('window.__INITIAL_STATE__'))
|
||||
if not script_tag or not script_tag.string:
|
||||
# 方式2: 尝试获取 __PLAYINFO__
|
||||
script_tag = soup.find('script', text=re.compile('window.__PLAYINFO__'))
|
||||
|
||||
if not script_tag or not script_tag.string:
|
||||
# 方式3: 尝试获取页面标题和其他信息
|
||||
title_tag = soup.find('title')
|
||||
if title_tag:
|
||||
title = title_tag.get_text().strip()
|
||||
# 提取BV号
|
||||
bv_match = re.search(r'(BV\w{10})', clean_url)
|
||||
bvid = bv_match.group(1) if bv_match else '未知BV号'
|
||||
|
||||
return {
|
||||
"title": title.replace('_哔哩哔哩_bilibili', '').strip(),
|
||||
"bvid": bvid,
|
||||
"duration": 0,
|
||||
"cover_url": '',
|
||||
"play": 0,
|
||||
"like": 0,
|
||||
"coin": 0,
|
||||
"favorite": 0,
|
||||
"share": 0,
|
||||
"owner_name": '未知UP主',
|
||||
"owner_avatar": '',
|
||||
"followers": 0,
|
||||
}
|
||||
return None
|
||||
|
||||
match = re.search(r'window\.__INITIAL_STATE__\s*=\s*(\{[^\}]*\});', script_tag.string)
|
||||
# 原始解析逻辑
|
||||
match = re.search(r'window\.__INITIAL_STATE__\s*=\s*(\{[^}]*\});', script_tag.string)
|
||||
if not match:
|
||||
# 尝试另一种正则表达式
|
||||
match = re.search(r'window\.__INITIAL_STATE__\s*=\s*(\{.*?\});', script_tag.string, re.DOTALL)
|
||||
|
||||
if not match:
|
||||
return None
|
||||
|
||||
json_str = match.group(1)
|
||||
data = json.loads(json_str)
|
||||
# 清理JSON字符串中的潜在问题字符
|
||||
json_str = json_str.strip().rstrip(';')
|
||||
|
||||
try:
|
||||
data = json.loads(json_str)
|
||||
except json.JSONDecodeError:
|
||||
# 如果直接解析失败,尝试清理JSON字符串
|
||||
# 移除可能的注释或无效字符
|
||||
cleaned_json = re.sub(r',\s*[}]', '}', json_str) # 移除末尾多余的逗号
|
||||
cleaned_json = re.sub(r'/\*.*?\*/', '', cleaned_json) # 移除注释
|
||||
cleaned_json = re.sub(r'//.*', '', cleaned_json) # 移除行注释
|
||||
data = json.loads(cleaned_json)
|
||||
|
||||
video_data = data.get('videoData', {})
|
||||
up_data = data.get('upData', {})
|
||||
@@ -116,6 +165,10 @@ async def parse_video_info(video_url: str) -> Optional[Dict[str, Any]]:
|
||||
|
||||
except (aiohttp.ClientError, KeyError, AttributeError, json.JSONDecodeError) as e:
|
||||
logger.error(f"解析视频信息失败: {e}")
|
||||
logger.debug(f"失败的URL: {video_url}")
|
||||
except Exception as e:
|
||||
logger.error(f"解析视频信息时发生未知错误: {e}")
|
||||
logger.debug(f"失败的URL: {video_url}")
|
||||
|
||||
return None
|
||||
|
||||
@@ -212,24 +265,32 @@ async def process_bili_link(event: MessageEvent, url: str):
|
||||
:param event: 消息事件对象
|
||||
:param url: 待处理的B站链接
|
||||
"""
|
||||
if "b23.tv" in url:
|
||||
real_url = await get_real_url(url)
|
||||
if not real_url:
|
||||
logger.error(f"[bili_parser] 无法从 {url} 获取真实URL。")
|
||||
await event.reply("无法解析B站短链接。")
|
||||
return
|
||||
else:
|
||||
real_url = url.split('?')[0]
|
||||
try:
|
||||
if "b23.tv" in url:
|
||||
real_url = await get_real_url(url)
|
||||
if not real_url:
|
||||
logger.error(f"[bili_parser] 无法从 {url} 获取真实URL。")
|
||||
await event.reply("无法解析B站短链接。")
|
||||
return
|
||||
else:
|
||||
# 清理URL,移除复杂查询参数,只保留基本的视频URL
|
||||
real_url = url.split('?')[0]
|
||||
if '#/' in real_url:
|
||||
real_url = real_url.split('#/')[0]
|
||||
|
||||
video_info = await parse_video_info(real_url)
|
||||
if not video_info:
|
||||
logger.error(f"[bili_parser] 无法从 {real_url} 解析视频信息。")
|
||||
await event.reply("无法获取视频信息,可能是B站接口变动或视频不存在。")
|
||||
video_info = await parse_video_info(real_url)
|
||||
if not video_info:
|
||||
logger.error(f"[bili_parser] 无法从 {real_url} 解析视频信息。")
|
||||
await event.reply("无法获取视频信息,可能是B站接口变动或视频不存在。")
|
||||
return
|
||||
except Exception as e:
|
||||
logger.error(f"[bili_parser] 处理B站链接时发生错误: {e}")
|
||||
await event.reply("处理B站链接时发生错误,请稍后再试。")
|
||||
return
|
||||
|
||||
# 检查视频时长
|
||||
video_message: Union[str, MessageSegment]
|
||||
if video_info['duration'] > 300: # 5分钟 = 300秒
|
||||
if video_info['duration'] > 1200: # 20 minutes = 1200 seconds
|
||||
video_message = "视频时长超过5分钟,不进行解析。"
|
||||
else:
|
||||
direct_url = await get_direct_video_url(real_url)
|
||||
|
||||
Reference in New Issue
Block a user