Files
NeoBot/plugins/web_parser/parsers/douyin.py
K2cr2O1 1420d0f0b2 feat(web_parser): 新增通用web链接解析插件框架
refactor: 重构B站、抖音、GitHub解析器为模块化结构

fix(executor): 增强docker容器错误处理和回调稳定性

style(templates): 优化帮助页面和代码执行结果的样式

perf(web_parser): 添加API缓存和消息去重机制

docs: 更新插件元信息和注释

chore: 移除旧的独立解析器插件文件
2026-01-22 01:58:13 +08:00

262 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import re
import json
import aiohttp
from typing import Optional, Dict, Any, List
from core.utils.logger import logger
from models import MessageEvent, MessageSegment
from ..base import BaseParser
from ..utils import extract_original_text
from cachetools import TTLCache
class DouyinParser(BaseParser):
    """
    Parser for Douyin share links.

    Resolves a Douyin URL to post metadata through a third-party HTTP API and
    renders that metadata as a list of forward-message nodes.

    Relies on ``self.get_session()``, ``self.HEADERS`` and
    ``self.format_count()``, none of which are defined in this class
    (presumably inherited from ``BaseParser`` -- verify against the base class).
    """

    # Third-party resolver endpoint; the Douyin link is sent as the ``url``
    # query parameter.
    # NOTE(review): plain HTTP -- confirm whether the service offers HTTPS.
    _API_ENDPOINT = "http://api.xhus.cn/api/douyin"

    # Overall time budget (seconds) for each outbound request. An explicit
    # ClientTimeout replaces the deprecated bare-number ``timeout=10`` form.
    _REQUEST_TIMEOUT = aiohttp.ClientTimeout(total=10)

    def __init__(self):
        """Set the parser's display names, URL patterns and de-duplication cache."""
        super().__init__()
        self.name = "抖音解析器"
        # Short share link with an explicit http(s) scheme.
        self.url_pattern = re.compile(r"https?://v\.douyin\.com/[a-zA-Z0-9_]+/?", re.IGNORECASE)
        # Same short link, but the scheme is optional.
        self.short_pattern = re.compile(r"(?:https?://)?v\.douyin\.com/[a-zA-Z0-9_]+/?", re.IGNORECASE)
        # Sender nickname shown on every forward node built by this parser.
        self.nickname = "抖音视频解析"
        # Message de-duplication cache: at most 100 entries, each kept for 10 s.
        # It is only created here; no method of this class reads or writes it.
        self.processed_messages: TTLCache[int, bool] = TTLCache(maxsize=100, ttl=10)

    async def parse(self, url: str) -> Optional[Dict[str, Any]]:
        """
        Fetch metadata for a Douyin post from the third-party API.

        Args:
            url (str): Douyin post URL to resolve.

        Returns:
            Optional[Dict[str, Any]]: A dict with the keys ``type``
            (``"image"`` when the API payload carries a non-empty ``images``
            list, otherwise ``"video"``), ``video_url``, ``video_url_HQ``,
            ``nickname``, ``desc``, ``aweme_id``, ``like``, ``cover``,
            ``time``, ``author_avatar`` and ``music``; ``None`` when the
            request fails, the API reports an error, or the payload is empty.
        """
        try:
            session = self.get_session()
            # Pass the link through ``params`` so it is percent-encoded.
            # Interpolating it raw into the query string lets any '&' or '#'
            # in the link split or truncate the value the API receives.
            async with session.get(
                self._API_ENDPOINT,
                params={"url": url},
                headers=self.HEADERS,
                timeout=self._REQUEST_TIMEOUT,
            ) as response:
                if response.status != 200:
                    logger.error(f"[{self.name}] API请求失败状态码: {response.status}")
                    return None
                response_data = await response.json()

            if not isinstance(response_data, dict):
                logger.error(f"[{self.name}] API返回格式错误: {response_data}")
                return None
            # The API signals success with its own ``code`` field in the body.
            if response_data.get("code") != 200:
                logger.error(f"[{self.name}] API返回错误: {response_data}")
                return None

            data = response_data.get("data", {})
            if not data:
                logger.error(f"[{self.name}] API返回数据为空")
                return None

            images = data.get("images")
            is_gallery = bool(images) and isinstance(images, list)

            # Map the API's field names onto the keys format_response() reads.
            return {
                "type": "image" if is_gallery else "video",
                "video_url": data.get("url", ""),
                # The API exposes a single URL, so both entries share it.
                "video_url_HQ": data.get("url", ""),
                "nickname": data.get("author", "未知作者"),
                "desc": data.get("title", "无描述"),
                "aweme_id": data.get("uid", ""),
                "like": data.get("like", 0),
                "cover": data.get("cover", ""),
                "time": data.get("time", 0),
                "author_avatar": data.get("avatar", ""),
                "music": data.get("music", {}),
            }
        except (aiohttp.ClientError, KeyError, AttributeError, json.JSONDecodeError) as e:
            logger.error(f"[{self.name}] 解析抖音视频信息失败: {e}")
            logger.debug(f"失败的URL: {url}")
        except Exception as e:
            # Last-resort boundary: a parser failure must not propagate.
            logger.error(f"[{self.name}] 解析抖音视频时发生未知错误: {e}")
            logger.debug(f"失败的URL: {url}")
        return None

    async def get_real_url(self, short_url: str) -> Optional[str]:
        """
        Follow redirects from a Douyin short link and return the final URL.

        Args:
            short_url (str): Douyin short link.

        Returns:
            Optional[str]: The URL reached after redirects (returned even when
            it does not look like a post page; only the log line differs), or
            ``None`` if the request raised.
        """
        try:
            # Start from the shared headers and add mobile-browser headers.
            mobile_headers = self.HEADERS.copy()
            mobile_headers.update({
                'Sec-Fetch-Dest': 'document',
                'Sec-Fetch-Mode': 'navigate',
                'Sec-Fetch-Site': 'none',
                'Cache-Control': 'max-age=0',
                'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1',
                'X-Requested-With': 'XMLHttpRequest',
                'Referer': 'https://www.douyin.com/'
            })
            # A dedicated short-lived session is opened for this lookup.
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    short_url,
                    headers=mobile_headers,
                    allow_redirects=True,
                    timeout=self._REQUEST_TIMEOUT,
                ) as response:
                    redirected_url = str(response.url)
        except Exception as e:
            logger.error(f"[{self.name}] 获取真实URL失败: {e}")
            return None

        # Classify the landing page for logging only; the URL is returned
        # unchanged in every case.
        if 'video/' in redirected_url or '/note/' in redirected_url:
            logger.info(f"[{self.name}] 重定向后的视频URL: {redirected_url}")
        elif 'share_item' in redirected_url:
            logger.info(f"[{self.name}] 重定向后的分享URL: {redirected_url}")
        else:
            logger.warning(f"[{self.name}] 重定向到了非预期页面: {redirected_url}")
        return redirected_url

    async def format_response(self, event: MessageEvent, data: Dict[str, Any]) -> List[Any]:
        """
        Build the forward-message nodes that present a parsed Douyin post.

        Args:
            event (MessageEvent): Triggering message event; supplies the bot,
                the bot's own id and the original message.
            data (Dict[str, Any]): Post metadata in the shape returned by
                ``parse``.

        Returns:
            List[Any]: Forward nodes in order: text summary, cover image (if
            any), author avatar (if any), then either the video/image node or
            a fallback notice when no media node could be added.
        """

        def make_node(message: Any) -> Any:
            # Every node is sent as the bot itself under this parser's nickname.
            return event.bot.build_forward_node(
                user_id=event.self_id,
                nickname=self.nickname,
                message=message
            )

        # Text the user shared alongside the link, if any.
        original_text = extract_original_text(event.message, self.url_pattern)

        text_parts = ["抖音视频解析", "--------------------"]
        if original_text:
            text_parts.append(f" 分享内容: {original_text}")
            text_parts.append("--------------------")
        text_parts.append(f" 作者: {data['nickname']}")
        text_parts.append(f" 抖音号: {data['aweme_id']}")
        text_parts.append(f" 标题: {data['desc']}")
        text_parts.append(f" 点赞: {self.format_count(data['like'])}")
        text_parts.append(f" 类型: {data['type']}")

        # Background-music section. Only a dict can be queried for title and
        # author; any other truthy value is skipped instead of raising
        # AttributeError and aborting the whole reply.
        music_info = data.get('music')
        if music_info and isinstance(music_info, dict):
            text_parts.append("--------------------")
            text_parts.append(" 背景音乐:")
            text_parts.append(f" 标题: {music_info.get('title', '')}")
            text_parts.append(f" 作者: {music_info.get('author', '')}")
        text_parts.append("--------------------")

        nodes = [make_node("\n".join(text_parts))]

        # Optional cover and avatar nodes. A failure in one is logged and
        # does not block the remaining nodes.
        captioned_images = (
            ("抖音视频封面:\n", 'cover', "无法添加封面图片"),
            ("作者头像:\n", 'author_avatar', "无法添加作者头像"),
        )
        for caption, key, failure_text in captioned_images:
            image_url = data.get(key)
            if not image_url:
                continue
            try:
                nodes.append(make_node([
                    MessageSegment.text(caption),
                    MessageSegment.image(image_url)
                ]))
            except Exception as e:
                logger.warning(f"[{self.name}] {failure_text}: {e}")

        # Media node: a playable video for video posts; otherwise the same URL
        # is sent as a single image.
        media_added = False
        media_url = data.get('video_url', '')
        if media_url:
            try:
                if data.get('type') == 'video':
                    media_segment = MessageSegment.video(media_url)
                    media_label = "视频直链:"
                else:
                    media_segment = MessageSegment.image(media_url)
                    media_label = "图集首图:"
                nodes.append(make_node([
                    MessageSegment.text(media_label + "\n"),
                    media_segment
                ]))
                media_added = True
            except Exception as e:
                logger.error(f"[{self.name}] 无法添加视频/图片: {e}")

        # Tell the user explicitly when no media node could be attached.
        if not media_added:
            nodes.append(make_node("视频解析成功,但无法获取直链或播放视频。"))
        return nodes

    def should_handle_url(self, url: str) -> bool:
        """
        Decide whether this parser is responsible for a URL.

        Args:
            url (str): Candidate URL.

        Returns:
            bool: ``True`` if the URL contains ``douyin.com`` (case-sensitive
            substring) or matches either short-link pattern (case-insensitive).
        """
        return (
            'douyin.com' in url
            or bool(self.url_pattern.search(url))
            or bool(self.short_pattern.search(url))
        )