Files
NeoBot/plugins/douyin_parser.py

391 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import re
import json
import aiohttp
from typing import Optional, Dict, Any, Union
from cachetools import TTLCache
from core.utils.logger import logger
from core.managers.command_manager import matcher
from models import MessageEvent, MessageSegment
# De-duplication cache for incoming messages, keyed by message id: holds at
# most 100 entries and each entry has a time-to-live of 10 seconds.
processed_messages: TTLCache[int, bool] = TTLCache(maxsize=100, ttl=10)
# Plugin metadata: name, human-readable description and usage hint.
__plugin_meta__ = {
"name": "douyin_parser",
"description": "自动解析抖音分享链接,提取视频信息和直链。",
"usage": "(自动触发)当检测到抖音分享链接时,自动发送视频信息。",
}
# Constants.

# Display name used for every node of the merged-forward reply.
DOUYIN_NICKNAME = "抖音视频解析"

# Default desktop-browser request headers sent with outgoing HTTP requests.
# NOTE(review): advertising "br" presumably requires a Brotli decoder to be
# installed alongside aiohttp — confirm for the deployment environment.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
    "Accept-Encoding": "gzip, deflate, br",  # br encoding support re-enabled
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
}
# Module-wide shared ClientSession, created lazily by get_session().
_session: Optional[aiohttp.ClientSession] = None


async def get_session() -> aiohttp.ClientSession:
    """Return the shared HTTP client session, creating it when necessary.

    A new ``aiohttp.ClientSession`` configured with ``HEADERS`` is built when
    no session has been created yet or the previous one has been closed.

    :return: the module-wide client session
    """
    global _session
    reusable = _session is not None and not _session.closed
    if not reusable:
        _session = aiohttp.ClientSession(headers=HEADERS)
    return _session
def format_count(num: Union[int, str]) -> str:
    """Format a counter (e.g. a like count) for display.

    Values below 10000 are rendered verbatim. Larger values are abbreviated
    to one decimal place in units of ten thousand and suffixed with "万", so
    that an abbreviated value cannot be mistaken for a small raw count
    (12345 -> "1.2万" rather than a bare "1.2").

    :param num: the count, as an int or a numeric string
    :return: the display string; when ``num`` cannot be converted to an int,
        ``str(num)`` is returned unchanged
    """
    # Only the conversion can fail; keep the try body minimal.
    try:
        n = int(num)
    except (ValueError, TypeError):
        return str(num)
    if n < 10000:
        return str(n)
    return f"{n / 10000:.1f}万"
# Host and short-code part shared by both patterns; the short code may contain
# ASCII letters, digits and underscores.
# NOTE(review): '-' is not accepted in the short code — confirm whether real
# share links can contain it.
_DOUYIN_SHORT_LINK_BODY = r"v\.douyin\.com/[a-zA-Z0-9_]+/?"
# Short link with a mandatory http(s) scheme.
DOUYIN_URL_PATTERN = re.compile(r"https?://" + _DOUYIN_SHORT_LINK_BODY, re.IGNORECASE)
# Same short link, but the scheme is optional.
DOUYIN_SHORT_PATTERN = re.compile(r"(?:https?://)?" + _DOUYIN_SHORT_LINK_BODY, re.IGNORECASE)
def extract_url_from_json_segments(segments):
    """Extract a Douyin link from the JSON (share-card) segments of a message.

    Each segment of type ``"json"`` has its ``data["data"]`` string decoded,
    and the link is looked up at ``meta -> detail_1 -> qqdocurl``. The first
    such value that is a string containing ``"douyin.com"`` is returned.

    Cards with a different layout (payload that is not an object, missing or
    non-dict ``meta`` / ``detail_1``, non-string ``qqdocurl``) are skipped
    instead of raising, so that an unrelated JSON card cannot abort message
    handling.

    :param segments: iterable of message segments exposing ``type`` and ``data``
    :return: the extracted URL, or ``None`` when no segment carries one
    """
    for segment in segments:
        if segment.type != "json":
            continue
        logger.info(f"[douyin_parser] 检测到JSON CQ码: {segment.data}")
        try:
            payload = json.loads(segment.data.get("data", "{}"))
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError covers a payload that is not str/bytes (e.g. None).
            logger.error(f"[douyin_parser] 解析JSON失败: {e}")
            continue

        # Walk meta -> detail_1 -> qqdocurl, tolerating any level that is
        # absent or not a JSON object.
        meta = payload.get("meta") if isinstance(payload, dict) else None
        detail = meta.get("detail_1") if isinstance(meta, dict) else None
        url = detail.get("qqdocurl") if isinstance(detail, dict) else None

        # "douyin.com" is also a substring of "iesdouyin.com", so a single
        # containment test covers both hosts.
        if isinstance(url, str) and "douyin.com" in url:
            logger.success(f"[douyin_parser] 成功从JSON卡片中提取到抖音链接: {url}")
            return url
    return None
def extract_url_from_text_segments(segments):
    """Extract a Douyin short link from the plain-text segments of a message.

    Segments are scanned in order. Within one text segment a link carrying an
    explicit http(s) scheme is preferred; if none is present, a bare
    ``v.douyin.com/...`` link is accepted and returned exactly as matched
    (i.e. possibly without a scheme).

    :param segments: iterable of message segments exposing ``type`` and ``data``
    :return: the first matching link, or ``None`` when nothing matches
    """
    for segment in segments:
        if segment.type != "text":
            continue
        text = segment.data.get("text", "")

        # First choice: a link that includes the scheme.
        with_scheme = DOUYIN_URL_PATTERN.search(text)
        if with_scheme is not None:
            link = with_scheme[0]
            logger.success(f"[douyin_parser] 成功从文本中提取到抖音链接: {link}")
            return link

        # Fallback: the v.douyin.com form whose scheme is optional.
        bare = DOUYIN_SHORT_PATTERN.search(text)
        if bare is not None:
            link = bare[0]
            logger.success(f"[douyin_parser] 成功从文本中提取到抖音短链接: {link}")
            return link
    return None
@matcher.on_message()
async def handle_douyin_share(event: MessageEvent):
    """Detect a Douyin share (JSON card or text link) in a message and parse it.

    :param event: the incoming message event
    """
    # De-duplicate: every message id is handled at most once while cached.
    message_id = event.message_id
    if message_id in processed_messages:
        return
    processed_messages[message_id] = True

    # Ignore the bot's own messages to avoid an endless reply loop.
    if event.user_id == event.self_id:
        return

    # A link inside a JSON card takes priority; text segments are the fallback.
    link = (
        extract_url_from_json_segments(event.message)
        or extract_url_from_text_segments(event.message)
    )
    if not link:
        return
    await process_douyin_link(event, link)
async def get_real_url(short_url: str) -> Optional[str]:
    """Resolve a Douyin short link by following its redirects.

    The request is sent through a dedicated, short-lived client session with
    mobile-browser headers layered on top of ``HEADERS``. Whatever URL the
    redirect chain ends at is returned; the kind of landing page only affects
    which log line is written.

    :param short_url: the Douyin short link to resolve
    :return: the final URL after redirects, or ``None`` when the request fails
    """
    # Base headers plus extra fields imitating a mobile browser; the
    # User-Agent entry replaces the desktop one from HEADERS.
    mobile_headers = {
        **HEADERS,
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Cache-Control': 'max-age=0',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': 'https://www.douyin.com/',
    }
    # aiohttp's documented timeout type is ClientTimeout; a bare number is
    # only accepted for backward compatibility.
    request_timeout = aiohttp.ClientTimeout(total=10)

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(
                short_url,
                headers=mobile_headers,
                allow_redirects=True,
                timeout=request_timeout,
            ) as response:
                redirected_url = str(response.url)
    except Exception as e:
        logger.error(f"[douyin_parser] 获取真实URL失败: {e}")
        return None

    # Every case returns the redirected URL; the branches only pick the log
    # message that describes where the redirect ended up.
    if 'video/' in redirected_url or '/note/' in redirected_url:
        logger.info(f"[douyin_parser] 重定向后的视频URL: {redirected_url}")
    elif 'share_item' in redirected_url:
        logger.info(f"[douyin_parser] 重定向后的分享URL: {redirected_url}")
    else:
        logger.warning(f"[douyin_parser] 重定向到了非预期页面: {redirected_url}")
    return redirected_url
async def parse_douyin_video(video_url: str) -> Optional[Dict[str, Any]]:
    """Fetch metadata for a Douyin video through the third-party parsing API.

    The link is sent as the ``url`` query parameter of
    ``http://api.xhus.cn/api/douyin`` over the shared client session. It is
    passed via ``params=`` so that characters such as ``&``, ``#`` or ``%``
    inside the link are percent-encoded instead of corrupting the API's own
    query string.

    :param video_url: the Douyin link to resolve (short or full)
    :return: a dict with the keys ``type``, ``video_url``, ``video_url_HQ``,
        ``nickname``, ``desc``, ``aweme_id``, ``like``, ``cover``, ``time``,
        ``author_avatar`` and ``music``; ``None`` when the request fails or
        the API reports an error or returns no data
    """
    try:
        session = await get_session()
        async with session.get(
            "http://api.xhus.cn/api/douyin",
            params={"url": video_url},
            headers=HEADERS,
            timeout=aiohttp.ClientTimeout(total=10),
        ) as response:
            if response.status != 200:
                logger.error(f"[douyin_parser] API请求失败状态码: {response.status}")
                return None
            response_data = await response.json()

        if not isinstance(response_data, dict):
            logger.error(f"[douyin_parser] API返回格式错误: {response_data}")
            return None
        if response_data.get("code") != 200:
            logger.error(f"[douyin_parser] API返回错误: {response_data}")
            return None
        data = response_data.get("data", {})
        if not data:
            logger.error("[douyin_parser] API返回数据为空")
            return None

        # A non-empty list under "images" marks an image album; anything else
        # is treated as a video.
        images = data.get("images")
        is_album = bool(images) and isinstance(images, list)

        # Map the API's field names onto the structure used by this plugin.
        return {
            "type": "image" if is_album else "video",
            "video_url": data.get("url", ""),  # core field: playback address
            "video_url_HQ": data.get("url", ""),  # API has no HQ field; same address
            "nickname": data.get("author", "未知作者"),
            "desc": data.get("title", "无描述"),
            "aweme_id": data.get("uid", ""),
            "like": data.get("like", 0),
            "cover": data.get("cover", ""),
            "time": data.get("time", 0),
            "author_avatar": data.get("avatar", ""),
            "music": data.get("music", {}),
        }
    except (aiohttp.ClientError, KeyError, AttributeError, json.JSONDecodeError) as e:
        logger.error(f"[douyin_parser] 解析抖音视频信息失败: {e}")
        logger.debug(f"失败的URL: {video_url}")
    except Exception as e:
        logger.error(f"[douyin_parser] 解析抖音视频时发生未知错误: {e}")
        logger.debug(f"失败的URL: {video_url}")
    return None
# Copy hint appended to shared text. Punctuation between the clauses is
# optional so both the punctuated and the unpunctuated form are removed.
# NOTE(review): the punctuated variant is assumed from commonly seen share
# text — confirm against real messages.
_COPY_HINT_PATTERN = re.compile(
    r'复制此链接[,,]?\s*打开Dou音搜索[,,]?\s*直接观看视频[!!]?'
)


def _extract_share_text(event: MessageEvent) -> str:
    """Return the first non-empty text of the message with links and the copy hint removed."""
    for segment in event.message:
        if segment.type != "text":
            continue
        cleaned = DOUYIN_URL_PATTERN.sub('', segment.data.get("text", ""))
        cleaned = DOUYIN_SHORT_PATTERN.sub('', cleaned)
        cleaned = _COPY_HINT_PATTERN.sub('', cleaned).strip()
        if cleaned:
            return cleaned
    return ""


def _build_summary_text(video_info: Dict[str, Any], original_text: str, url: str) -> str:
    """Render the multi-line text summary of the parsed video."""
    divider = "--------------------"
    lines = ["抖音视频解析", divider]
    if original_text:
        lines += [f" 分享内容: {original_text}", divider]
    lines += [
        f" 作者: {video_info['nickname']}",
        f" 抖音号: {video_info['aweme_id']}",
        f" 标题: {video_info['desc']}",
        f" 点赞: {format_count(video_info['like'])}",
        f" 类型: {video_info['type']}",
    ]
    # Music details are shown only when the API delivered them as a mapping;
    # any other shape is skipped rather than crashing on .get().
    music_info = video_info.get('music')
    if music_info and isinstance(music_info, dict):
        lines += [
            divider,
            " 背景音乐:",
            f" 标题: {music_info.get('title', '')}",
            f" 作者: {music_info.get('author', '')}",
        ]
    lines += [divider, f" 原始链接: {url}"]
    return "\n".join(lines)


def _build_forward_nodes(event: MessageEvent, video_info: Dict[str, Any], text_message: str) -> list:
    """Build the merged-forward nodes: summary, cover, avatar and media (or a notice)."""

    def make_node(message):
        return event.bot.build_forward_node(
            user_id=event.self_id,
            nickname=DOUYIN_NICKNAME,
            message=message,
        )

    nodes = [make_node(text_message)]

    # Optional image nodes; a failure here only drops that one node.
    optional_images = (
        ('cover', "抖音视频封面:\n", "无法添加封面图片"),
        ('author_avatar', "作者头像:\n", "无法添加作者头像"),
    )
    for key, caption, failure_label in optional_images:
        image_url = video_info.get(key)
        if not image_url:
            continue
        try:
            nodes.append(make_node([
                MessageSegment.text(caption),
                MessageSegment.image(image_url),
            ]))
        except Exception as e:
            logger.warning(f"[douyin_parser] {failure_label}: {e}")

    # Media node: a video for type "video", otherwise the address is sent as
    # a single image.
    media_added = False
    media_url = video_info.get('video_url')
    if media_url:
        try:
            if video_info.get('type') == 'video':
                media_segment = MessageSegment.video(media_url)
                media_caption = "视频直链:"
            else:
                media_segment = MessageSegment.image(media_url)
                media_caption = "图集首图:"
            nodes.append(make_node([
                MessageSegment.text(media_caption + "\n"),
                media_segment,
            ]))
            media_added = True
        except Exception as e:
            logger.error(f"[douyin_parser] 无法添加视频/图片: {e}")

    # Tell the user explicitly when no playable media could be attached.
    if not media_added:
        nodes.append(make_node("视频解析成功,但无法获取直链或播放视频。"))
    return nodes


async def process_douyin_link(event: MessageEvent, url: str):
    """Resolve a Douyin link and reply with the parsed information.

    The link is handed to ``parse_douyin_video`` unchanged. On success the
    result is sent as a merged-forward message; if that send fails, a plain
    text reply (plus the cover image, best effort) is sent instead. Any other
    failure is logged and answered with a generic error reply.

    :param event: the message event that contained the link
    :param url: the Douyin link to process
    """
    try:
        video_info = await parse_douyin_video(url)
        if not video_info:
            logger.error(f"[douyin_parser] 无法从 {url} 解析视频信息。")
            await event.reply("无法获取视频信息,可能是抖音接口变动或视频不存在。")
            return

        text_message = _build_summary_text(video_info, _extract_share_text(event), url)
        nodes = _build_forward_nodes(event, video_info, text_message)

        # The title may be missing or null; never let the log line abort the reply.
        title_preview = str(video_info.get('desc') or '')[:20]
        logger.success(f"[douyin_parser] 成功解析视频信息并准备以聊天记录形式回复: {title_preview}...")

        try:
            # send_forwarded_messages picks private or group chat from the target.
            await event.bot.send_forwarded_messages(target=event, nodes=nodes)
        except Exception as e:
            logger.error(f"[douyin_parser] 发送合并转发失败: {e}")
            # Fall back to a simple text reply.
            simple_reply = f"抖音视频解析成功\n{text_message}\n\n如果无法查看视频内容,请复制原始链接到浏览器打开:{url}"
            await event.reply(simple_reply)
            # Best effort: also send the cover on its own.
            if video_info.get('cover'):
                try:
                    await event.reply(MessageSegment.image(video_info['cover']))
                except Exception as cover_error:
                    logger.warning(f"[douyin_parser] 单独发送封面失败: {cover_error}")
    except Exception as e:
        logger.error(f"[douyin_parser] 处理抖音链接时发生错误: {e}")
        await event.reply("处理抖音链接时发生错误,请稍后再试。")