391 lines
16 KiB
Python
391 lines
16 KiB
Python
# -*- coding: utf-8 -*-
|
||
import re
|
||
import json
|
||
import aiohttp
|
||
from typing import Optional, Dict, Any, Union
|
||
from cachetools import TTLCache
|
||
|
||
from core.utils.logger import logger
|
||
from core.managers.command_manager import matcher
|
||
from models import MessageEvent, MessageSegment
|
||
|
||
# TTL cache holding at most 100 entries, each expiring 10 seconds after insertion.
# The message handler in this module stores message ids here to skip duplicates.
processed_messages: TTLCache[int, bool] = TTLCache(maxsize=100, ttl=10)
|
||
|
||
# Plugin metadata: plugin name, a short description and a usage hint.
__plugin_meta__ = {
    "name": "douyin_parser",
    "description": "自动解析抖音分享链接,提取视频信息和直链。",
    "usage": "(自动触发)当检测到抖音分享链接时,自动发送视频信息。",
}
|
||
|
||
# Constants
# Nickname shown on the forward-message nodes built by this plugin.
DOUYIN_NICKNAME = "抖音视频解析"

# Default request headers imitating a desktop Chrome browser.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    # "br" encoding support re-enabled.
    # NOTE(review): advertising "br" presumably requires a Brotli decoder to be
    # installed for aiohttp to decode such responses — confirm in deployment.
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1'
}
|
||
|
||
# Module-wide shared ClientSession; starts as None and is created on first use by get_session().
_session: Optional[aiohttp.ClientSession] = None
|
||
|
||
async def get_session() -> aiohttp.ClientSession:
    """
    Return the shared aiohttp session, creating it when necessary.

    A new session (with ``HEADERS`` as its default headers) is created when
    none exists yet or when the previous one has been closed.

    :return: the module-wide ``aiohttp.ClientSession``
    """
    global _session
    session_usable = _session is not None and not _session.closed
    if not session_usable:
        _session = aiohttp.ClientSession(headers=HEADERS)
    return _session
|
||
|
||
|
||
def format_count(num: Union[int, str]) -> str:
    """
    Format a counter for display, abbreviating values of 10000 and above.

    Values below 10000 are rendered as plain integers; larger values are
    divided by 10000, rounded to one decimal place and suffixed with "万".
    Input that cannot be converted with ``int()`` is returned via ``str()``.

    :param num: the count, as an int or a numeric string
    :return: the display string
    """
    try:
        value = int(num)
    except (ValueError, TypeError):
        # Not an integer-like value: show it unchanged.
        return str(num)
    return str(value) if value < 10000 else f"{value / 10000:.1f}万"
|
||
|
||
|
||
# Matches a v.douyin.com short link that carries an explicit http(s) scheme.
# The link code may contain letters, digits and underscores.
# NOTE(review): a code containing "-" would only match up to the hyphen — confirm
# whether Douyin emits such codes.
DOUYIN_URL_PATTERN = re.compile(r"https?://v\.douyin\.com/[a-zA-Z0-9_]+/?", re.IGNORECASE)  # underscore included
# Same link shape, but the http(s) scheme is optional.
DOUYIN_SHORT_PATTERN = re.compile(r"(?:https?://)?v\.douyin\.com/[a-zA-Z0-9_]+/?", re.IGNORECASE)  # underscore included
|
||
|
||
|
||
def extract_url_from_json_segments(segments):
    """
    Extract a Douyin link from the JSON (share card) segments of a message.

    For every segment whose ``type`` is ``"json"`` the string stored under
    ``segment.data["data"]`` is decoded and ``meta -> detail_1 -> qqdocurl``
    is looked up. The first such value that is a string containing
    ``"douyin.com"`` is returned.

    Cards with an unexpected shape (payload that is not decodable, or any
    level of the lookup that is not a dict / string) are skipped instead of
    raising, because this function is called for every incoming message.

    :param segments: iterable of message segments
    :return: the extracted URL, or None when no segment yields one
    """
    for segment in segments:
        if segment.type != "json":
            continue

        logger.info(f"[douyin_parser] 检测到JSON CQ码: {segment.data}")
        try:
            json_data = json.loads(segment.data.get("data", "{}"))
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError covers a payload that is not str/bytes (e.g. None).
            logger.error(f"[douyin_parser] 解析JSON失败: {e}")
            continue

        # Walk meta -> detail_1 -> qqdocurl, tolerating any level having the wrong type.
        meta = json_data.get("meta") if isinstance(json_data, dict) else None
        detail = meta.get("detail_1") if isinstance(meta, dict) else None
        url = detail.get("qqdocurl") if isinstance(detail, dict) else None

        # "iesdouyin.com" also contains "douyin.com", so one substring test covers both.
        if isinstance(url, str) and "douyin.com" in url:
            logger.success(f"[douyin_parser] 成功从JSON卡片中提取到抖音链接: {url}")
            return url

    return None
|
||
|
||
|
||
def extract_url_from_text_segments(segments):
    """
    Extract a Douyin link from the text segments of a message.

    Each ``"text"`` segment is searched first for a link with an explicit
    http(s) scheme and then for the scheme-optional short form; the first
    hit is returned.

    :param segments: iterable of message segments
    :return: the matched link text, or None when nothing matches
    """
    for seg in segments:
        if seg.type != "text":
            continue

        body = seg.data.get("text", "")

        # Prefer a link that carries its scheme.
        with_scheme = DOUYIN_URL_PATTERN.search(body)
        if with_scheme is not None:
            found = with_scheme.group(0)
            logger.success(f"[douyin_parser] 成功从文本中提取到抖音链接: {found}")
            return found

        # Otherwise accept the bare v.douyin.com form.
        bare = DOUYIN_SHORT_PATTERN.search(body)
        if bare is not None:
            found = bare.group(0)
            logger.success(f"[douyin_parser] 成功从文本中提取到抖音短链接: {found}")
            return found

    return None
|
||
|
||
|
||
@matcher.on_message()
async def handle_douyin_share(event: MessageEvent):
    """
    Message handler: detect a Douyin share (JSON card or text link) and parse it.

    :param event: the incoming message event
    """
    # De-duplicate: skip messages whose id was already seen, otherwise record it.
    if event.message_id in processed_messages:
        return
    processed_messages[event.message_id] = True

    # Ignore messages sent by the bot itself to avoid an endless loop.
    if event.user_id == event.self_id:
        return

    # A JSON card takes priority; the text segments are only searched as a fallback.
    link = (
        extract_url_from_json_segments(event.message)
        or extract_url_from_text_segments(event.message)
    )
    if not link:
        return

    await process_douyin_link(event, link)
|
||
|
||
|
||
async def get_real_url(short_url: str) -> Optional[str]:
    """
    Follow the redirects of a Douyin short link and return the final URL.

    The request is made with a dedicated session and the default headers
    overlaid with extra fetch headers and an iPhone Safari User-Agent.
    The kind of page reached only affects which log line is written; the
    final URL is returned in every case.

    :param short_url: the Douyin short link
    :return: the URL after redirects, or None if the request raised
    """
    try:
        async with aiohttp.ClientSession() as client:
            # Default headers plus additional ones imitating a mobile device.
            request_headers = {
                **HEADERS,
                'Sec-Fetch-Dest': 'document',
                'Sec-Fetch-Mode': 'navigate',
                'Sec-Fetch-Site': 'none',
                'Cache-Control': 'max-age=0',
                'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1',
                'X-Requested-With': 'XMLHttpRequest',
                'Referer': 'https://www.douyin.com/',
            }

            async with client.get(
                short_url,
                headers=request_headers,
                allow_redirects=True,
                timeout=10,
            ) as resp:
                final_url = str(resp.url)

                if 'video/' in final_url or '/note/' in final_url:
                    # Landed on a video / note page.
                    logger.info(f"[douyin_parser] 重定向后的视频URL: {final_url}")
                elif 'share_item' in final_url:
                    # Landed on a share URL.
                    logger.info(f"[douyin_parser] 重定向后的分享URL: {final_url}")
                else:
                    # Landed somewhere unexpected (e.g. a home page).
                    logger.warning(f"[douyin_parser] 重定向到了非预期页面: {final_url}")

                return final_url

    except Exception as e:
        logger.error(f"[douyin_parser] 获取真实URL失败: {e}")
        return None
|
||
|
||
|
||
async def parse_douyin_video(video_url: str) -> Optional[Dict[str, Any]]:
    """
    Resolve a Douyin link through the third-party parsing API.

    The API answer is expected to be a JSON object with ``code == 200`` and a
    non-empty ``data`` member; that member is mapped onto the keys used by the
    rest of this module.

    :param video_url: the Douyin link to resolve
    :return: a dict with the keys ``type``, ``video_url``, ``video_url_HQ``,
        ``nickname``, ``desc``, ``aweme_id``, ``like``, ``cover``, ``time``,
        ``author_avatar`` and ``music``; or None on any failure
    """
    api_endpoint = "http://api.xhus.cn/api/douyin"

    try:
        session = await get_session()
        # Pass the link through ``params`` so it is percent-encoded. Interpolating it
        # into the query string by hand breaks links that themselves contain
        # "&", "#", "+" or "%" (everything after such a character is lost or altered).
        async with session.get(
            api_endpoint,
            params={"url": video_url},
            headers=HEADERS,
            timeout=10,
        ) as response:
            if response.status != 200:
                logger.error(f"[douyin_parser] API请求失败,状态码: {response.status}")
                return None

            response_data = await response.json()

            if not isinstance(response_data, dict):
                logger.error(f"[douyin_parser] API返回格式错误: {response_data}")
                return None

            if response_data.get("code") != 200:
                logger.error(f"[douyin_parser] API返回错误: {response_data}")
                return None

            data = response_data.get("data", {})
            if not data:
                logger.error("[douyin_parser] API返回数据为空")
                return None

            # Convert the API response format: a non-empty ``images`` list marks an image post.
            images = data.get("images")
            media_type = "image" if images and isinstance(images, list) else "video"
            play_url = data.get("url", "")

            return {
                "type": media_type,
                "video_url": play_url,  # core field: playback address
                "video_url_HQ": play_url,  # the API has no HQ field; the same address is reused
                "nickname": data.get("author", "未知作者"),
                "desc": data.get("title", "无描述"),
                "aweme_id": data.get("uid", ""),
                "like": data.get("like", 0),
                "cover": data.get("cover", ""),
                "time": data.get("time", 0),
                "author_avatar": data.get("avatar", ""),
                "music": data.get("music", {}),
            }
    except (aiohttp.ClientError, KeyError, AttributeError, json.JSONDecodeError) as e:
        logger.error(f"[douyin_parser] 解析抖音视频信息失败: {e}")
        logger.debug(f"失败的URL: {video_url}")
    except Exception as e:
        logger.error(f"[douyin_parser] 解析抖音视频时发生未知错误: {e}")
        logger.debug(f"失败的URL: {video_url}")

    return None
|
||
|
||
|
||
def _douyin_share_text(message) -> str:
    """Return the first non-empty text left in *message* after removing links and the copy hint."""
    for segment in message:
        if segment.type != "text":
            continue
        remaining = segment.data.get("text", "")
        remaining = DOUYIN_URL_PATTERN.sub('', remaining)
        remaining = DOUYIN_SHORT_PATTERN.sub('', remaining)
        # The hint contains no regex metacharacters, so a plain replace is sufficient.
        remaining = remaining.replace('复制此链接,打开Dou音搜索,直接观看视频!', '')
        remaining = remaining.strip()
        if remaining:
            return remaining
    return ""


def _douyin_summary_text(video_info, original_text, url) -> str:
    """Build the multi-line text summary placed in the first forward node."""
    separator = "--------------------"
    lines = ["抖音视频解析", separator]

    if original_text:
        lines += [f" 分享内容: {original_text}", separator]

    lines += [
        f" 作者: {video_info['nickname']}",
        f" 抖音号: {video_info['aweme_id']}",
        f" 标题: {video_info['desc']}",
        f" 点赞: {format_count(video_info['like'])}",
        f" 类型: {video_info['type']}",
    ]

    # Only a non-empty mapping carries title/author; any other value is skipped
    # so it cannot raise after a successful parse.
    music_info = video_info.get('music')
    if music_info and isinstance(music_info, dict):
        lines += [
            separator,
            " 背景音乐:",
            f" 标题: {music_info.get('title', '')}",
            f" 作者: {music_info.get('author', '')}",
        ]

    lines += [separator, f" 原始链接: {url}"]
    return "\n".join(lines)


def _douyin_media_node(event, caption, media_segment):
    """Build a forward node made of a caption text followed by one media segment."""
    return event.bot.build_forward_node(
        user_id=event.self_id,
        nickname=DOUYIN_NICKNAME,
        message=[MessageSegment.text(caption), media_segment],
    )


async def process_douyin_link(event: MessageEvent, url: str):
    """
    Resolve a Douyin link and reply with the result as a merged forward message.

    The reply consists of a text summary node, optional cover and avatar
    nodes, and a video / image node (or a notice node when no media address
    is available). If sending the merged forward fails, a plain-text reply
    and, when available, the cover image are sent instead.

    :param event: the message event being answered
    :param url: the Douyin link to process
    """
    try:
        # The original link is handed to the API directly; no redirect resolution is needed.
        video_info = await parse_douyin_video(url)
        if not video_info:
            logger.error(f"[douyin_parser] 无法从 {url} 解析视频信息。")
            await event.reply("无法获取视频信息,可能是抖音接口变动或视频不存在。")
            return

        # Summary text, including any text of the original share that is not the link itself.
        text_message = _douyin_summary_text(video_info, _douyin_share_text(event.message), url)

        # Forward nodes, starting with the text summary.
        nodes = [
            event.bot.build_forward_node(
                user_id=event.self_id,
                nickname=DOUYIN_NICKNAME,
                message=text_message
            )
        ]

        # Cover image node (optional, best effort).
        if video_info.get('cover'):
            try:
                nodes.append(_douyin_media_node(
                    event, "抖音视频封面:\n", MessageSegment.image(video_info['cover'])
                ))
            except Exception as e:
                logger.warning(f"[douyin_parser] 无法添加封面图片: {e}")

        # Author avatar node (optional, best effort).
        if video_info.get('author_avatar'):
            try:
                nodes.append(_douyin_media_node(
                    event, "作者头像:\n", MessageSegment.image(video_info['author_avatar'])
                ))
            except Exception as e:
                logger.warning(f"[douyin_parser] 无法添加作者头像: {e}")

        # Video (or first image) node, in a node of its own.
        video_success = False
        try:
            video_url = video_info.get('video_url')
            if video_url:
                if video_info.get('type') == 'video':
                    media_segment = MessageSegment.video(video_url)
                    caption = "视频直链:"
                else:  # image post: a single image
                    media_segment = MessageSegment.image(video_url)
                    caption = "图集首图:"
                nodes.append(_douyin_media_node(event, caption + "\n", media_segment))
                video_success = True
        except Exception as e:
            logger.error(f"[douyin_parser] 无法添加视频/图片: {e}")

        # Without a media node, tell the user so explicitly.
        if not video_success:
            nodes.append(event.bot.build_forward_node(
                user_id=event.self_id,
                nickname=DOUYIN_NICKNAME,
                message="视频解析成功,但无法获取直链或播放视频。"
            ))

        # ``desc`` may be None or a non-string when the API returns null; never let
        # the log line abort a reply that is otherwise ready.
        desc_preview = str(video_info.get('desc') or '')[:20]
        logger.success(f"[douyin_parser] 成功解析视频信息并准备以聊天记录形式回复: {desc_preview}...")

        # Send the merged forward message.
        try:
            await event.bot.send_forwarded_messages(target=event, nodes=nodes)
        except Exception as e:
            # Merged forward failed: fall back to a plain text reply.
            logger.error(f"[douyin_parser] 发送合并转发失败: {e}")

            simple_reply = f"抖音视频解析成功\n{text_message}\n\n如果无法查看视频内容,请复制原始链接到浏览器打开:{url}"
            await event.reply(simple_reply)

            # Try to send the cover separately; failure is tolerated but logged.
            if video_info.get('cover'):
                try:
                    await event.reply(MessageSegment.image(video_info['cover']))
                except Exception as cover_error:
                    logger.warning(f"[douyin_parser] 单独发送封面失败: {cover_error}")

    except Exception as e:
        logger.error(f"[douyin_parser] 处理抖音链接时发生错误: {e}")
        await event.reply("处理抖音链接时发生错误,请稍后再试。")