feat(web_parser): 新增通用web链接解析插件框架
refactor: 重构B站、抖音、GitHub解析器为模块化结构
fix(executor): 增强docker容器错误处理和回调稳定性
style(templates): 优化帮助页面和代码执行结果的样式
perf(web_parser): 添加API缓存和消息去重机制
docs: 更新插件元信息和注释
chore: 移除旧的独立解析器插件文件
This commit is contained in:
259
plugins/web_parser/parsers/bili.py
Normal file
259
plugins/web_parser/parsers/bili.py
Normal file
@@ -0,0 +1,259 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
import json
|
||||
import aiohttp
|
||||
from typing import Optional, Dict, Any, List
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from core.utils.logger import logger
|
||||
from models import MessageEvent, MessageSegment
|
||||
from ..base import BaseParser
|
||||
from ..utils import format_duration, clean_url
|
||||
|
||||
from cachetools import TTLCache
|
||||
|
||||
class BiliParser(BaseParser):
|
||||
"""
|
||||
B站视频解析器
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Set up the Bilibili parser: display names, URL matcher and de-dup cache."""
    super().__init__()

    # `name` is used as the log prefix; `nickname` labels forwarded message nodes.
    self.name = "B站解析器"
    self.nickname = "B站视频解析"

    # Matches bilibili.com/video/<id> pages as well as b23.tv short links.
    bili_link = r"https?://(?:www\.)?(bilibili\.com/video/\w+|b23\.tv/[a-zA-Z0-9]+)"
    self.url_pattern = re.compile(bili_link)

    # Message de-duplication cache: at most 100 entries, each kept for 10 seconds.
    self.processed_messages: TTLCache[int, bool] = TTLCache(maxsize=100, ttl=10)
|
||||
|
||||
async def parse(self, url: str) -> Optional[Dict[str, Any]]:
    """
    Fetch a Bilibili video page and extract its metadata.

    The ``window.__INITIAL_STATE__`` JSON object embedded in the page is the
    primary source. When it is absent or cannot be decoded, a minimal record
    is built from the page ``<title>`` and the BV id found in the URL.

    Args:
        url (str): Bilibili video URL.

    Returns:
        Optional[Dict[str, Any]]: Video info dict, or None on failure.
    """
    try:
        # Drop the query string and any "#/" fragment before requesting.
        # (Named page_url so the imported clean_url helper is not shadowed.)
        page_url = url.split('?')[0]
        if '#/' in page_url:
            page_url = page_url.split('#/')[0]

        session = self.get_session()
        async with session.get(page_url, headers=self.HEADERS, timeout=5) as response:
            response.raise_for_status()
            html = await response.text()

        state = self._load_initial_state(html)
        if state is None:
            # No usable state blob: degrade to title-only information.
            return self._info_from_title(html, page_url)

        # `or {}` also covers keys that are present but null.
        video_data = state.get('videoData') or {}
        up_data = state.get('upData') or {}
        stat = video_data.get('stat') or {}
        owner = video_data.get('owner') or {}

        return {
            "title": video_data.get('title', '未知标题'),
            "bvid": video_data.get('bvid', '未知BV号'),
            "duration": video_data.get('duration', 0),
            "cover_url": self._normalize_image_url(video_data.get('pic', '')),
            "play": stat.get('view', 0),
            "like": stat.get('like', 0),
            "coin": stat.get('coin', 0),
            "favorite": stat.get('favorite', 0),
            "share": stat.get('share', 0),
            "owner_name": owner.get('name', '未知UP主'),
            "owner_avatar": self._normalize_image_url(owner.get('face', '')),
            "followers": up_data.get('fans', 0),
        }

    except (aiohttp.ClientError, KeyError, AttributeError, json.JSONDecodeError) as e:
        logger.error(f"[{self.name}] 解析视频信息失败: {e}")
        logger.debug(f"失败的URL: {url}")
    except Exception as e:
        logger.error(f"[{self.name}] 解析视频信息时发生未知错误: {e}")
        logger.debug(f"失败的URL: {url}")

    return None

def _load_initial_state(self, html: str) -> Optional[Dict[str, Any]]:
    """Decode the ``window.__INITIAL_STATE__`` object embedded in *html*, or return None."""
    marker = re.search(r'window\.__INITIAL_STATE__\s*=\s*', html)
    if not marker:
        return None
    try:
        # raw_decode stops at the end of the first complete JSON value, so
        # nested braces or "};" inside string values cannot truncate the
        # payload, and URLs ("https://...") are never mistaken for comments.
        state, _ = json.JSONDecoder().raw_decode(html, marker.end())
    except json.JSONDecodeError as e:
        logger.debug(f"[{self.name}] __INITIAL_STATE__ JSON解析失败: {e}")
        return None
    return state if isinstance(state, dict) else None

def _info_from_title(self, html: str, page_url: str) -> Optional[Dict[str, Any]]:
    """Build a minimal info dict from the page ``<title>`` and the BV id in *page_url*."""
    title_tag = BeautifulSoup(html, 'html.parser').find('title')
    if not title_tag:
        return None

    title = title_tag.get_text().strip()
    bv_match = re.search(r'(BV\w{10})', page_url)
    return {
        "title": title.replace('_哔哩哔哩_bilibili', '').strip(),
        "bvid": bv_match.group(1) if bv_match else '未知BV号',
        "duration": 0,
        "cover_url": '',
        "play": 0,
        "like": 0,
        "coin": 0,
        "favorite": 0,
        "share": 0,
        "owner_name": '未知UP主',
        "owner_avatar": '',
        "followers": 0,
    }

def _normalize_image_url(self, image_url: Optional[str]) -> str:
    """Cut everything from the first '@' and make protocol-relative URLs absolute."""
    if not image_url:
        return ''
    # Presumably the '@...' tail carries image-processing parameters; drop it.
    image_url = image_url.split('@')[0]
    if image_url.startswith('//'):
        image_url = 'https:' + image_url
    return image_url
|
||||
|
||||
async def get_real_url(self, short_url: str) -> Optional[str]:
    """
    Resolve a Bilibili short link to the URL it redirects to.

    Sends a HEAD request without following redirects and returns the
    ``Location`` header of the redirect response.

    Args:
        short_url (str): Bilibili short link.

    Returns:
        Optional[str]: Redirect target, or None if the server did not
        redirect or the request failed.
    """
    # Accept every redirect status, not only 302: a permanent (301/308) or
    # method-preserving (307) redirect carries the target just the same.
    redirect_statuses = (301, 302, 303, 307, 308)
    try:
        session = self.get_session()
        async with session.head(short_url, headers=self.HEADERS, allow_redirects=False, timeout=5) as response:
            if response.status in redirect_statuses:
                return response.headers.get('Location')
            logger.debug(f"[{self.name}] 短链接未返回重定向,状态码: {response.status}")
    except Exception as e:
        logger.error(f"[{self.name}] 获取真实URL失败: {e}")
    return None
|
||||
|
||||
async def get_direct_video_url(self, video_url: str) -> Optional[str]:
    """
    Ask the third-party API for a direct media URL of a Bilibili video.

    Args:
        video_url (str): Full Bilibili video URL.

    Returns:
        Optional[str]: Direct video URL, or None on failure.
    """
    import asyncio  # local import: only needed for the timeout exception type

    api_url = "https://api.mir6.com/api/bzjiexi"
    # Pass the target through `params` so it is URL-encoded; interpolating it
    # into the query string breaks as soon as video_url contains '&' or '?'.
    query = {"url": video_url, "type": "json"}
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(api_url, params=query, headers=self.HEADERS, timeout=10) as response:
                response.raise_for_status()
                # content_type=None skips the Content-Type check.
                data = await response.json(content_type=None)

        if isinstance(data, dict) and data.get("code") == 200 and data.get("data"):
            return data["data"][0].get("video_url")
    except (
        aiohttp.ClientError,
        asyncio.TimeoutError,  # not a ClientError; previously escaped to the caller
        json.JSONDecodeError,
        KeyError,
        IndexError,
        TypeError,
        AttributeError,  # unexpected payload shape
    ) as e:
        logger.error(f"[{self.name}] 调用第三方API解析视频失败: {e}")
    return None
|
||||
|
||||
async def format_response(self, event: MessageEvent, data: Dict[str, Any]) -> List[Any]:
    """
    Build the forward-message nodes for a parsed Bilibili video.

    Nodes are emitted in this order: text summary, cover image (if any),
    uploader avatar (if any), then the video segment or a notice explaining
    why no video is attached.

    Args:
        event (MessageEvent): Message event being answered.
        data (Dict[str, Any]): Video info as returned by ``parse``.

    Returns:
        List[Any]: Forward nodes.
    """
    def forward_node(message):
        # Every node is sent as the bot itself under this parser's nickname.
        return event.bot.build_forward_node(
            user_id=event.self_id, nickname=self.nickname, message=message
        )

    if data['duration'] > 1200:  # 20 minutes = 1200 seconds
        video_message = "视频时长超过20分钟,不进行解析。"
    else:
        video_url = f"https://www.bilibili.com/video/{data.get('bvid', '')}"
        direct_url = await self.get_direct_video_url(video_url)
        if direct_url:
            video_message = MessageSegment.video(direct_url)
        else:
            video_message = "视频解析失败,无法获取直链。"

    text_message = (
        f"BiliBili 视频解析\n"
        f"--------------------\n"
        f" UP主: {data['owner_name']}\n"
        f" 粉丝: {self.format_count(data['followers'])}\n"
        f"--------------------\n"
        f" 标题: {data['title']}\n"
        f" BV号: {data['bvid']}\n"
        f" 时长: {format_duration(data['duration'])}\n"
        f"--------------------\n"
        f" 数据:\n"
        f" 播放: {self.format_count(data['play'])}\n"
        f" 点赞: {self.format_count(data['like'])}\n"
        f" 投币: {self.format_count(data['coin'])}\n"
        f" 收藏: {self.format_count(data['favorite'])}\n"
        f" 转发: {self.format_count(data['share'])}\n"
    )

    nodes = [forward_node(text_message)]

    # parse() can return empty image URLs (title-only fallback); skip those
    # nodes instead of building an image segment from an empty string.
    if data.get('cover_url'):
        nodes.append(forward_node([
            MessageSegment.text("B站封面:"),
            MessageSegment.image(data['cover_url']),
        ]))
    if data.get('owner_avatar'):
        nodes.append(forward_node([
            MessageSegment.text("UP主头像:"),
            MessageSegment.image(data['owner_avatar']),
        ]))

    nodes.append(forward_node(video_message))
    return nodes
|
||||
|
||||
def should_handle_url(self, url: str) -> bool:
    """
    Report whether this parser is responsible for the given URL.

    Args:
        url (str): Candidate URL (or text containing one).

    Returns:
        bool: True when the Bilibili pattern is found anywhere in *url*.
    """
    # url_pattern covers bilibili.com video pages and b23.tv short links.
    found = self.url_pattern.search(url)
    return found is not None
|
||||
261
plugins/web_parser/parsers/douyin.py
Normal file
261
plugins/web_parser/parsers/douyin.py
Normal file
@@ -0,0 +1,261 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
import json
|
||||
import aiohttp
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
from core.utils.logger import logger
|
||||
from models import MessageEvent, MessageSegment
|
||||
from ..base import BaseParser
|
||||
from ..utils import extract_original_text
|
||||
from cachetools import TTLCache
|
||||
|
||||
|
||||
class DouyinParser(BaseParser):
|
||||
"""
|
||||
抖音视频解析器
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Set up the Douyin parser: display names, URL matchers and de-dup cache."""
    super().__init__()

    # `name` is used as the log prefix; `nickname` labels forwarded message nodes.
    self.name = "抖音解析器"
    self.nickname = "抖音视频解析"

    # v.douyin.com share links; short_pattern additionally accepts links
    # written without the http(s):// scheme. Both ignore case.
    flags = re.IGNORECASE
    self.url_pattern = re.compile(r"https?://v\.douyin\.com/[a-zA-Z0-9_]+/?", flags)
    self.short_pattern = re.compile(r"(?:https?://)?v\.douyin\.com/[a-zA-Z0-9_]+/?", flags)

    # Message de-duplication cache: at most 100 entries, each kept for 10 seconds.
    self.processed_messages: TTLCache[int, bool] = TTLCache(maxsize=100, ttl=10)
|
||||
|
||||
async def parse(self, url: str) -> Optional[Dict[str, Any]]:
    """
    Resolve a Douyin post through the third-party API.

    Args:
        url (str): Douyin video URL.

    Returns:
        Optional[Dict[str, Any]]: Normalised post info, or None on failure.
    """
    api_endpoint = "http://api.xhus.cn/api/douyin"
    try:
        session = self.get_session()
        # Send the target via `params` so it is URL-encoded. Interpolated
        # into the query string, any '&' inside the share URL would cut the
        # `url` parameter short.
        async with session.get(api_endpoint, params={"url": url}, headers=self.HEADERS, timeout=10) as response:
            if response.status != 200:
                logger.error(f"[{self.name}] API请求失败,状态码: {response.status}")
                return None
            response_data = await response.json()

        if not isinstance(response_data, dict):
            logger.error(f"[{self.name}] API返回格式错误: {response_data}")
            return None

        if response_data.get("code") != 200:
            logger.error(f"[{self.name}] API返回错误: {response_data}")
            return None

        data = response_data.get("data", {})
        if not data:
            logger.error(f"[{self.name}] API返回数据为空")
            return None

        # A non-empty `images` list marks an image post; anything else is a video.
        images = data.get("images")
        post_type = "image" if images and isinstance(images, list) else "video"

        # Map the API field names onto the keys format_response() reads.
        return {
            "type": post_type,
            "video_url": data.get("url", ""),
            "video_url_HQ": data.get("url", ""),
            "nickname": data.get("author", "未知作者"),
            "desc": data.get("title", "无描述"),
            "aweme_id": data.get("uid", ""),
            "like": data.get("like", 0),
            "cover": data.get("cover", ""),
            "time": data.get("time", 0),
            "author_avatar": data.get("avatar", ""),
            "music": data.get("music", {}),
        }

    except (aiohttp.ClientError, KeyError, AttributeError, json.JSONDecodeError) as e:
        logger.error(f"[{self.name}] 解析抖音视频信息失败: {e}")
        logger.debug(f"失败的URL: {url}")
    except Exception as e:
        logger.error(f"[{self.name}] 解析抖音视频时发生未知错误: {e}")
        logger.debug(f"失败的URL: {url}")

    return None
|
||||
|
||||
async def get_real_url(self, short_url: str) -> Optional[str]:
    """
    Follow a Douyin short link and return the final URL.

    The request follows redirects with iPhone-Safari style headers. The
    final URL is always returned; only the log line differs depending on
    what kind of page was reached.

    Args:
        short_url (str): Douyin short link.

    Returns:
        Optional[str]: Final URL after redirects, or None if the request failed.
    """
    header_overrides = {
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Cache-Control': 'max-age=0',
        # Extra headers imitating a mobile device
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': 'https://www.douyin.com/'
    }

    try:
        async with aiohttp.ClientSession() as session:
            request_headers = self.HEADERS.copy()
            request_headers.update(header_overrides)

            async with session.get(short_url, headers=request_headers, allow_redirects=True, timeout=10) as response:
                redirected_url = str(response.url)

                # Classify the landing page purely for logging purposes.
                looks_like_post = 'video/' in redirected_url or '/note/' in redirected_url
                if looks_like_post:
                    logger.info(f"[{self.name}] 重定向后的视频URL: {redirected_url}")
                elif 'share_item' in redirected_url:
                    logger.info(f"[{self.name}] 重定向后的分享URL: {redirected_url}")
                else:
                    logger.warning(f"[{self.name}] 重定向到了非预期页面: {redirected_url}")

                return redirected_url

    except Exception as e:
        logger.error(f"[{self.name}] 获取真实URL失败: {e}")
        return None
|
||||
|
||||
async def format_response(self, event: MessageEvent, data: Dict[str, Any]) -> List[Any]:
    """
    Build the forward-message nodes for a parsed Douyin post.

    Nodes are emitted in this order: text summary, cover image (if any),
    author avatar (if any), then the video/image segment or a notice when
    it could not be attached.

    Args:
        event (MessageEvent): Message event being answered.
        data (Dict[str, Any]): Post info as returned by ``parse``.

    Returns:
        List[Any]: Forward nodes.
    """
    divider = "--------------------"

    def forward_node(message):
        # Every node is sent as the bot itself under this parser's nickname.
        return event.bot.build_forward_node(
            user_id=event.self_id,
            nickname=self.nickname,
            message=message
        )

    # Text accompanying the shared link in the original message, if any.
    original_text = extract_original_text(event.message, self.url_pattern)

    lines = ["抖音视频解析", divider]
    if original_text:
        lines += [f" 分享内容: {original_text}", divider]

    lines += [
        f" 作者: {data['nickname']}",
        f" 抖音号: {data['aweme_id']}",
        f" 标题: {data['desc']}",
        f" 点赞: {self.format_count(data['like'])}",
        f" 类型: {data['type']}",
    ]

    # Background-music section, only when music info is present.
    if data.get('music'):
        music_info = data['music']
        lines += [
            divider,
            " 背景音乐:",
            f" 标题: {music_info.get('title', '')}",
            f" 作者: {music_info.get('author', '')}",
        ]

    lines.append(divider)

    nodes = [forward_node("\n".join(lines))]

    # Cover image node (best effort).
    if data.get('cover'):
        try:
            nodes.append(forward_node([
                MessageSegment.text("抖音视频封面:\n"),
                MessageSegment.image(data['cover'])
            ]))
        except Exception as e:
            logger.warning(f"[{self.name}] 无法添加封面图片: {e}")

    # Author avatar node (best effort).
    if data.get('author_avatar'):
        try:
            nodes.append(forward_node([
                MessageSegment.text("作者头像:\n"),
                MessageSegment.image(data['author_avatar'])
            ]))
        except Exception as e:
            logger.warning(f"[{self.name}] 无法添加作者头像: {e}")

    # Media node: a video segment for videos, otherwise the URL is sent as an image.
    media_attached = False
    try:
        if data.get('video_url'):
            media_url = data.get('video_url', '')
            if data.get('type') == 'video':
                media_segment = MessageSegment.video(media_url)
                media_label = "视频直链:"
            else:
                media_segment = MessageSegment.image(media_url)
                media_label = "图集首图:"

            nodes.append(forward_node([
                MessageSegment.text(media_label + "\n"),
                media_segment
            ]))
            media_attached = True
    except Exception as e:
        logger.error(f"[{self.name}] 无法添加视频/图片: {e}")

    # Tell the user when no media node could be attached.
    if not media_attached:
        nodes.append(forward_node("视频解析成功,但无法获取直链或播放视频。"))

    return nodes
|
||||
|
||||
def should_handle_url(self, url: str) -> bool:
    """
    Report whether this parser is responsible for the given URL.

    Args:
        url (str): Candidate URL (or text containing one).

    Returns:
        bool: True when *url* mentions douyin.com or matches either
        short-link pattern.
    """
    # Cheap substring test first; the case-insensitive patterns catch the rest.
    if 'douyin.com' in url:
        return True
    return any(
        pattern.search(url) is not None
        for pattern in (self.url_pattern, self.short_pattern)
    )
|
||||
201
plugins/web_parser/parsers/github.py
Normal file
201
plugins/web_parser/parsers/github.py
Normal file
@@ -0,0 +1,201 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
import json
|
||||
import aiohttp
|
||||
from typing import Optional, Dict, Any, List
|
||||
from cachetools import TTLCache
|
||||
|
||||
from core.utils.logger import logger
|
||||
from core.managers.image_manager import image_manager
|
||||
from models import MessageEvent, MessageSegment
|
||||
from ..base import BaseParser
|
||||
|
||||
|
||||
class GitHubParser(BaseParser):
|
||||
"""
|
||||
GitHub仓库解析器
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Set up the GitHub parser: display names, URL matcher and caches."""
    super().__init__()

    # `name` is used as the log prefix; `nickname` labels forwarded message nodes.
    self.name = "GitHub解析器"
    self.nickname = "GitHub仓库信息"

    # Group 1 captures the owner, group 2 the repository name; any trailing
    # path after the repository is matched but not captured.
    repo_link = r"https?://(?:www\.)?github\.com/([\w\-]+)/([\w\-\.]+)(?:/[^\s]*)?"
    self.url_pattern = re.compile(repo_link)

    # Message de-duplication cache: at most 100 entries, each kept for 10 seconds.
    self.processed_messages: TTLCache[int, bool] = TTLCache(maxsize=100, ttl=10)

    # GitHub API response cache to avoid frequent requests:
    # at most 100 entries, each kept for one hour.
    self.api_cache = TTLCache(maxsize=100, ttl=3600)
|
||||
|
||||
async def parse(self, url: str) -> Optional[Dict[str, Any]]:
    """
    Extract owner and repository from a GitHub URL and fetch the repo info.

    Args:
        url (str): GitHub repository URL.

    Returns:
        Optional[Dict[str, Any]]: Repository info dict, or None when the URL
        does not match or the lookup fails.
    """
    match = self.url_pattern.search(url)
    if not match:
        return None

    owner, repo = match.group(1), match.group(2)

    # Strip only a trailing ".git" (clone URLs). A plain replace() would also
    # mangle names that merely contain it: "user.github.io" -> "userhub.io".
    git_suffix = ".git"
    if repo.endswith(git_suffix):
        repo = repo[:-len(git_suffix)]
    if not repo:
        return None

    return await self.get_github_repo_info(owner, repo)
|
||||
|
||||
async def get_real_url(self, short_url: str) -> Optional[str]:
    """
    Resolve a short link to the URL it redirects to.

    Sends a HEAD request without following redirects and returns the
    ``Location`` header of the redirect response.

    Args:
        short_url (str): Short link.

    Returns:
        Optional[str]: Redirect target, or None if the server did not
        redirect or the request failed.
    """
    # Accept every redirect status, not only 302: a permanent (301/308) or
    # method-preserving (307) redirect carries the target just the same.
    redirect_statuses = (301, 302, 303, 307, 308)
    try:
        session = self.get_session()
        async with session.head(short_url, headers=self.HEADERS, allow_redirects=False, timeout=5) as response:
            if response.status in redirect_statuses:
                return response.headers.get('Location')
            logger.debug(f"[{self.name}] 短链接未返回重定向,状态码: {response.status}")
    except Exception as e:
        logger.error(f"[{self.name}] 获取真实URL失败: {e}")
    return None
|
||||
|
||||
async def get_github_repo_info(self, owner: str, repo: str) -> Optional[Dict[str, Any]]:
    """
    Fetch repository information from the GitHub API, with caching.

    Successful responses are stored in ``self.api_cache`` under
    ``"owner/repo"``; failed requests are not cached.

    Args:
        owner (str): Repository owner.
        repo (str): Repository name.

    Returns:
        Optional[Dict[str, Any]]: Repository info dict, or None on failure.
    """
    cache_key = f"{owner}/{repo}"

    # Single lookup instead of `in` followed by `[]`: a TTL entry may expire
    # between the two steps, which would raise KeyError outside any handler.
    cached = self.api_cache.get(cache_key)
    if cached is not None:
        logger.info(f"[{self.name}] 使用缓存的仓库信息: {cache_key}")
        return cached

    api_url = f"https://api.github.com/repos/{owner}/{repo}"
    try:
        session = self.get_session()
        async with session.get(api_url, timeout=10) as response:
            response.raise_for_status()
            repo_data = await response.json()

        self.api_cache[cache_key] = repo_data
        logger.info(f"[{self.name}] 成功获取仓库信息并缓存: {cache_key}")
        return repo_data

    except aiohttp.ClientError as e:
        logger.error(f"[{self.name}] GitHub API请求失败: {e}")
    except json.JSONDecodeError as e:
        logger.error(f"[{self.name}] 解析GitHub API响应失败: {e}")
    except Exception as e:
        logger.error(f"[{self.name}] 获取仓库信息时发生未知错误: {e}")

    return None
|
||||
|
||||
async def generate_repo_image(self, repo_data: Dict[str, Any]) -> Optional[str]:
    """
    Render repository information to an image via the ``github_repo.html`` template.

    Args:
        repo_data (Dict[str, Any]): Repository info dict from the GitHub API.

    Returns:
        Optional[str]: Base64-encoded image as returned by the image manager,
        or None if rendering failed.
    """
    try:
        template_data = {
            "full_name": repo_data.get("full_name", ""),
            # The API sends `"description": null` for repos without one, so the
            # key exists and a .get() default would never apply; use `or`.
            "description": repo_data.get("description") or "暂无描述",
            "owner_avatar": (repo_data.get("owner") or {}).get("avatar_url", ""),
            "stargazers_count": repo_data.get("stargazers_count", 0),
            "forks_count": repo_data.get("forks_count", 0),
            "open_issues_count": repo_data.get("open_issues_count", 0),
            "watchers_count": repo_data.get("watchers_count", 0),
        }

        # Render as PNG at the highest quality setting.
        return await image_manager.render_template_to_base64(
            template_name="github_repo.html",
            data=template_data,
            output_name=f"github_{repo_data.get('name', 'repo')}.png",
            quality=100,
            image_type="png"
        )

    except Exception as e:
        logger.error(f"[{self.name}] 生成仓库信息图片失败: {e}")
        return None
|
||||
|
||||
async def format_response(self, event: MessageEvent, data: Dict[str, Any]) -> List[Any]:
    """
    Build the forward-message nodes for a GitHub repository.

    A rendered image is preferred; if rendering fails, a plain-text summary
    is sent instead.

    Args:
        event (MessageEvent): Message event being answered.
        data (Dict[str, Any]): Repository info dict.

    Returns:
        List[Any]: A single-element list holding the image or text node.
    """
    image_base64 = await self.generate_repo_image(data)
    if image_base64:
        message = MessageSegment.image(image_base64)
    else:
        # The API sends `"description": null` for repos without one, so the
        # key exists and a .get() default would print "None"; use `or`.
        description = data.get('description') or '暂无描述'
        message = (
            f"GitHub 仓库信息\n"
            f"--------------------\n"
            f"仓库: {data.get('full_name', '')}\n"
            f"描述: {description}\n"
            f"--------------------\n"
            f"数据:\n"
            f" 星标: {data.get('stargazers_count', 0)}\n"
            f" Fork: {data.get('forks_count', 0)}\n"
            f" Issues: {data.get('open_issues_count', 0)}\n"
            f" 关注: {data.get('watchers_count', 0)}\n"
        )

    node = event.bot.build_forward_node(
        user_id=event.self_id,
        nickname=self.nickname,
        message=message
    )
    return [node]
|
||||
|
||||
def should_handle_url(self, url: str) -> bool:
    """
    Report whether this parser is responsible for the given URL.

    Args:
        url (str): Candidate URL (or text containing one).

    Returns:
        bool: True when the repository pattern matches and *url* contains
        ``github.com``.
    """
    if self.url_pattern.search(url) is None:
        return False
    return 'github.com' in url
|
||||
Reference in New Issue
Block a user