Files
NeoBot/plugins/web_parser/parsers/github.py
K2cr2O1 1420d0f0b2 feat(web_parser): 新增通用web链接解析插件框架
refactor: 重构B站、抖音、GitHub解析器为模块化结构

fix(executor): 增强docker容器错误处理和回调稳定性

style(templates): 优化帮助页面和代码执行结果的样式

perf(web_parser): 添加API缓存和消息去重机制

docs: 更新插件元信息和注释

chore: 移除旧的独立解析器插件文件
2026-01-22 01:58:13 +08:00

202 lines
7.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import re
import json
import aiohttp
from typing import Optional, Dict, Any, List
from cachetools import TTLCache
from core.utils.logger import logger
from core.managers.image_manager import image_manager
from models import MessageEvent, MessageSegment
from ..base import BaseParser
class GitHubParser(BaseParser):
    """
    Parser for GitHub repository links.

    Extracts ``owner/repo`` from a github.com URL, fetches the repository
    metadata from the GitHub REST API (responses are kept in a TTL cache),
    and formats the result as forward-message nodes: a rendered image card
    when rendering succeeds, otherwise a plain-text summary.
    """

    # Suffix that clone URLs carry and that is not part of the repo name.
    _GIT_SUFFIX = ".git"
    # HTTP status codes that carry a ``Location`` header pointing elsewhere.
    _REDIRECT_STATUSES = frozenset({301, 302, 303, 307, 308})
    # Placeholder shown when the repository has no description.
    _NO_DESCRIPTION = "暂无描述"

    def __init__(self):
        super().__init__()
        self.name = "GitHub解析器"
        # Group 1 is the owner, group 2 the repository name; any trailing
        # path (issues, blob, ...) is matched but not captured.
        self.url_pattern = re.compile(r"https?://(?:www\.)?github\.com/([\w\-]+)/([\w\-\.]+)(?:/[^\s]*)?")
        self.nickname = "GitHub仓库信息"
        # Message de-duplication cache (10 s TTL). Not read inside this
        # class; presumably consumed by the plugin framework -- verify.
        self.processed_messages: TTLCache[int, bool] = TTLCache(maxsize=100, ttl=10)
        # Cache of GitHub API responses to avoid frequent requests:
        # at most 100 entries, each expiring after one hour.
        self.api_cache = TTLCache(maxsize=100, ttl=3600)

    async def parse(self, url: str) -> Optional[Dict[str, Any]]:
        """
        Resolve a GitHub URL to its repository metadata.

        Args:
            url (str): Text containing a GitHub repository URL.

        Returns:
            Optional[Dict[str, Any]]: The repository info dict, or None when
            the URL does not match or the lookup fails.
        """
        match = self.url_pattern.search(url)
        if not match:
            return None

        owner, repo = match.group(1), match.group(2)

        # Strip only a *trailing* ".git". A plain str.replace would also
        # mangle names such as "user.github.io" into "userhub.io".
        if repo.endswith(self._GIT_SUFFIX):
            repo = repo[: -len(self._GIT_SUFFIX)]
        if not repo:
            # The URL was something like ".../owner/.git": nothing to query.
            return None

        return await self.get_github_repo_info(owner, repo)

    async def get_real_url(self, short_url: str) -> Optional[str]:
        """
        Resolve a short link to the URL it redirects to.

        Issues a single HEAD request without following redirects and returns
        the ``Location`` header of a redirect response.

        Args:
            short_url (str): The short link.

        Returns:
            Optional[str]: The redirect target, or None when the response is
            not a redirect or the request fails.
        """
        try:
            session = self.get_session()
            async with session.head(
                short_url,
                headers=self.HEADERS,
                allow_redirects=False,
                timeout=aiohttp.ClientTimeout(total=5),
            ) as response:
                # Shorteners may answer with any redirect code, not only 302.
                if response.status in self._REDIRECT_STATUSES:
                    return response.headers.get('Location')
        except Exception as e:
            # Best effort: a failed resolution is logged and reported as None.
            logger.error(f"[{self.name}] 获取真实URL失败: {e}")
        return None

    async def get_github_repo_info(self, owner: str, repo: str) -> Optional[Dict[str, Any]]:
        """
        Fetch repository metadata from the GitHub API.

        Args:
            owner (str): Login of the repository owner.
            repo (str): Repository name.

        Returns:
            Optional[Dict[str, Any]]: The decoded API response, or None when
            the request or decoding fails.
        """
        cache_key = f"{owner}/{repo}"

        # Single lookup: with a TTL cache, "key in cache" followed by
        # "cache[key]" can raise KeyError if the entry expires in between.
        cached = self.api_cache.get(cache_key)
        if cached is not None:
            logger.info(f"[{self.name}] 使用缓存的仓库信息: {cache_key}")
            return cached

        api_url = f"https://api.github.com/repos/{owner}/{repo}"
        try:
            session = self.get_session()
            async with session.get(api_url, timeout=aiohttp.ClientTimeout(total=10)) as response:
                response.raise_for_status()
                repo_data = await response.json()
                # Store the response in the cache.
                self.api_cache[cache_key] = repo_data
                logger.info(f"[{self.name}] 成功获取仓库信息并缓存: {cache_key}")
                return repo_data
        except aiohttp.ClientError as e:
            logger.error(f"[{self.name}] GitHub API请求失败: {e}")
        except json.JSONDecodeError as e:
            logger.error(f"[{self.name}] 解析GitHub API响应失败: {e}")
        except Exception as e:
            logger.error(f"[{self.name}] 获取仓库信息时发生未知错误: {e}")
        return None

    @staticmethod
    def _summarize_repo(repo_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract the displayed fields from an API response, tolerating nulls."""
        # The API sends explicit nulls (e.g. "description": null), which
        # dict.get(key, default) passes through; "or" applies the fallback.
        owner = repo_data.get("owner") or {}
        return {
            "full_name": repo_data.get("full_name") or "",
            "description": repo_data.get("description") or GitHubParser._NO_DESCRIPTION,
            "owner_avatar": owner.get("avatar_url", ""),
            "stargazers_count": repo_data.get("stargazers_count", 0),
            "forks_count": repo_data.get("forks_count", 0),
            "open_issues_count": repo_data.get("open_issues_count", 0),
            # In the GitHub REST API "watchers_count" mirrors the star count;
            # the real number of watchers is "subscribers_count". Fall back
            # to the old field when the newer one is absent.
            "watchers_count": repo_data.get(
                "subscribers_count", repo_data.get("watchers_count", 0)
            ),
        }

    async def generate_repo_image(self, repo_data: Dict[str, Any]) -> Optional[str]:
        """
        Render the repository info as an image via the HTML template.

        Args:
            repo_data (Dict[str, Any]): Repository info dict.

        Returns:
            Optional[str]: Base64 encoding of the generated image, or None
            when rendering fails.
        """
        try:
            # Render the template to an image using high-quality settings.
            return await image_manager.render_template_to_base64(
                template_name="github_repo.html",
                data=self._summarize_repo(repo_data),
                output_name=f"github_{repo_data.get('name', 'repo')}.png",
                quality=100,
                image_type="png"
            )
        except Exception as e:
            # Callers fall back to a text message when None is returned.
            logger.error(f"[{self.name}] 生成仓库信息图片失败: {e}")
            return None

    async def format_response(self, event: MessageEvent, data: Dict[str, Any]) -> List[Any]:
        """
        Build the reply for a GitHub repository.

        Args:
            event (MessageEvent): The message event being answered.
            data (Dict[str, Any]): Repository info.

        Returns:
            List[Any]: A list with one forward node: the image card, or a
            text summary when the image could not be generated.
        """
        image_base64 = await self.generate_repo_image(data)
        if image_base64:
            message = MessageSegment.image(image_base64)
        else:
            # Image generation failed: send the same fields as plain text.
            summary = self._summarize_repo(data)
            message = (
                f"GitHub 仓库信息\n"
                f"--------------------\n"
                f"仓库: {summary['full_name']}\n"
                f"描述: {summary['description']}\n"
                f"--------------------\n"
                f"数据:\n"
                f" 星标: {summary['stargazers_count']}\n"
                f" Fork: {summary['forks_count']}\n"
                f" Issues: {summary['open_issues_count']}\n"
                f" 关注: {summary['watchers_count']}\n"
            )

        node = event.bot.build_forward_node(
            user_id=event.self_id,
            nickname=self.nickname,
            message=message
        )
        return [node]

    def should_handle_url(self, url: str) -> bool:
        """
        Decide whether this parser should handle the URL.

        Args:
            url (str): The URL to test.

        Returns:
            bool: True when the URL matches the GitHub repository pattern.
        """
        # The pattern itself requires the literal "github.com", so no
        # separate substring check is needed.
        return bool(self.url_pattern.search(url))