feat(web_parser): 新增通用web链接解析插件框架

refactor: 重构B站、抖音、GitHub解析器为模块化结构

fix(executor): 增强docker容器错误处理和回调稳定性

style(templates): 优化帮助页面和代码执行结果的样式

perf(web_parser): 添加API缓存和消息去重机制

docs: 更新插件元信息和注释

chore: 移除旧的独立解析器插件文件
This commit is contained in:
2026-01-22 01:58:13 +08:00
parent 5f943c1792
commit 1420d0f0b2
13 changed files with 1665 additions and 995 deletions

144
plugins/web_parser/utils.py Normal file
View File

@@ -0,0 +1,144 @@
# -*- coding: utf-8 -*-
import re
import json
from typing import Optional, Dict, Any, Union, List
from core.utils.logger import logger
from models import MessageEvent, MessageSegment
def format_duration(seconds: Union[int, float]) -> str:
    """
    Format a number of seconds as an ``MM:SS`` string.

    Minutes are not wrapped into hours, so 3600 seconds is rendered as
    ``"60:00"``.

    Args:
        seconds (int | float): Duration in seconds. Fractional values are
            truncated to whole seconds.

    Returns:
        str: The formatted duration, or ``"00:00"`` when the input is
        negative, non-numeric, NaN or infinite.
    """
    if isinstance(seconds, float):
        # A float duration (e.g. a millisecond value divided by 1000) used to
        # be rejected and shown as "00:00"; truncate it to whole seconds.
        try:
            seconds = int(seconds)
        except (OverflowError, ValueError):
            # int() raises OverflowError for infinity and ValueError for NaN.
            return "00:00"
    if not isinstance(seconds, int) or seconds < 0:
        return "00:00"
    minutes, remainder = divmod(seconds, 60)
    return f"{minutes:02d}:{remainder:02d}"
def clean_url(url: str) -> str:
    """
    Strip the query string and any ``#/`` fragment from a URL.

    Args:
        url (str): The original URL.

    Returns:
        str: Everything before the first ``?``, further cut at the first
        ``#/`` if one is present. Fragments that do not start with ``#/``
        are left in place.
    """
    # partition() returns the whole string as the head when the separator is
    # absent, so no explicit membership test is needed.
    without_query, _, _ = url.partition('?')
    without_fragment, _, _ = without_query.partition('#/')
    return without_fragment
def extract_original_text(segments: List[Any], url_pattern: re.Pattern) -> str:
    """
    Return the first non-empty text left in a message once links are removed.

    Each segment whose ``type`` is ``"text"`` is examined in order: matches of
    ``url_pattern`` and a common "copy this link" share hint are deleted and
    the remainder is stripped of surrounding whitespace.

    Args:
        segments (List[Any]): Message segments exposing ``type`` and ``data``.
        url_pattern (re.Pattern): Pattern matching the URLs to remove.

    Returns:
        str: The cleaned text of the first text segment that is not empty
        after cleaning, or ``""`` if there is none.
    """
    share_hint = r'复制此链接.*?打开.*?搜索.*?直接观看视频!'
    for seg in segments:
        if seg.type != "text":
            continue
        raw_text = seg.data.get("text", "")
        # Drop the links first, then the share-hint boilerplate.
        without_links = re.sub(url_pattern, '', raw_text)
        remainder = re.sub(share_hint, '', without_links).strip()
        if remainder:
            return remainder
    return ""
def build_forward_nodes(event: MessageEvent, nickname: str, messages: List[Any]) -> List[Any]:
    """
    Build forward-message nodes for a list of message contents.

    Every entry of ``messages`` that is a ``str`` or a ``list`` is passed to
    ``event.bot.build_forward_node`` with ``event.self_id`` as the sender id.
    Entries of any other type are skipped.

    Args:
        event (MessageEvent): The message event; its ``bot`` and ``self_id``
            attributes are used.
        nickname (str): Nickname shown as the sender of each node.
        messages (List[Any]): Message contents, one per node.

    Returns:
        List[Any]: The nodes returned by ``build_forward_node``, in input order.
    """
    # The str and list cases were handled by two identical branches; a single
    # isinstance check covers both.
    return [
        event.bot.build_forward_node(
            user_id=event.self_id,
            nickname=nickname,
            message=content,
        )
        for content in messages
        if isinstance(content, (str, list))
    ]
def safe_get(data: Dict[str, Any], keys: List[str], default: Any = None) -> Any:
    """
    Look up a value in nested dictionaries by following a path of keys.

    Args:
        data (Dict[str, Any]): The outermost dictionary.
        keys (List[str]): Keys to follow, outermost first.
        default (Any, optional): Value returned when the path cannot be
            followed. Defaults to None.

    Returns:
        Any: The value at the end of the path, or ``default`` if any step
        hits a non-dict or a missing key. An empty ``keys`` returns ``data``.
    """
    current = data
    for step in keys:
        # Stop as soon as the path can no longer be followed.
        if not isinstance(current, dict) or step not in current:
            return default
        current = current[step]
    return current
def normalize_url(url: str) -> str:
    """
    Ensure a URL carries an explicit HTTP(S) scheme.

    Args:
        url (str): The original URL.

    Returns:
        str: ``url`` unchanged if it already starts with ``http://`` or
        ``https://`` (case-insensitive); a protocol-relative ``//host/...``
        URL with ``https:`` prepended; otherwise ``url`` with ``https://``
        prepended.
    """
    # Match the full scheme rather than the bare "http" prefix, so hosts such
    # as "httpbin.org" are not mistaken for URLs that already have a scheme.
    if url.lower().startswith(('http://', 'https://')):
        return url
    # Protocol-relative URLs already contain the "//" separator.
    if url.startswith('//'):
        return 'https:' + url
    return 'https://' + url
def validate_url(url: str) -> bool:
    """
    Check whether a string starts with a well-formed HTTP(S) URL.

    Args:
        url (str): The URL to check.

    Returns:
        bool: True if ``url`` begins with ``http://`` or ``https://`` followed
        by at least one non-whitespace character; False otherwise, including
        for non-string input.
    """
    if not isinstance(url, str):
        return False
    # The previous pattern, 'https?://[^]+', was an unterminated character set
    # and made re.compile raise re.error on every call; \S+ matches the
    # non-whitespace run after the scheme.
    return re.match(r'https?://\S+', url) is not None