diff --git a/core/managers/1.py b/core/managers/1.py deleted file mode 100644 index aa631a7..0000000 --- a/core/managers/1.py +++ /dev/null @@ -1,32 +0,0 @@ - -class 真鸭子: - def 叫(self): - print("嘎嘎嘎") - - def 跑(self): - print("鸭子摇摇摆摆跑") - -class 玩具鸭子: - def 叫(self): - print("玩具鸭发出嘎嘎声") - - def 跑(self): - print("玩具鸭轮子咕噜噜跑") - -class 小猫: - def 叫(self): - print("喵喵喵") - def 跑(self): - print("猫咪跑跑") - -def 逗鸭子(鸭子一样的东西): - 鸭子一样的东西.叫() - 鸭子一样的东西.跑() - -逗鸭子(真鸭子()) - -逗鸭子(玩具鸭子()) - -逗鸭子(小猫()) - -鸭子 = 1 \ No newline at end of file diff --git a/plugins/web_parser/base.py b/plugins/web_parser/base.py index 8510d77..941fad0 100644 --- a/plugins/web_parser/base.py +++ b/plugins/web_parser/base.py @@ -117,7 +117,7 @@ class BaseParser(metaclass=abc.ABCMeta): def extract_url_from_text_segments(self, segments): """ - 从消息的文本段中提取URL + 从消息的文本段中提取URL,会合并所有文本段来处理被分割的链接。 Args: segments: 消息段列表 @@ -125,14 +125,19 @@ class BaseParser(metaclass=abc.ABCMeta): Returns: Optional[str]: 提取到的URL或None """ - for segment in segments: - if segment.type == "text": - text_content = segment.data.get("text", "") - match = self.url_pattern.search(text_content) - if match: - extracted_url = match.group(0) - logger.success(f"[{self.name}] 成功从文本中提取到链接: {extracted_url}") - return extracted_url + # 1. 拼接所有文本段内容,保留空格 + full_text = "".join([segment.data.get("text", "") for segment in segments if segment.type == "text"]) + + # 2. 使用解析器自身的url_pattern进行匹配,通常是匹配到第一个空格为止 + match = self.url_pattern.search(full_text) + + if match: + extracted_url = match.group(0) + # 清理一下链接末尾可能误包含的标点符号 + extracted_url = re.sub(r'[,.!?]$', '', extracted_url) + logger.success(f"[{self.name}] 成功从合并后的文本中提取到链接: {extracted_url}") + return extracted_url + return None async def process_url(self, event: MessageEvent, url: str): diff --git a/plugins/web_parser/parsers/douyin.py b/plugins/web_parser/parsers/douyin.py index 6ce5bb8..72cd12b 100644 --- a/plugins/web_parser/parsers/douyin.py +++ b/plugins/web_parser/parsers/douyin.py @@ -18,8 +18,8 @@ class DouyinParser(BaseParser): def __init__(self): super().__init__() self.name = "抖音解析器" - self.url_pattern = re.compile(r"https?://v\.douyin\.com/[a-zA-Z0-9_]+/?", re.IGNORECASE) - self.short_pattern = re.compile(r"(?:https?://)?v\.douyin\.com/[a-zA-Z0-9_]+/?", re.IGNORECASE) + self.url_pattern = re.compile(r"https?://v\.douyin\.com/[a-zA-Z0-9_-]+/?", re.IGNORECASE) + self.short_pattern = re.compile(r"(?:https?://)?v\.douyin\.com/[a-zA-Z0-9_-]+/?", re.IGNORECASE) self.nickname = "抖音视频解析" # 消息去重缓存 self.processed_messages: TTLCache[int, bool] = TTLCache(maxsize=100, ttl=10) @@ -94,34 +94,17 @@ class DouyinParser(BaseParser): Optional[str]: 真实URL,如果失败则返回None """ try: - # 首先尝试获取重定向后的URL - async with aiohttp.ClientSession() as session: - # 添加更多头部信息模拟移动端访问 - mobile_headers = self.HEADERS.copy() - mobile_headers.update({ - 'Sec-Fetch-Dest': 'document', - 'Sec-Fetch-Mode': 'navigate', - 'Sec-Fetch-Site': 'none', - 'Cache-Control': 'max-age=0', - # 模拟移动设备的额外头部 - 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1', - 'X-Requested-With': 'XMLHttpRequest', - 'Referer': 'https://www.douyin.com/' - }) + session = self.get_session() + async with session.get(short_url, allow_redirects=True, timeout=aiohttp.ClientTimeout(total=10)) as response: + redirected_url = str(response.url) - async with session.get(short_url, headers=mobile_headers, allow_redirects=True, timeout=aiohttp.ClientTimeout(total=10)) as response: - redirected_url = str(response.url) - - # 检查重定向后的URL是否包含视频ID - if 'video/' in redirected_url or '/note/' in redirected_url: - logger.info(f"[{self.name}] 重定向后的视频URL: {redirected_url}") - return redirected_url - elif 'share_item' in redirected_url: - logger.info(f"[{self.name}] 重定向后的分享URL: {redirected_url}") - return redirected_url - else: - logger.warning(f"[{self.name}] 重定向到了非预期页面: {redirected_url}") - return redirected_url + # 检查重定向后的URL是否是有效的视频或图文页 + if 'douyin.com/video/' in redirected_url or 'douyin.com/note/' in redirected_url: + logger.info(f"[{self.name}] 成功获取真实URL: {redirected_url}") + return redirected_url + else: + logger.warning(f"[{self.name}] 短链接 {short_url} 重定向到了非预期的页面: {redirected_url}") + return None except Exception as e: logger.error(f"[{self.name}] 获取真实URL失败: {e}")