feat(跨平台): 优化消息处理并添加纯文本提取功能

添加 extract_text_only 函数过滤非文本标记
修改翻译逻辑仅处理纯文本内容
完善附件处理和消息内容拼接
修复仅包含表情时的消息处理问题
This commit is contained in:
2026-03-21 14:41:50 +08:00
parent bd59343d41
commit b016632b74
4 changed files with 54 additions and 13 deletions

View File

@@ -227,6 +227,9 @@ class DiscordToOneBotConverter:
Returns: Returns:
伪装后的 OneBot 事件对象 伪装后的 OneBot 事件对象
""" """
# 在方法内部导入 logger避免作用域问题
from core.utils.logger import logger
# 1. 提取基础信息 # 1. 提取基础信息
user_id = discord_message.author.id user_id = discord_message.author.id
message_id = discord_message.id message_id = discord_message.id
@@ -279,38 +282,43 @@ class DiscordToOneBotConverter:
else: else:
message_list.append(OneBotMessageSegment.text(content)) message_list.append(OneBotMessageSegment.text(content))
# 如果消息只包含表情(没有文本),更新 raw_message 以包含表情信息
if not raw_message.strip() or raw_message.strip().startswith('<'):
import re
raw_message = re.sub(r'<a?:([^:]+):(\d+)>', r'[\1]', raw_message)
# 添加附件信息 # 添加附件信息
if discord_message.attachments: if discord_message.attachments:
self.logger.debug(f"[DiscordToOneBotConverter] 检测到 {len(discord_message.attachments)} 个附件") logger.debug(f"[DiscordToOneBotConverter] 检测到 {len(discord_message.attachments)} 个附件")
for attachment in discord_message.attachments: for attachment in discord_message.attachments:
filename = attachment.filename.lower() filename = attachment.filename.lower()
self.logger.debug(f"[DiscordToOneBotConverter] 处理附件: {attachment.filename}, MIME: {attachment.content_type}") logger.debug(f"[DiscordToOneBotConverter] 处理附件: {attachment.filename}, MIME: {attachment.content_type}")
# 检查是否是语音文件 # 检查是否是语音文件
if filename.endswith(('.amr', '.silk', '.mp3', '.wav', '.ogg', '.m4a')): if filename.endswith(('.amr', '.silk', '.mp3', '.wav', '.ogg', '.m4a')):
seg = OneBotMessageSegment.record(attachment.url) seg = OneBotMessageSegment.record(attachment.url)
seg.data["filename"] = attachment.filename seg.data["filename"] = attachment.filename
message_list.append(seg) message_list.append(seg)
raw_message += f"\n[语音: {attachment.filename}]" raw_message += f"\n[语音: {attachment.filename}]"
self.logger.debug(f"[DiscordToOneBotConverter] 识别为语音文件: {attachment.filename}") logger.debug(f"[DiscordToOneBotConverter] 识别为语音文件: {attachment.filename}")
elif filename.endswith(('.mp4', '.avi', '.mkv', '.mov', '.flv', '.wmv')): elif filename.endswith(('.mp4', '.avi', '.mkv', '.mov', '.flv', '.wmv')):
seg = OneBotMessageSegment.video(attachment.url) seg = OneBotMessageSegment.video(attachment.url)
seg.data["filename"] = attachment.filename seg.data["filename"] = attachment.filename
message_list.append(seg) message_list.append(seg)
raw_message += f"\n[视频: {attachment.filename}]" raw_message += f"\n[视频: {attachment.filename}]"
self.logger.debug(f"[DiscordToOneBotConverter] 识别为视频文件: {attachment.filename}") logger.debug(f"[DiscordToOneBotConverter] 识别为视频文件: {attachment.filename}")
elif filename.endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')): elif filename.endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')):
image_type = "gif" if filename.endswith('.gif') else None image_type = "gif" if filename.endswith('.gif') else None
seg = OneBotMessageSegment.image(attachment.url, image_type=image_type) seg = OneBotMessageSegment.image(attachment.url, image_type=image_type)
seg.data["filename"] = attachment.filename seg.data["filename"] = attachment.filename
message_list.append(seg) message_list.append(seg)
raw_message += f"\n[图片: {attachment.filename}]" raw_message += f"\n[图片: {attachment.filename}]"
self.logger.debug(f"[DiscordToOneBotConverter] 识别为图片文件: {attachment.filename}") logger.debug(f"[DiscordToOneBotConverter] 识别为图片文件: {attachment.filename}")
else: else:
seg = OneBotMessageSegment.file(attachment.url) seg = OneBotMessageSegment.file(attachment.url)
seg.data["filename"] = attachment.filename seg.data["filename"] = attachment.filename
message_list.append(seg) message_list.append(seg)
raw_message += f"\n[文件: {attachment.filename}]" raw_message += f"\n[文件: {attachment.filename}]"
self.logger.success(f"[DiscordToOneBotConverter] 识别为普通文件: {attachment.filename}") logger.success(f"[DiscordToOneBotConverter] 识别为普通文件: {attachment.filename}")
# 添加贴纸 (Stickers) 信息 # 添加贴纸 (Stickers) 信息
if hasattr(discord_message, 'stickers') and discord_message.stickers: if hasattr(discord_message, 'stickers') and discord_message.stickers:

View File

@@ -171,10 +171,12 @@ async def handle_discord_message_event(event: Any):
logger.debug(f"[CrossPlatform] 开始处理 Discord 事件消息: channel_id={discord_channel_id}") logger.debug(f"[CrossPlatform] 开始处理 Discord 事件消息: channel_id={discord_channel_id}")
if hasattr(event, 'message') and isinstance(event.message, list): if hasattr(event, 'message') and isinstance(event.message, list):
has_text_content = False
for segment in event.message: for segment in event.message:
if isinstance(segment, MessageSegment): if isinstance(segment, MessageSegment):
if segment.type == "text": if segment.type == "text":
content += segment.data.get("text", "") content += segment.data.get("text", "")
has_text_content = True
elif segment.type == "image": elif segment.type == "image":
file_url = segment.data.get("url") or segment.data.get("file") file_url = segment.data.get("url") or segment.data.get("file")
file_name = segment.data.get("filename") file_name = segment.data.get("filename")
@@ -183,6 +185,7 @@ async def handle_discord_message_event(event: Any):
attachment_item = {"type": "image", "url": str(file_url), "filename": file_name} attachment_item = {"type": "image", "url": str(file_url), "filename": file_name}
if attachment_item not in attachments: if attachment_item not in attachments:
attachments.append(attachment_item) attachments.append(attachment_item)
content += f"\n[图片: {file_name}]\n"
elif segment.type == "video": elif segment.type == "video":
file_url = segment.data.get("url") or segment.data.get("file") file_url = segment.data.get("url") or segment.data.get("file")
file_name = segment.data.get("filename") file_name = segment.data.get("filename")
@@ -191,6 +194,7 @@ async def handle_discord_message_event(event: Any):
attachment_item = {"type": "video", "url": str(file_url), "filename": file_name} attachment_item = {"type": "video", "url": str(file_url), "filename": file_name}
if attachment_item not in attachments: if attachment_item not in attachments:
attachments.append(attachment_item) attachments.append(attachment_item)
content += f"\n[视频: {file_name}]\n"
elif segment.type == "record": elif segment.type == "record":
file_url = segment.data.get("url") or segment.data.get("file") file_url = segment.data.get("url") or segment.data.get("file")
file_name = segment.data.get("filename") file_name = segment.data.get("filename")
@@ -199,6 +203,7 @@ async def handle_discord_message_event(event: Any):
attachment_item = {"type": "record", "url": str(file_url), "filename": file_name} attachment_item = {"type": "record", "url": str(file_url), "filename": file_name}
if attachment_item not in attachments: if attachment_item not in attachments:
attachments.append(attachment_item) attachments.append(attachment_item)
content += f"\n[语音: {file_name}]\n"
elif segment.type == "file": elif segment.type == "file":
file_url = segment.data.get("url") or segment.data.get("file") file_url = segment.data.get("url") or segment.data.get("file")
file_name = segment.data.get("filename") file_name = segment.data.get("filename")
@@ -207,12 +212,17 @@ async def handle_discord_message_event(event: Any):
attachment_item = {"type": "file", "url": str(file_url), "filename": file_name} attachment_item = {"type": "file", "url": str(file_url), "filename": file_name}
if attachment_item not in attachments: if attachment_item not in attachments:
attachments.append(attachment_item) attachments.append(attachment_item)
content += f"\n[文件: {file_name}]\n"
logger.debug(f"[CrossPlatform] Discord 消息识别到文件: {file_name}, URL: {file_url}") logger.debug(f"[CrossPlatform] Discord 消息识别到文件: {file_name}, URL: {file_url}")
else: else:
content = event.raw_message or "" content = event.raw_message or ""
content = content.strip() content = content.strip()
# 如果 content 为空但有附件(如只有表情),使用 raw_message 作为 content
if not content and attachments:
content = event.raw_message or ""
logger.debug(f"[CrossPlatform] Discord 消息内容: '{content}', 附件数量: {len(attachments)}") logger.debug(f"[CrossPlatform] Discord 消息内容: '{content}', 附件数量: {len(attachments)}")
discord_username = getattr(event, 'discord_username', 'Unknown') discord_username = getattr(event, 'discord_username', 'Unknown')

View File

@@ -4,10 +4,26 @@
""" """
import os import os
import json import json
import re
from typing import Dict, List, Any from typing import Dict, List, Any
from models.message import MessageSegment from models.message import MessageSegment
from core.utils.logger import logger
from .config import config from .config import config
def extract_text_only(content: str) -> str:
    """Return the plain-text part of a message with media placeholders removed.

    Placeholders of the form ``[图片: ...]``, ``[视频: ...]``, ``[语音: ...]`` and
    ``[文件: ...]`` (image / video / voice / file), together with any whitespace
    around them, are each replaced by a single space. Every remaining run of
    whitespace, newlines included, is then collapsed to one space and the
    result is trimmed at both ends.

    Args:
        content: Message text that may contain media placeholders.

    Returns:
        The cleaned text, or an empty string when ``content`` is falsy.
    """
    if content:
        # Swap each image/video/voice/file placeholder for one space.
        without_markers = re.sub(r'\s*\[(图片|视频|语音|文件):[^\]]+\]\s*', ' ', content)
        # Squeeze whitespace runs and trim both ends.
        return re.sub(r'\s+', ' ', without_markers).strip()
    return ""
async def parse_forward_nodes(nodes: List[Dict[str, Any]]) -> tuple[str, List[dict]]: async def parse_forward_nodes(nodes: List[Dict[str, Any]]) -> tuple[str, List[dict]]:
"""解析 OneBot 合并转发消息节点""" """解析 OneBot 合并转发消息节点"""
content_parts = [] content_parts = []

View File

@@ -8,7 +8,7 @@ from core.utils.logger import logger
from core.managers.redis_manager import redis_manager from core.managers.redis_manager import redis_manager
from .config import config from .config import config
from .translator import translate_with_deepseek from .translator import translate_with_deepseek
from .parser import format_discord_to_qq_content, format_qq_to_discord_content from .parser import format_discord_to_qq_content, format_qq_to_discord_content, extract_text_only
async def send_to_discord(channel_id: int, content: str, attachments: List[dict] = None, embed: dict = None): async def send_to_discord(channel_id: int, content: str, attachments: List[dict] = None, embed: dict = None):
"""发送消息到 Discord 频道""" """发送消息到 Discord 频道"""
@@ -117,9 +117,13 @@ async def forward_discord_to_qq(
logger.debug(f"[CrossPlatform] 格式化后的内容: '{formatted_content}', 图片列表: {image_list}") logger.debug(f"[CrossPlatform] 格式化后的内容: '{formatted_content}', 图片列表: {image_list}")
if formatted_content: if formatted_content:
translated_content = await translate_with_deepseek(formatted_content, "zh-CN", channel_id, "en2zh") # 只提取文本进行翻译,过滤掉非文本内容
if translated_content != formatted_content: text_only = extract_text_only(formatted_content)
formatted_content = f"{formatted_content}\n\n━━━━━ 翻译 ━━━━━\n{translated_content}" if text_only:
translated_content = await translate_with_deepseek(text_only, "zh-CN", channel_id, "en2zh")
if translated_content != text_only:
# 将翻译后的文本替换回原文本位置
formatted_content = formatted_content.replace(text_only, translated_content)
await send_to_qq(target_qq_group, formatted_content, image_list) await send_to_qq(target_qq_group, formatted_content, image_list)
logger.success(f"[CrossPlatform] Discord 频道 {channel_id} -> QQ 群 {target_qq_group}") logger.success(f"[CrossPlatform] Discord 频道 {channel_id} -> QQ 群 {target_qq_group}")
@@ -154,9 +158,12 @@ async def forward_qq_to_discord(
if embed and embed.get("description"): if embed and embed.get("description"):
original_text = embed["description"] original_text = embed["description"]
translated_text = await translate_with_deepseek(original_text, "en", group_id, "zh2en") # 只提取文本进行翻译
if translated_text != original_text: text_only = extract_text_only(original_text)
embed["description"] = f"{original_text}\n\n**Translation:**\n{translated_text}" if text_only:
translated_text = await translate_with_deepseek(text_only, "en", group_id, "zh2en")
if translated_text != text_only:
embed["description"] = embed["description"].replace(text_only, translated_text)
for channel_id in target_channels: for channel_id in target_channels:
await send_to_discord(channel_id, formatted_content, image_list, embed) await send_to_discord(channel_id, formatted_content, image_list, embed)