Files
NeoBot/plugins/discord-cross/translator.py
K2Cr2O1 d6623e2cc8 feat(vectordb): 添加向量数据库支持及集成功能
新增向量数据库管理器模块,支持文本的存储、检索和相似度查询
添加知识库插件和AI聊天插件,利用向量数据库实现记忆功能
优化跨平台翻译模块,集成向量数据库存储历史翻译记录
改进消息处理逻辑,优先使用用户显示名称
2026-03-24 14:32:36 +08:00

224 lines
8.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
跨平台消息互通插件翻译模块
"""
import time
import uuid
from typing import Dict, List
from core.utils.logger import ModuleLogger
from core.managers.vectordb_manager import vectordb_manager
from .config import config
# Logger dedicated to this module.
logger = ModuleLogger("CrossPlatformTranslator")

# Maximum number of translation pairs kept per cache key.
MAX_CONTEXT_MESSAGES = 15

# Recent-translation cache keyed by "<channel_id>_<direction>"; each value is
# a list of {"original": ..., "translated": ...} dicts.
TRANSLATION_CONTEXT_CACHE: Dict[str, List[Dict[str, str]]] = {}
def get_translation_context(channel_id: int, direction: str) -> List[Dict[str, str]]:
    """Return the cached recent translations for a channel and direction.

    Args:
        channel_id: Identifier of the channel whose cache is read.
        direction: Translation direction label; combined with ``channel_id``
            to form the cache key.

    Returns:
        The list stored in ``TRANSLATION_CONTEXT_CACHE`` for that key (the
        same list object, not a copy), or a new empty list when the key has
        no entry.
    """
    try:
        return TRANSLATION_CONTEXT_CACHE[f"{channel_id}_{direction}"]
    except KeyError:
        return []
def add_translation_context(channel_id: int, direction: str, original: str, translated: str):
    """Record a translation pair in the in-memory cache and the vector database.

    The pair is appended to the cache entry for ``channel_id``/``direction``,
    which is then trimmed to the newest ``MAX_CONTEXT_MESSAGES`` items. The
    pair is also written to the ``translation_memory_<channel_id>`` collection
    through ``vectordb_manager.add_texts``; any exception raised while doing
    so is logged and swallowed.

    Args:
        channel_id: Identifier of the channel the translation belongs to.
        direction: Translation direction label.
        original: Source text.
        translated: Translated text.
    """
    key = f"{channel_id}_{direction}"
    entries = TRANSLATION_CONTEXT_CACHE.setdefault(key, [])
    entries.append({"original": original, "translated": translated})
    if len(entries) > MAX_CONTEXT_MESSAGES:
        # Rebind to a fresh list rather than trimming in place, so a list
        # handed out earlier is not shortened behind the holder's back.
        TRANSLATION_CONTEXT_CACHE[key] = entries[-MAX_CONTEXT_MESSAGES:]

    # Persist the pair to the vector database (best effort).
    try:
        memory_collection = f"translation_memory_{channel_id}"
        record_id = str(uuid.uuid4())
        # Source and translation are embedded together as one document.
        embedded_text = f"原文: {original}\n译文: {translated}"
        record_metadata = {
            "channel_id": channel_id,
            "direction": direction,
            "original": original,
            "translated": translated,
            "timestamp": int(time.time()),
        }
        vectordb_manager.add_texts(
            collection_name=memory_collection,
            texts=[embedded_text],
            metadatas=[record_metadata],
            ids=[record_id],
        )
        logger.debug(f"[CrossPlatform] 翻译记录已保存到向量数据库: {memory_collection}")
    except Exception as exc:
        logger.error(f"[CrossPlatform] 保存翻译记录到向量数据库失败: {exc}")
def get_similar_translations(channel_id: int, text: str, direction: str, limit: int = 3) -> str:
    """Build a prompt fragment from similar translations in the vector database.

    Queries the ``translation_memory_<channel_id>`` collection for entries
    similar to ``text`` whose metadata ``direction`` equals ``direction``.

    Args:
        channel_id: Identifier of the channel whose collection is searched.
        text: Text to find similar past translations for.
        direction: Translation direction label used as a metadata filter.
        limit: Maximum number of results requested.

    Returns:
        A header line followed by one numbered entry per hit (original and
        translation each cut to 100 characters), or ``""`` when there are no
        hits or the lookup raises.
    """
    try:
        hits = vectordb_manager.query_texts(
            collection_name=f"translation_memory_{channel_id}",
            query_texts=[text],
            n_results=limit,
            where={"direction": direction},
        )
        documents = hits.get("documents") if hits else None
        if not documents or not documents[0]:
            return ""

        pieces = ["\n\n参考历史相似翻译(向量检索):\n"]
        for rank, meta in enumerate(hits["metadatas"][0], 1):
            source_text = meta.get("original", "")
            target_text = meta.get("translated", "")
            pieces.append(f"{rank}. 原文: {source_text[:100]}\n 译文: {target_text[:100]}\n")
        return "".join(pieces)
    except Exception as exc:
        logger.error(f"[CrossPlatform] 从向量数据库检索翻译记录失败: {exc}")
        return ""
async def translate_with_deepseek(
    text: str,
    target_lang: str = "zh-CN",
    channel_id: int = 0,
    direction: str = "en2zh"
) -> str:
    """Translate ``text`` with the DeepSeek chat-completions API.

    When ``channel_id`` is positive, the system prompt is extended with up to
    five recent cached translations and with similar translations retrieved
    from the vector database, and a successful result is recorded through
    ``add_translation_context``.

    Args:
        text: Text to translate.
        target_lang: ``"zh-CN"`` selects Chinese; any other value selects
            English.
        channel_id: Channel identifier; values ``<= 0`` disable context lookup
            and recording.
        direction: Translation direction label used for the context cache and
            the vector-database filter.

    Returns:
        The stripped translation. ``text`` is returned unchanged when
        translation is disabled, ``text`` is blank, the API key is still the
        placeholder, the model returns an empty or whitespace-only result, or
        the request fails.
    """
    if not config.ENABLE_TRANSLATION or not text.strip():
        return text
    if config.DEEPSEEK_API_KEY == "your-deepseek-api-key-here":
        logger.warning("[CrossPlatform] DeepSeek API 密钥未配置,跳过翻译")
        return text

    lang_name = "中文" if target_lang == "zh-CN" else "英文"

    context_ref = ""
    if channel_id > 0:
        # 1. Most recent cached translations for this channel/direction.
        recent = get_translation_context(channel_id, direction)
        if recent:
            context_ref = "\n\n参考最近的翻译:\n" + "".join(
                f"{i}. 原文: {ctx['original'][:100]}\n 译文: {ctx['translated'][:100]}\n"
                for i, ctx in enumerate(recent[-5:], 1)
            )
        # 2. Similar historical translations from the vector database
        #    (an empty string when nothing is found).
        context_ref += get_similar_translations(channel_id, text, direction)

    # Assembled from explicit pieces so the prompt text does not depend on
    # how this function body is indented.
    system_prompt = (
        f"你是一个专业的翻译助手。请将以下文本翻译成{lang_name}\n"
        "只返回翻译后的文本,不要添加任何解释、注释或其他内容。避免翻译出仇视言论以及违反中国大陆相关法律法规的内容。如果有,请在翻译后有敏感的词语中把文本替换成井号(#\n"
        f"保持原文的语气和格式。如果文本已经是目标语言,直接返回原文。{context_ref}"
    )

    try:
        from openai import AsyncOpenAI

        # The context manager closes the client's HTTP resources as soon as
        # the request completes instead of leaving one open client per call.
        async with AsyncOpenAI(
            api_key=config.DEEPSEEK_API_KEY,
            base_url=config.DEEPSEEK_API_URL.replace("/chat/completions", ""),
        ) as client:
            response = await client.chat.completions.create(
                model=config.DEEPSEEK_MODEL,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": text},
                ],
                temperature=0.3,
                max_tokens=4000,
            )

        # Strip before the emptiness check so a whitespace-only completion is
        # handled as "no translation" rather than cached and returned as "".
        translated_text = (response.choices[0].message.content or "").strip()
        if not translated_text:
            logger.warning("[CrossPlatform] DeepSeek 返回空翻译结果")
            return text

        logger.info(f"[CrossPlatform] 翻译成功: {text[:50]}... -> {translated_text[:50]}...")
        if channel_id > 0:
            add_translation_context(channel_id, direction, text, translated_text)
        return translated_text
    except ImportError:
        # NOTE(review): the fallback imports the same ``openai`` package, so
        # it presumably fails as well when the package is missing — confirm
        # whether this branch is still wanted.
        logger.warning("[CrossPlatform] openai 库未安装,尝试使用同步请求")
        return await translate_with_deepseek_sync(text, target_lang, channel_id, direction)
    except Exception as e:
        logger.error(f"[CrossPlatform] 翻译失败: {e}")
        return text
async def translate_with_deepseek_sync(
    text: str,
    target_lang: str = "zh-CN",
    channel_id: int = 0,
    direction: str = "en2zh"
) -> str:
    """Translate ``text`` with the synchronous OpenAI client (fallback path).

    The blocking HTTP request runs in the event loop's default executor so
    this coroutine does not stall other tasks while waiting for the API.

    When ``channel_id`` is positive, the system prompt is extended with up to
    five recent cached translations and with similar translations retrieved
    from the vector database, and a successful result is recorded through
    ``add_translation_context``.

    Args:
        text: Text to translate.
        target_lang: ``"zh-CN"`` selects Chinese; any other value selects
            English.
        channel_id: Channel identifier; values ``<= 0`` disable context lookup
            and recording.
        direction: Translation direction label used for the context cache and
            the vector-database filter.

    Returns:
        The stripped translation. ``text`` is returned unchanged when
        translation is disabled, ``text`` is blank, the API key is still the
        placeholder, the model returns an empty or whitespace-only result, or
        the request fails.
    """
    if not config.ENABLE_TRANSLATION or not text.strip():
        return text
    if config.DEEPSEEK_API_KEY == "your-deepseek-api-key-here":
        return text

    lang_name = "中文" if target_lang == "zh-CN" else "英文"

    context_ref = ""
    if channel_id > 0:
        # 1. Most recent cached translations for this channel/direction.
        recent = get_translation_context(channel_id, direction)
        if recent:
            context_ref = "\n\n参考最近的翻译:\n" + "".join(
                f"{i}. 原文: {ctx['original'][:100]}\n 译文: {ctx['translated'][:100]}\n"
                for i, ctx in enumerate(recent[-5:], 1)
            )
        # 2. Similar historical translations from the vector database
        #    (an empty string when nothing is found).
        context_ref += get_similar_translations(channel_id, text, direction)

    # Assembled from explicit pieces so the prompt text does not depend on
    # how this function body is indented.
    system_prompt = (
        f"你是一个专业的翻译助手。请将以下文本翻译成{lang_name}\n"
        "只返回翻译后的文本,不要添加任何解释、注释或其他内容。避免翻译出仇视言论以及违反中国大陆相关法律法规的内容。如果有,请在翻译后有敏感的词语中把文本替换成井号(#\n"
        f"保持原文的语气和格式。如果文本已经是目标语言,直接返回原文。{context_ref}"
    )

    try:
        import asyncio
        from openai import OpenAI

        # Read configuration on the event-loop thread; the worker thread only
        # sees these captured values.
        api_key = config.DEEPSEEK_API_KEY
        base_url = config.DEEPSEEK_API_URL.replace("/chat/completions", "")
        model = config.DEEPSEEK_MODEL
        request_messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text},
        ]

        def _request_translation():
            # The context manager closes the client's HTTP resources once the
            # request has completed.
            with OpenAI(api_key=api_key, base_url=base_url) as client:
                response = client.chat.completions.create(
                    model=model,
                    messages=request_messages,
                    temperature=0.3,
                    max_tokens=4000,
                )
            return response.choices[0].message.content

        # Run the blocking request off the event loop.
        loop = asyncio.get_running_loop()
        raw_translation = await loop.run_in_executor(None, _request_translation)

        # Strip before the emptiness check so a whitespace-only completion is
        # handled as "no translation" rather than cached and returned as "".
        translated_text = (raw_translation or "").strip()
        if not translated_text:
            return text

        if channel_id > 0:
            add_translation_context(channel_id, direction, text, translated_text)
        return translated_text
    except Exception as e:
        logger.error(f"[CrossPlatform] 同步翻译失败: {e}")
        return text