# -*- coding: utf-8 -*-
"""
Translation module for the cross-platform message bridging plugin.
"""
import asyncio
import time
import uuid
from typing import Dict, List

from neobot.core.utils.logger import ModuleLogger
from neobot.core.managers.vectordb_manager import vectordb_manager
from .config import config

# Module-specific logger.
logger = ModuleLogger("CrossPlatformTranslator")

# In-memory cache of recent translations, keyed by "<channel_id>_<direction>".
# Each key keeps at most MAX_CONTEXT_MESSAGES entries.
TRANSLATION_CONTEXT_CACHE: Dict[str, List[Dict[str, str]]] = {}
MAX_CONTEXT_MESSAGES = 15


def get_translation_context(channel_id: int, direction: str) -> List[Dict[str, str]]:
    """Return the cached recent translations for a channel and direction.

    Args:
        channel_id: Identifier of the bridged channel.
        direction: Translation direction tag (e.g. ``"en2zh"``).

    Returns:
        The cached list of ``{"original", "translated"}`` records, or an
        empty list when nothing has been cached for this key.
    """
    cache_key = f"{channel_id}_{direction}"
    return TRANSLATION_CONTEXT_CACHE.get(cache_key, [])


def add_translation_context(channel_id: int, direction: str, original: str, translated: str) -> None:
    """Record a translation in the in-memory cache and in the vector database.

    The cache for the ``channel_id``/``direction`` pair is trimmed to the
    newest ``MAX_CONTEXT_MESSAGES`` entries. Persisting to the vector database
    is best-effort: any failure is logged and otherwise ignored.

    Args:
        channel_id: Identifier of the bridged channel.
        direction: Translation direction tag (e.g. ``"en2zh"``).
        original: Source text that was translated.
        translated: Translation that was produced.
    """
    cache_key = f"{channel_id}_{direction}"
    history = TRANSLATION_CONTEXT_CACHE.setdefault(cache_key, [])
    history.append({"original": original, "translated": translated})
    if len(history) > MAX_CONTEXT_MESSAGES:
        TRANSLATION_CONTEXT_CACHE[cache_key] = history[-MAX_CONTEXT_MESSAGES:]

    # Persist the translation record to the vector database.
    try:
        collection_name = f"translation_memory_{channel_id}"
        doc_id = str(uuid.uuid4())

        # The source text and its translation are embedded together.
        text_to_embed = f"原文: {original}\n译文: {translated}"

        metadata = {
            "channel_id": channel_id,
            "direction": direction,
            "original": original,
            "translated": translated,
            "timestamp": int(time.time()),
        }

        vectordb_manager.add_texts(
            collection_name=collection_name,
            texts=[text_to_embed],
            metadatas=[metadata],
            ids=[doc_id],
        )
        logger.debug(f"[CrossPlatform] 翻译记录已保存到向量数据库: {collection_name}")
    except Exception as e:
        logger.error(f"[CrossPlatform] 保存翻译记录到向量数据库失败: {e}")


def get_similar_translations(channel_id: int, text: str, direction: str, limit: int = 3) -> str:
    """Look up similar past translations in the vector database.

    Args:
        channel_id: Identifier of the bridged channel; selects the collection.
        text: Text about to be translated, used as the query.
        direction: Translation direction tag used to filter the results.
        limit: Maximum number of records to request.

    Returns:
        A prompt fragment listing the matching records (each field truncated
        to 100 characters), or an empty string when there are no usable
        results or the lookup fails. Failures are logged, never raised.
    """
    try:
        collection_name = f"translation_memory_{channel_id}"

        # Query for similar texts in the same direction.
        results = vectordb_manager.query_texts(
            collection_name=collection_name,
            query_texts=[text],
            n_results=limit,
            where={"direction": direction},
        )

        if not results or not results.get("documents") or not results["documents"][0]:
            return ""

        # Guard against a missing/None metadata list and skip empty records so
        # the prompt never receives a header without entries.
        metadata_groups = results.get("metadatas") or [[]]
        records = [record for record in (metadata_groups[0] or []) if record]
        if not records:
            return ""

        pieces = ["\n\n参考历史相似翻译(向量检索):\n"]
        for i, metadata in enumerate(records, 1):
            original = metadata.get("original", "")
            translated = metadata.get("translated", "")
            pieces.append(f"{i}. 原文: {original[:100]}\n 译文: {translated[:100]}\n")
        return "".join(pieces)
    except Exception as e:
        logger.error(f"[CrossPlatform] 从向量数据库检索翻译记录失败: {e}")
        return ""


def _build_context_reference(channel_id: int, text: str, direction: str) -> str:
    """Build the reference-translation suffix appended to the system prompt."""
    if channel_id <= 0:
        return ""

    pieces = []

    # 1. Most recent translations from the in-memory cache (last five).
    recent = get_translation_context(channel_id, direction)
    if recent:
        pieces.append("\n\n参考最近的翻译:\n")
        pieces.extend(
            f"{i}. 原文: {ctx['original'][:100]}\n 译文: {ctx['translated'][:100]}\n"
            for i, ctx in enumerate(recent[-5:], 1)
        )

    # 2. Similar historical translations from the vector database.
    similar = get_similar_translations(channel_id, text, direction)
    if similar:
        pieces.append(similar)

    return "".join(pieces)


def _build_system_prompt(target_lang: str, context_ref: str) -> str:
    """Build the system prompt for the given target language and references."""
    lang_name = "中文" if target_lang == "zh-CN" else "英文"
    return (
        f"你是一个专业的翻译助手。请将以下文本翻译成{lang_name}。 "
        "只返回翻译后的文本,不要添加任何解释、注释或其他内容。"
        "避免翻译出仇视言论以及违反中国大陆相关法律法规的内容。"
        "如果有,请在翻译后有敏感的词语中把文本替换成井号(#) "
        f"保持原文的语气和格式。如果文本已经是目标语言,直接返回原文。{context_ref}"
    )


def _chat_request_kwargs(system_prompt: str, text: str) -> dict:
    """Return the keyword arguments shared by both chat-completion requests."""
    return {
        "model": config.DEEPSEEK_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text},
        ],
        "temperature": 0.3,
        "max_tokens": 4000,
    }


def _client_kwargs() -> dict:
    """Return the constructor arguments shared by both OpenAI clients."""
    return {
        "api_key": config.DEEPSEEK_API_KEY,
        # The SDK appends the endpoint path itself, so strip it from the URL.
        "base_url": config.DEEPSEEK_API_URL.replace("/chat/completions", ""),
    }


async def translate_with_deepseek(
    text: str,
    target_lang: str = "zh-CN",
    channel_id: int = 0,
    direction: str = "en2zh"
) -> str:
    """Translate text through the DeepSeek API using the async OpenAI client.

    Args:
        text: Text to translate.
        target_lang: ``"zh-CN"`` selects Chinese; any other value selects
            English.
        channel_id: Bridged channel identifier. When greater than zero, cached
            and vector-retrieved translations are added to the prompt and the
            result is recorded for future reference.
        direction: Translation direction tag used for context lookup/storage.

    Returns:
        The translated text, or ``text`` unchanged when translation is
        disabled, the input is blank, the API key is the placeholder value,
        the model returns nothing usable, or the request fails.
    """
    if not config.ENABLE_TRANSLATION or not text.strip():
        return text

    if config.DEEPSEEK_API_KEY == "your-deepseek-api-key-here":
        logger.warning("[CrossPlatform] DeepSeek API 密钥未配置,跳过翻译")
        return text

    context_ref = _build_context_reference(channel_id, text, direction)
    system_prompt = _build_system_prompt(target_lang, context_ref)

    try:
        from openai import AsyncOpenAI

        # Context manager closes the underlying HTTP client after the call
        # instead of leaving it to garbage collection.
        async with AsyncOpenAI(**_client_kwargs()) as client:
            response = await client.chat.completions.create(
                **_chat_request_kwargs(system_prompt, text)
            )

        # Strip before testing so a whitespace-only (or None) completion is
        # treated as "no translation" rather than returned as an empty string.
        translated_text = (response.choices[0].message.content or "").strip()
        if not translated_text:
            logger.warning("[CrossPlatform] DeepSeek 返回空翻译结果")
            return text

        logger.info(f"[CrossPlatform] 翻译成功: {text[:50]}... -> {translated_text[:50]}...")
        if channel_id > 0:
            add_translation_context(channel_id, direction, text, translated_text)
        return translated_text

    except ImportError:
        logger.warning("[CrossPlatform] openai 库未安装,尝试使用同步请求")
        return await translate_with_deepseek_sync(text, target_lang, channel_id, direction)
    except Exception as e:
        logger.error(f"[CrossPlatform] 翻译失败: {e}")
        return text


async def translate_with_deepseek_sync(
    text: str,
    target_lang: str = "zh-CN",
    channel_id: int = 0,
    direction: str = "en2zh"
) -> str:
    """Translate text through DeepSeek using the synchronous client (fallback).

    The blocking request is executed in the event loop's default executor so
    that awaiting this coroutine does not stall other tasks.

    Args:
        text: Text to translate.
        target_lang: ``"zh-CN"`` selects Chinese; any other value selects
            English.
        channel_id: Bridged channel identifier. When greater than zero, cached
            and vector-retrieved translations are added to the prompt and the
            result is recorded for future reference.
        direction: Translation direction tag used for context lookup/storage.

    Returns:
        The translated text, or ``text`` unchanged when translation is
        disabled, the input is blank, the API key is the placeholder value,
        the model returns nothing usable, or the request fails.
    """
    if not config.ENABLE_TRANSLATION or not text.strip():
        return text

    if config.DEEPSEEK_API_KEY == "your-deepseek-api-key-here":
        return text

    context_ref = _build_context_reference(channel_id, text, direction)
    system_prompt = _build_system_prompt(target_lang, context_ref)

    try:
        from openai import OpenAI

        request_kwargs = _chat_request_kwargs(system_prompt, text)
        loop = asyncio.get_running_loop()
        with OpenAI(**_client_kwargs()) as client:
            # Run the blocking HTTP call off the event-loop thread.
            response = await loop.run_in_executor(
                None, lambda: client.chat.completions.create(**request_kwargs)
            )

        # Strip before testing so a whitespace-only (or None) completion falls
        # back to the original text.
        translated_text = (response.choices[0].message.content or "").strip()
        if not translated_text:
            return text

        if channel_id > 0:
            add_translation_context(channel_id, direction, text, translated_text)
        return translated_text
    except Exception as e:
        logger.error(f"[CrossPlatform] 同步翻译失败: {e}")
        return text