feat(vectordb): 添加向量数据库支持及集成功能
新增向量数据库管理器模块，支持文本的存储、检索和相似度查询；添加知识库插件和 AI 聊天插件，利用向量数据库实现记忆功能；优化跨平台翻译模块，集成向量数据库存储历史翻译记录；改进消息处理逻辑，优先使用用户显示名称。
This commit is contained in:
@@ -148,7 +148,7 @@ async def handle_qq_group_message(event: GroupMessageEvent):
|
||||
group_name = f"群{group_id}"
|
||||
|
||||
await handle_qq_message(
|
||||
nickname=event.sender.nickname or event.sender.card or str(event.user_id),
|
||||
nickname=event.sender.card or event.sender.nickname or str(event.user_id),
|
||||
user_id=event.user_id,
|
||||
group_name=group_name,
|
||||
group_id=group_id,
|
||||
|
||||
@@ -2,8 +2,11 @@
|
||||
"""
|
||||
跨平台消息互通插件翻译模块
|
||||
"""
|
||||
import time
|
||||
import uuid
|
||||
from typing import Dict, List
|
||||
from core.utils.logger import ModuleLogger
|
||||
from core.managers.vectordb_manager import vectordb_manager
|
||||
from .config import config
|
||||
|
||||
# 创建模块专用日志记录器
|
||||
@@ -19,7 +22,7 @@ def get_translation_context(channel_id: int, direction: str) -> List[Dict[str, s
|
||||
return TRANSLATION_CONTEXT_CACHE.get(cache_key, [])
|
||||
|
||||
def add_translation_context(channel_id: int, direction: str, original: str, translated: str):
|
||||
"""添加翻译到上下文缓存"""
|
||||
"""添加翻译到上下文缓存和向量数据库"""
|
||||
cache_key = f"{channel_id}_{direction}"
|
||||
if cache_key not in TRANSLATION_CONTEXT_CACHE:
|
||||
TRANSLATION_CONTEXT_CACHE[cache_key] = []
|
||||
@@ -31,6 +34,59 @@ def add_translation_context(channel_id: int, direction: str, original: str, tran
|
||||
|
||||
if len(TRANSLATION_CONTEXT_CACHE[cache_key]) > MAX_CONTEXT_MESSAGES:
|
||||
TRANSLATION_CONTEXT_CACHE[cache_key] = TRANSLATION_CONTEXT_CACHE[cache_key][-MAX_CONTEXT_MESSAGES:]
|
||||
|
||||
# 将翻译记录保存到向量数据库
|
||||
try:
|
||||
collection_name = f"translation_memory_{channel_id}"
|
||||
doc_id = str(uuid.uuid4())
|
||||
|
||||
# 将原文和译文组合作为向量化文本
|
||||
text_to_embed = f"原文: {original}\n译文: {translated}"
|
||||
|
||||
metadata = {
|
||||
"channel_id": channel_id,
|
||||
"direction": direction,
|
||||
"original": original,
|
||||
"translated": translated,
|
||||
"timestamp": int(time.time())
|
||||
}
|
||||
|
||||
vectordb_manager.add_texts(
|
||||
collection_name=collection_name,
|
||||
texts=[text_to_embed],
|
||||
metadatas=[metadata],
|
||||
ids=[doc_id]
|
||||
)
|
||||
logger.debug(f"[CrossPlatform] 翻译记录已保存到向量数据库: {collection_name}")
|
||||
except Exception as e:
|
||||
logger.error(f"[CrossPlatform] 保存翻译记录到向量数据库失败: {e}")
|
||||
|
||||
def get_similar_translations(channel_id: int, text: str, direction: str, limit: int = 3) -> str:
    """Retrieve similar past translations from the vector database.

    Queries the per-channel collection ``translation_memory_<channel_id>``
    for records similar to ``text``, filtered to the given ``direction``,
    and formats the hits as a numbered reference section for a prompt.

    Args:
        channel_id: Channel whose translation memory collection is searched.
        text: Source text to look up similar translations for.
        direction: Translation direction; used as a metadata filter.
        limit: Maximum number of results requested from the database.

    Returns:
        A formatted reference section, or an empty string when there are no
        usable hits or when the lookup fails. This helper is best-effort:
        failures are logged and never propagated to the caller.
    """
    try:
        collection_name = f"translation_memory_{channel_id}"

        results = vectordb_manager.query_texts(
            collection_name=collection_name,
            query_texts=[text],
            n_results=limit,
            where={"direction": direction},
        )

        if not results or not results.get("documents") or not results["documents"][0]:
            return ""

        # Only one query text is sent, so only the first result row matters.
        # The metadatas field is guarded separately from documents: it is the
        # one actually read below, and it may be missing or None.
        metadata_rows = results.get("metadatas") or [[]]
        hits = [meta for meta in (metadata_rows[0] or []) if meta]
        if not hits:
            # Avoid returning a header with no entries under it.
            return ""

        entries = []
        for i, metadata in enumerate(hits, 1):
            original = metadata.get("original") or ""
            translated = metadata.get("translated") or ""
            entries.append(f"{i}. 原文: {original[:100]}\n 译文: {translated[:100]}\n")

        return "\n\n参考历史相似翻译(向量检索):\n" + "".join(entries)
    except Exception as e:
        # Deliberately broad: the exception types raised by vectordb_manager
        # are not visible here, and translation must keep working without
        # the vector store.
        logger.error(f"[CrossPlatform] 从向量数据库检索翻译记录失败: {e}")
        return ""
|
||||
|
||||
async def translate_with_deepseek(
|
||||
text: str,
|
||||
@@ -51,11 +107,17 @@ async def translate_with_deepseek(
|
||||
messages = []
|
||||
context_ref = ""
|
||||
if channel_id > 0:
|
||||
# 1. 获取最近的上下文缓存
|
||||
context = get_translation_context(channel_id, direction)
|
||||
if context:
|
||||
context_ref = "\n\n参考之前的翻译:\n"
|
||||
context_ref = "\n\n参考最近的翻译:\n"
|
||||
for i, ctx in enumerate(context[-5:], 1):
|
||||
context_ref += f"{i}. 原文: {ctx['original'][:100]}\n 译文: {ctx['translated'][:100]}\n"
|
||||
|
||||
# 2. 从向量数据库检索相似的历史翻译
|
||||
similar_context = get_similar_translations(channel_id, text, direction)
|
||||
if similar_context:
|
||||
context_ref += similar_context
|
||||
|
||||
system_prompt = f"""你是一个专业的翻译助手。请将以下文本翻译成{lang_name}。
|
||||
只返回翻译后的文本,不要添加任何解释、注释或其他内容。避免翻译出仇视言论以及违反中国大陆相关法律法规的内容。如果有,请在翻译后有敏感的词语中把文本替换成井号(#)
|
||||
@@ -115,11 +177,17 @@ async def translate_with_deepseek_sync(
|
||||
|
||||
context_ref = ""
|
||||
if channel_id > 0:
|
||||
# 1. 获取最近的上下文缓存
|
||||
context = get_translation_context(channel_id, direction)
|
||||
if context:
|
||||
context_ref = "\n\n参考之前的翻译:\n"
|
||||
context_ref = "\n\n参考最近的翻译:\n"
|
||||
for i, ctx in enumerate(context[-5:], 1):
|
||||
context_ref += f"{i}. 原文: {ctx['original'][:100]}\n 译文: {ctx['translated'][:100]}\n"
|
||||
|
||||
# 2. 从向量数据库检索相似的历史翻译
|
||||
similar_context = get_similar_translations(channel_id, text, direction)
|
||||
if similar_context:
|
||||
context_ref += similar_context
|
||||
|
||||
system_prompt = f"""你是一个专业的翻译助手。请将以下文本翻译成{lang_name}。
|
||||
只返回翻译后的文本,不要添加任何解释、注释或其他内容。避免翻译出仇视言论以及违反中国大陆相关法律法规的内容。如果有,请在翻译后有敏感的词语中把文本替换成井号(#)
|
||||
|
||||
Reference in New Issue
Block a user