feat(vectordb): 添加向量数据库支持及集成功能
新增向量数据库管理器模块,支持文本的存储、检索和相似度查询;添加知识库插件和AI聊天插件,利用向量数据库实现记忆功能;优化跨平台翻译模块,集成向量数据库存储历史翻译记录;改进消息处理逻辑,优先使用用户显示名称。
This commit is contained in:
134
core/managers/vectordb_manager.py
Normal file
134
core/managers/vectordb_manager.py
Normal file
@@ -0,0 +1,134 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
向量数据库管理器模块
|
||||
|
||||
该模块提供了一个基于 ChromaDB 的向量数据库管理器,
|
||||
用于存储和检索文本向量,为大语言模型提供记忆能力。
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from typing import List, Dict, Any, Optional
|
||||
import chromadb
|
||||
from chromadb.config import Settings
|
||||
from core.utils.logger import ModuleLogger
|
||||
from core.utils.singleton import Singleton
|
||||
|
||||
# Logger shared by everything in this module; "VectorDBManager" is the name
# handed to the project's ModuleLogger.
logger = ModuleLogger("VectorDBManager")
|
||||
|
||||
class VectorDBManager(Singleton):
    """Manager for a persistent ChromaDB vector store.

    Wraps a lazily created ``chromadb.PersistentClient`` and exposes helpers
    to add, query and delete text documents in named collections. Every
    public method logs failures and reports them through its return value
    instead of raising.

    The class derives from the project's ``Singleton`` base; that base is not
    visible here, so the exact sharing semantics should be confirmed there.
    """

    # ChromaDB client; stays None until initialize() succeeds.
    _client = None
    # Cache of collection handles keyed by collection name. It is declared on
    # the class, so the same dict is seen through every instance.
    _collections: Dict[str, Any] = {}

    def __init__(self):
        """Compute the on-disk database path and make sure it exists."""
        super().__init__()
        # Three levels up from this file, then data/vectordb.
        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
        self.db_path = os.path.join(base_dir, "data", "vectordb")
        os.makedirs(self.db_path, exist_ok=True)

    def initialize(self) -> None:
        """Create the ChromaDB client if it does not exist yet.

        Does nothing when a client is already present. On failure the error
        is logged and ``_client`` is left as ``None`` so a later call can
        retry.
        """
        if self._client is not None:
            return

        try:
            logger.info(f"正在初始化向量数据库,路径: {self.db_path}")
            self._client = chromadb.PersistentClient(
                path=self.db_path,
                settings=Settings(
                    anonymized_telemetry=False,
                    allow_reset=True,
                ),
            )
            logger.success("向量数据库初始化成功!")
        except Exception as e:
            logger.error(f"向量数据库初始化失败: {e}")
            self._client = None

    def get_collection(self, name: str) -> Optional[Any]:
        """Return the collection called ``name``, creating it if needed.

        Args:
            name: Collection name.

        Returns:
            The collection handle, or ``None`` when the client could not be
            initialized or the collection could not be obtained.
        """
        if self._client is None:
            self.initialize()

        # Initialization may have failed; there is nothing to hand out then.
        if self._client is None:
            return None

        if name not in self._collections:
            try:
                # No embedding function is passed, so ChromaDB uses its
                # default one (a sentence-transformers model according to
                # the original author's note).
                self._collections[name] = self._client.get_or_create_collection(name=name)
                logger.debug(f"已获取/创建向量集合: {name}")
            except Exception as e:
                logger.error(f"获取向量集合 {name} 失败: {e}")
                return None

        return self._collections[name]

    def add_texts(self, collection_name: str, texts: List[str], metadatas: List[Dict[str, Any]], ids: List[str]) -> bool:
        """Add text documents to a collection.

        Args:
            collection_name: Name of the target collection.
            texts: Documents to store.
            metadatas: One metadata dict per document (used for filtering and
                for carrying extra information).
            ids: One unique id per document.

        Returns:
            ``True`` when ChromaDB accepted the batch, ``False`` when the
            collection is unavailable or the add call raised.
        """
        collection = self.get_collection(collection_name)
        if collection is None:
            return False

        try:
            collection.add(
                documents=texts,
                metadatas=metadatas,
                ids=ids,
            )
            logger.debug(f"成功向集合 {collection_name} 添加 {len(texts)} 条记录")
            return True
        except Exception as e:
            logger.error(f"向集合 {collection_name} 添加记录失败: {e}")
            return False

    @staticmethod
    def _empty_query_result() -> Dict[str, Any]:
        """Build the fallback result returned when a query cannot be served.

        Includes ``ids`` next to the other result keys so callers can read it
        on the failure path as well as on the success path.
        """
        return {"ids": [], "documents": [], "metadatas": [], "distances": []}

    def query_texts(self, collection_name: str, query_texts: List[str], n_results: int = 5, where: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Find the stored documents most similar to the query texts.

        Args:
            collection_name: Name of the collection to search.
            query_texts: Texts to search for.
            n_results: Maximum number of hits per query text.
            where: Optional metadata filter forwarded to ChromaDB.

        Returns:
            Whatever ``collection.query`` returns on success. When the
            collection is unavailable or the query raises, a dict whose
            ``ids``, ``documents``, ``metadatas`` and ``distances`` entries
            are empty lists.
        """
        collection = self.get_collection(collection_name)
        if collection is None:
            return self._empty_query_result()

        try:
            return collection.query(
                query_texts=query_texts,
                n_results=n_results,
                where=where,
            )
        except Exception as e:
            logger.error(f"查询集合 {collection_name} 失败: {e}")
            return self._empty_query_result()

    def delete_texts(self, collection_name: str, ids: Optional[List[str]] = None, where: Optional[Dict[str, Any]] = None) -> bool:
        """Delete documents from a collection by id and/or metadata filter.

        Args:
            collection_name: Name of the collection to delete from.
            ids: Ids of the documents to delete.
            where: Metadata filter selecting the documents to delete.

        Returns:
            ``True`` when the delete call succeeded; ``False`` when neither
            ``ids`` nor ``where`` was given, the collection is unavailable,
            or the delete call raised.
        """
        # Refuse an unfiltered delete up front: forwarding it to ChromaDB
        # either raises or, on some versions, may empty the whole collection.
        if not ids and not where:
            logger.error(f"从集合 {collection_name} 删除记录失败: 必须提供 ids 或 where 条件")
            return False

        collection = self.get_collection(collection_name)
        if collection is None:
            return False

        try:
            collection.delete(ids=ids, where=where)
            logger.debug(f"成功从集合 {collection_name} 删除记录")
            return True
        except Exception as e:
            logger.error(f"从集合 {collection_name} 删除记录失败: {e}")
            return False
|
||||
|
||||
# Global vector database manager instance, created when this module is
# imported. Constructing it creates the data directory (see __init__); the
# ChromaDB client itself is only opened later, by initialize().
vectordb_manager = VectorDBManager()
|
||||
Reference in New Issue
Block a user