# -*- coding: utf-8 -*-
"""
Vector database manager module.

Provides a ChromaDB-backed vector database manager used to store and
retrieve text vectors, giving large language models a memory facility.
"""

import os
import json
from typing import List, Dict, Any, Optional

import chromadb
from chromadb.config import Settings

from core.utils.logger import ModuleLogger
from core.utils.singleton import Singleton

logger = ModuleLogger("VectorDBManager")


class VectorDBManager(Singleton):
    """Vector database manager (singleton).

    Wraps a persistent ChromaDB client and caches the collection handles it
    hands out. Every public operation reports failure through its return
    value (``None`` / ``False`` / an empty result) and logs the error instead
    of raising.
    """

    # Lazily created by initialize(); stays None until the first successful
    # initialisation.
    _client = None
    # Cache of collection handles keyed by collection name. This is a class
    # attribute mutated in place, so it is shared by every instance.
    _collections: Dict[str, Any] = {}

    def __init__(self):
        """Resolve the on-disk database directory and make sure it exists."""
        super().__init__()
        # Three directory levels above this file, then data/vectordb.
        self.db_path = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
            "data",
            "vectordb",
        )
        os.makedirs(self.db_path, exist_ok=True)

    @staticmethod
    def _empty_query_result() -> Dict[str, Any]:
        """Return a fresh, empty result for failed or impossible queries.

        A new dict is built on every call so callers can never mutate a
        shared fallback object. The "ids" key is included so the fallback
        offers the same top-level keys callers read from a successful query.
        """
        return {"ids": [], "documents": [], "metadatas": [], "distances": []}

    def initialize(self) -> None:
        """Create the ChromaDB persistent client if it does not exist yet.

        Failures are logged and leave ``_client`` as ``None``, so a later
        call will try again.
        """
        if self._client is not None:
            return
        try:
            logger.info(f"正在初始化向量数据库,路径: {self.db_path}")
            self._client = chromadb.PersistentClient(
                path=self.db_path,
                settings=Settings(
                    anonymized_telemetry=False,
                    allow_reset=True,
                ),
            )
            logger.success("向量数据库初始化成功!")
        except Exception as e:
            logger.error(f"向量数据库初始化失败: {e}")
            self._client = None

    def get_collection(self, name: str) -> Optional[Any]:
        """Get or create the collection called ``name``.

        Args:
            name: Collection name.

        Returns:
            The (cached) collection handle, or ``None`` when the client could
            not be initialised or the collection could not be obtained.
        """
        if self._client is None:
            self.initialize()
        if self._client is None:
            return None

        if name not in self._collections:
            try:
                # No embedding function is passed, so ChromaDB's default is
                # used (noted by the original author as a
                # sentence-transformers model).
                self._collections[name] = self._client.get_or_create_collection(name=name)
                logger.debug(f"已获取/创建向量集合: {name}")
            except Exception as e:
                logger.error(f"获取向量集合 {name} 失败: {e}")
                return None
        return self._collections[name]

    def add_texts(self, collection_name: str, texts: List[str],
                  metadatas: List[Dict[str, Any]], ids: List[str]) -> bool:
        """Add texts to a collection.

        Args:
            collection_name: Collection name.
            texts: Texts to store.
            metadatas: Metadata entries (used for filtering and for storing
                extra information).
            ids: Unique IDs.

        Returns:
            ``True`` on success, ``False`` if the collection is unavailable
            or ChromaDB rejects the request.
        """
        collection = self.get_collection(collection_name)
        if collection is None:
            return False
        try:
            collection.add(
                documents=texts,
                metadatas=metadatas,
                ids=ids,
            )
            logger.debug(f"成功向集合 {collection_name} 添加 {len(texts)} 条记录")
            return True
        except Exception as e:
            logger.error(f"向集合 {collection_name} 添加记录失败: {e}")
            return False

    def query_texts(self, collection_name: str, query_texts: List[str],
                    n_results: int = 5,
                    where: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Query a collection for texts similar to ``query_texts``.

        Args:
            collection_name: Collection name.
            query_texts: Query texts.
            n_results: Number of results to return.
            where: Metadata filter. ``None`` or an empty dict means no filter.

        Returns:
            The ChromaDB query result, or an empty result (empty "ids",
            "documents", "metadatas" and "distances" lists) when the
            collection is unavailable or the query fails.
        """
        collection = self.get_collection(collection_name)
        if collection is None:
            return self._empty_query_result()
        try:
            return collection.query(
                query_texts=query_texts,
                n_results=n_results,
                # Some ChromaDB versions reject an empty filter dict, so an
                # empty filter is forwarded as "no filter".
                where=where or None,
            )
        except Exception as e:
            logger.error(f"查询集合 {collection_name} 失败: {e}")
            return self._empty_query_result()

    def delete_texts(self, collection_name: str,
                     ids: Optional[List[str]] = None,
                     where: Optional[Dict[str, Any]] = None) -> bool:
        """Delete records from a collection by ID and/or metadata filter.

        At least one of ``ids`` or ``where`` must be non-empty. A call with
        neither is refused, because depending on the ChromaDB version it
        either raises or deletes every record in the collection.

        Args:
            collection_name: Collection name.
            ids: IDs of the records to delete.
            where: Metadata filter selecting the records to delete.

        Returns:
            ``True`` on success, ``False`` if no selection was given, the
            collection is unavailable, or ChromaDB rejects the request.
        """
        if not ids and not where:
            # logger.error is used because it is the only severity at or
            # above "warning" that this file is known to provide.
            logger.error(f"从集合 {collection_name} 删除记录失败: 未指定 ids 或 where 条件")
            return False

        collection = self.get_collection(collection_name)
        if collection is None:
            return False
        try:
            # Empty selectors are forwarded as None so only the selector that
            # was actually supplied takes part in the delete.
            collection.delete(ids=ids or None, where=where or None)
            logger.debug(f"成功从集合 {collection_name} 删除记录")
            return True
        except Exception as e:
            logger.error(f"从集合 {collection_name} 删除记录失败: {e}")
            return False


# Global vector database manager instance.
vectordb_manager = VectorDBManager()