# NeoBot/core/managers/vectordb_manager.py

# -*- coding: utf-8 -*-
"""
向量数据库管理器模块

该模块提供了一个基于 ChromaDB 的向量数据库管理器，
用于存储和检索文本向量，为大语言模型提供记忆能力。
"""
import os
import json
from typing import List, Dict, Any, Optional
import chromadb
from chromadb.config import Settings
from core.utils.logger import ModuleLogger
from core.utils.singleton import Singleton

# Module-level logger shared by everything in this file; "VectorDBManager" is
# the name handed to ModuleLogger.
logger = ModuleLogger("VectorDBManager")

class VectorDBManager(Singleton):
    """
    Vector database manager (singleton) built on ChromaDB.

    Wraps a persistent ChromaDB client and exposes add / query / delete
    helpers keyed by collection name. Every public helper catches the
    exceptions raised by the database layer, logs them, and reports failure
    through its return value instead of raising.
    """
    # ChromaDB client; stays None until initialize() succeeds.
    _client: Optional[Any] = None
    # Collection handles keyed by collection name. Declared on the class, so
    # the same dict is shared by every instance.
    _collections: Dict[str, Any] = {}

    def __init__(self):
        """Compute the on-disk database path and make sure the directory exists."""
        super().__init__()
        # The storage directory is <three levels above this file>/data/vectordb.
        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
        self.db_path = os.path.join(base_dir, "data", "vectordb")
        os.makedirs(self.db_path, exist_ok=True)

    def initialize(self) -> None:
        """
        Create the persistent ChromaDB client if it does not exist yet.

        Does nothing when a client is already present. A failure is logged and
        leaves ``_client`` as None, so a later call tries again.
        """
        if self._client is not None:
            return

        try:
            logger.info(f"正在初始化向量数据库，路径: {self.db_path}")
            self._client = chromadb.PersistentClient(
                path=self.db_path,
                settings=Settings(
                    anonymized_telemetry=False,
                    allow_reset=True
                )
            )
            logger.success("向量数据库初始化成功！")
        except Exception as e:
            logger.error(f"向量数据库初始化失败: {e}")
            self._client = None

    def get_collection(self, name: str) -> Optional[Any]:
        """
        Return the collection called ``name``, creating it if necessary.

        The client is initialized on demand, and collection handles are cached
        so the database is only asked once per name.

        Args:
            name: Collection name.

        Returns:
            The collection handle, or None when the client could not be
            initialized or the collection could not be obtained.
        """
        if self._client is None:
            self.initialize()

        # Still no client: initialization failed and has already been logged.
        if self._client is None:
            return None

        if name not in self._collections:
            try:
                # No embedding_function is passed, so Chroma's default
                # embedding function is used for this collection.
                self._collections[name] = self._client.get_or_create_collection(name=name)
                logger.debug(f"已获取/创建向量集合: {name}")
            except Exception as e:
                logger.error(f"获取向量集合 {name} 失败: {e}")
                return None

        return self._collections[name]

    def add_texts(self, collection_name: str, texts: List[str], metadatas: List[Dict[str, Any]], ids: List[str]) -> bool:
        """
        Add texts to a collection.

        Args:
            collection_name: Collection name.
            texts: Documents to store.
            metadatas: Metadata entries (used for filtering and for storing
                extra information alongside the texts).
            ids: Unique IDs for the texts.

        Returns:
            True when the records were handed to the collection without an
            exception, False when the collection is unavailable or the add
            call raised.
        """
        collection = self.get_collection(collection_name)
        if collection is None:
            return False

        try:
            logger.info(f"正在将 {len(texts)} 条记忆存入向量集合 {collection_name}...")
            collection.add(
                documents=texts,
                metadatas=metadatas,
                ids=ids
            )
        except Exception as e:
            logger.error(f"向集合 {collection_name} 添加记录失败: {e}")
            return False

        logger.success(f"成功将记忆存入集合 {collection_name}")
        return True

    @staticmethod
    def _empty_query_result() -> Dict[str, Any]:
        """Build the placeholder returned by query_texts when no query could be run."""
        # "ids" is included next to the other keys so the placeholder exposes
        # the same top-level keys callers read from a real query result.
        return {"ids": [], "documents": [], "metadatas": [], "distances": []}

    def query_texts(self, collection_name: str, query_texts: List[str], n_results: int = 5, where: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Query a collection for texts similar to the given query texts.

        Args:
            collection_name: Collection name.
            query_texts: Query texts.
            n_results: Number of results requested.
            where: Optional metadata filter.

        Returns:
            Whatever ``collection.query`` returns on success. When the
            collection is unavailable or the query raises, a dict whose
            "ids", "documents", "metadatas" and "distances" values are all
            empty lists.
        """
        collection = self.get_collection(collection_name)
        if collection is None:
            return self._empty_query_result()

        try:
            logger.info(f"正在从向量集合 {collection_name} 中检索相关记忆...")
            results = collection.query(
                query_texts=query_texts,
                n_results=n_results,
                where=where
            )

            # "documents" holds one list per query text; count the hits of
            # every query, not just the first one, so the logged total is
            # right when several query texts are passed.
            per_query_docs = (results.get("documents") if results else None) or []
            doc_count = sum(len(docs) for docs in per_query_docs if docs)

            if doc_count > 0:
                logger.success(f"成功从集合 {collection_name} 检索到 {doc_count} 条相关记忆")
            else:
                logger.info(f"集合 {collection_name} 中未检索到相关记忆")

            return results
        except Exception as e:
            logger.error(f"查询集合 {collection_name} 失败: {e}")
            return self._empty_query_result()

    def delete_texts(self, collection_name: str, ids: Optional[List[str]] = None, where: Optional[Dict[str, Any]] = None) -> bool:
        """
        Delete records from a collection.

        At least one of ``ids`` or ``where`` must be non-empty. A call with
        neither is refused here rather than forwarded to the database, so a
        caller bug cannot be taken as a request to clear the whole collection.

        Args:
            collection_name: Collection name.
            ids: IDs of the records to delete.
            where: Metadata filter selecting the records to delete.

        Returns:
            True when the delete call completed without an exception, False
            when the collection is unavailable, no selector was given, or the
            delete call raised.
        """
        collection = self.get_collection(collection_name)
        if collection is None:
            return False

        if not ids and not where:
            logger.error(f"从集合 {collection_name} 删除记录失败: 必须提供 ids 或 where 条件")
            return False

        try:
            collection.delete(ids=ids, where=where)
        except Exception as e:
            logger.error(f"从集合 {collection_name} 删除记录失败: {e}")
            return False

        logger.debug(f"成功从集合 {collection_name} 删除记录")
        return True

# Global vector database manager instance, created when this module is imported.
vectordb_manager = VectorDBManager()