Files
NeoBot/core/managers/vectordb_manager.py
K2Cr2O1 fbeceb4dc9 refactor(插件): 优化插件元信息和命令配置
- 为 AI 聊天和知识库插件添加元信息配置
- 简化插件命令配置,移除冗余别名
- 更新 Discord 适配器的 Redis 频道名称
- 增强向量数据库管理器的日志信息
2026-03-24 14:57:10 +08:00

148 lines
5.3 KiB
Python

# -*- coding: utf-8 -*-
"""
向量数据库管理器模块
该模块提供了一个基于 ChromaDB 的向量数据库管理器,
用于存储和检索文本向量,为大语言模型提供记忆能力。
"""
import os
import json
from typing import List, Dict, Any, Optional
import chromadb
from chromadb.config import Settings
from core.utils.logger import ModuleLogger
from core.utils.singleton import Singleton
# Logger shared by everything in this module, created with the tag "VectorDBManager".
logger = ModuleLogger("VectorDBManager")
class VectorDBManager(Singleton):
    """Singleton manager for a ChromaDB-backed vector store.

    Wraps a persistent ChromaDB client and caches collection handles so that
    callers can add, query and delete text records by collection name.

    None of the public methods raise on database errors: each one logs the
    failure and reports it through its return value (``None``, ``False`` or
    an empty result dict).
    """

    # ChromaDB client; stays None until initialize() succeeds.
    _client = None
    # Collection handles keyed by collection name. This is a class-level dict
    # that get_collection() mutates in place, so it is shared by all instances.
    _collections = {}

    def __init__(self):
        """Resolve the on-disk database directory and make sure it exists."""
        super().__init__()
        # The storage directory is <three levels above this file>/data/vectordb.
        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
        self.db_path = os.path.join(base_dir, "data", "vectordb")
        os.makedirs(self.db_path, exist_ok=True)

    def initialize(self):
        """Create the persistent ChromaDB client if it does not exist yet.

        A failure is logged and leaves ``_client`` as ``None``, so a later
        call can try again.
        """
        if self._client is not None:
            return

        try:
            logger.info(f"正在初始化向量数据库,路径: {self.db_path}")
            self._client = chromadb.PersistentClient(
                path=self.db_path,
                settings=Settings(
                    anonymized_telemetry=False,
                    allow_reset=True,
                ),
            )
            logger.success("向量数据库初始化成功!")
        except Exception as e:
            logger.error(f"向量数据库初始化失败: {e}")
            self._client = None

    def get_collection(self, name: str):
        """Return the collection called *name*, creating it if necessary.

        The client is initialized on demand. Handles are cached per name, so
        the database is only asked for a given collection once.

        Args:
            name: Collection name.

        Returns:
            The collection handle, or ``None`` when the client could not be
            initialized or the collection could not be fetched/created.
        """
        if self._client is None:
            self.initialize()
        if self._client is None:
            return None

        if name not in self._collections:
            try:
                # No embedding_function is passed, so ChromaDB falls back to
                # its default embedding function for this collection.
                self._collections[name] = self._client.get_or_create_collection(name=name)
                logger.debug(f"已获取/创建向量集合: {name}")
            except Exception as e:
                logger.error(f"获取向量集合 {name} 失败: {e}")
                return None

        return self._collections[name]

    def add_texts(
        self,
        collection_name: str,
        texts: List[str],
        metadatas: List[Dict[str, Any]],
        ids: List[str],
    ) -> bool:
        """Add text documents to a collection.

        Args:
            collection_name: Name of the target collection.
            texts: Documents to store.
            metadatas: One metadata dict per document (used for filtering and
                for keeping extra information alongside the text).
            ids: One unique ID per document.

        Returns:
            ``True`` when the documents were handed to the collection without
            error, ``False`` when the collection is unavailable or the add
            call raised.
        """
        collection = self.get_collection(collection_name)
        if collection is None:
            return False

        try:
            logger.info(f"正在将 {len(texts)} 条记忆存入向量集合 {collection_name}...")
            collection.add(
                documents=texts,
                metadatas=metadatas,
                ids=ids,
            )
            logger.success(f"成功将记忆存入集合 {collection_name}")
            return True
        except Exception as e:
            logger.error(f"向集合 {collection_name} 添加记录失败: {e}")
            return False

    @staticmethod
    def _empty_result() -> Dict[str, Any]:
        """Build a fresh empty query result for the failure paths.

        A new dict is returned on every call so callers can mutate it safely.
        """
        return {"ids": [], "documents": [], "metadatas": [], "distances": []}

    def query_texts(
        self,
        collection_name: str,
        query_texts: List[str],
        n_results: int = 5,
        where: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Search a collection for documents similar to the query texts.

        Args:
            collection_name: Name of the collection to search.
            query_texts: Texts to search with.
            n_results: Number of results requested.
            where: Optional metadata filter.

        Returns:
            The result object returned by the collection's ``query`` call. If
            the collection is unavailable or the query raises, a dict whose
            ``ids``, ``documents``, ``metadatas`` and ``distances`` entries
            are empty lists is returned instead.
        """
        collection = self.get_collection(collection_name)
        if collection is None:
            return self._empty_result()

        try:
            logger.info(f"正在从向量集合 {collection_name} 中检索相关记忆...")
            results = collection.query(
                query_texts=query_texts,
                n_results=n_results,
                where=where,
            )

            # Only the hits for the first query text are counted, and only
            # for the log message below.
            documents = results.get("documents") if results else None
            doc_count = len(documents[0]) if documents and documents[0] else 0

            if doc_count > 0:
                logger.success(f"成功从集合 {collection_name} 检索到 {doc_count} 条相关记忆")
            else:
                logger.info(f"集合 {collection_name} 中未检索到相关记忆")

            return results
        except Exception as e:
            logger.error(f"查询集合 {collection_name} 失败: {e}")
            return self._empty_result()

    def delete_texts(
        self,
        collection_name: str,
        ids: Optional[List[str]] = None,
        where: Optional[Dict[str, Any]] = None,
    ) -> bool:
        """Delete records from a collection by ID and/or metadata filter.

        At least one non-empty selector is required. A call with neither
        ``ids`` nor ``where`` is refused instead of being forwarded, because
        an unfiltered delete must never be able to empty a collection by
        accident.

        Args:
            collection_name: Name of the collection to delete from.
            ids: IDs of the records to delete.
            where: Metadata filter selecting the records to delete.

        Returns:
            ``True`` when the delete call completed without error, ``False``
            when no selector was given, the collection is unavailable, or the
            delete call raised.
        """
        if not ids and not where:
            logger.error(f"从集合 {collection_name} 删除记录失败: 未提供 ids 或 where 条件")
            return False

        collection = self.get_collection(collection_name)
        if collection is None:
            return False

        try:
            collection.delete(ids=ids, where=where)
            logger.debug(f"成功从集合 {collection_name} 删除记录")
            return True
        except Exception as e:
            logger.error(f"从集合 {collection_name} 删除记录失败: {e}")
            return False
# Global vector database manager instance, created when this module is imported.
vectordb_manager = VectorDBManager()