- 为 AI 聊天和知识库插件添加元信息配置 - 简化插件命令配置,移除冗余别名 - 更新 Discord 适配器的 Redis 频道名称 - 增强向量数据库管理器的日志信息
148 lines
5.3 KiB
Python
148 lines
5.3 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
向量数据库管理器模块
|
|
|
|
该模块提供了一个基于 ChromaDB 的向量数据库管理器,
|
|
用于存储和检索文本向量,为大语言模型提供记忆能力。
|
|
"""
|
|
import os
|
|
import json
|
|
from typing import List, Dict, Any, Optional
|
|
import chromadb
|
|
from chromadb.config import Settings
|
|
from core.utils.logger import ModuleLogger
|
|
from core.utils.singleton import Singleton
|
|
|
|
# Module-level logger, tagged "VectorDBManager", used by every method below.
logger = ModuleLogger("VectorDBManager")
|
|
|
|
class VectorDBManager(Singleton):
    """Singleton manager for a ChromaDB-backed vector store.

    Holds one persistent ChromaDB client and caches collection handles by
    name. Errors are logged and reported through return values (``None``,
    ``False`` or an empty result dict) instead of being raised.
    """

    # Set by initialize(); remains None until initialization succeeds.
    _client = None
    # Collection handles keyed by collection name. Declared on the class and
    # only ever mutated in place, so the cache is shared across instances.
    _collections = {}

    def __init__(self):
        """Compute the on-disk database directory and ensure it exists."""
        super().__init__()
        # Three directory levels above this file, then data/vectordb.
        self.db_path = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
            "data",
            "vectordb",
        )
        os.makedirs(self.db_path, exist_ok=True)

    def initialize(self) -> None:
        """Create the persistent ChromaDB client if it does not exist yet.

        Any exception is logged and leaves ``_client`` as ``None``, so a
        later call can retry.
        """
        if self._client is not None:
            return

        try:
            logger.info(f"正在初始化向量数据库,路径: {self.db_path}")
            self._client = chromadb.PersistentClient(
                path=self.db_path,
                settings=Settings(
                    anonymized_telemetry=False,
                    allow_reset=True,
                ),
            )
            logger.success("向量数据库初始化成功!")
        except Exception as e:
            logger.error(f"向量数据库初始化失败: {e}")
            self._client = None

    def get_collection(self, name: str) -> Optional[Any]:
        """Return the collection called ``name``, creating it when missing.

        Args:
            name: Collection name.

        Returns:
            The collection handle, or ``None`` when the client could not be
            initialized or the collection could not be fetched/created.
        """
        if self._client is None:
            self.initialize()

        # Still None means initialization failed (already logged).
        if self._client is None:
            return None

        if name not in self._collections:
            try:
                # No embedding function is passed, so ChromaDB falls back to
                # its default (a sentence-transformers model, per the
                # original author's note).
                self._collections[name] = self._client.get_or_create_collection(name=name)
                logger.debug(f"已获取/创建向量集合: {name}")
            except Exception as e:
                logger.error(f"获取向量集合 {name} 失败: {e}")
                return None

        return self._collections[name]

    def add_texts(self, collection_name: str, texts: List[str], metadatas: List[Dict[str, Any]], ids: List[str]) -> bool:
        """Add documents to a collection.

        Args:
            collection_name: Target collection name.
            texts: Documents to store.
            metadatas: Per-document metadata (used for filtering and for
                carrying extra information).
            ids: Unique identifier for each document.

        Returns:
            ``True`` when the backend accepted the records; ``False`` when
            the collection is unavailable or the add call raised.
        """
        collection = self.get_collection(collection_name)
        if collection is None:
            return False

        try:
            logger.info(f"正在将 {len(texts)} 条记忆存入向量集合 {collection_name}...")
            collection.add(
                documents=texts,
                metadatas=metadatas,
                ids=ids,
            )
            logger.success(f"成功将记忆存入集合 {collection_name}")
            return True
        except Exception as e:
            logger.error(f"向集合 {collection_name} 添加记录失败: {e}")
            return False

    def query_texts(self, collection_name: str, query_texts: List[str], n_results: int = 5, where: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Retrieve the documents most similar to the query texts.

        Args:
            collection_name: Collection to search.
            query_texts: Query strings.
            n_results: Number of results requested.
            where: Optional metadata filter passed through to the backend.

        Returns:
            The backend's query result on success. When the collection is
            unavailable or the query raised, a dict with empty
            ``documents``, ``metadatas`` and ``distances`` lists.
        """
        collection = self.get_collection(collection_name)
        if collection is None:
            return {"documents": [], "metadatas": [], "distances": []}

        try:
            logger.info(f"正在从向量集合 {collection_name} 中检索相关记忆...")
            results = collection.query(
                query_texts=query_texts,
                n_results=n_results,
                where=where,
            )

            # Only the hits for the first query text are counted, and only
            # for the log message below.
            doc_count = 0
            if results and results.get("documents") and results["documents"][0]:
                doc_count = len(results["documents"][0])

            if doc_count > 0:
                logger.success(f"成功从集合 {collection_name} 检索到 {doc_count} 条相关记忆")
            else:
                logger.info(f"集合 {collection_name} 中未检索到相关记忆")

            return results
        except Exception as e:
            logger.error(f"查询集合 {collection_name} 失败: {e}")
            return {"documents": [], "metadatas": [], "distances": []}

    def delete_texts(self, collection_name: str, ids: Optional[List[str]] = None, where: Optional[Dict[str, Any]] = None) -> bool:
        """Delete records selected by ``ids`` and/or a ``where`` filter.

        Args:
            collection_name: Collection to delete from.
            ids: Identifiers of the records to remove.
            where: Metadata filter selecting the records to remove.

        Returns:
            ``True`` when the delete call completed; ``False`` when no
            selector was given, the collection is unavailable, or the
            backend raised.
        """
        # Refuse an unfiltered delete up front. How the backend treats a
        # delete with no selector depends on the ChromaDB version, so it is
        # never forwarded; the caller gets an explicit message instead.
        if not ids and not where:
            logger.error(f"从集合 {collection_name} 删除记录失败: 必须提供 ids 或 where 条件")
            return False

        collection = self.get_collection(collection_name)
        if collection is None:
            return False

        try:
            collection.delete(ids=ids, where=where)
            logger.debug(f"成功从集合 {collection_name} 删除记录")
            return True
        except Exception as e:
            logger.error(f"从集合 {collection_name} 删除记录失败: {e}")
            return False
|
|
|
|
# 全局向量数据库管理器实例
|
|
# Shared instance created at import time; constructing it runs __init__,
# which creates the data/vectordb directory if it is missing.
vectordb_manager = VectorDBManager()
|