From d6623e2cc8ec1759069008e86768027828158863 Mon Sep 17 00:00:00 2001 From: K2Cr2O1 <2221577113@qq.com> Date: Tue, 24 Mar 2026 14:32:36 +0800 Subject: [PATCH] =?UTF-8?q?feat(vectordb):=20=E6=B7=BB=E5=8A=A0=E5=90=91?= =?UTF-8?q?=E9=87=8F=E6=95=B0=E6=8D=AE=E5=BA=93=E6=94=AF=E6=8C=81=E5=8F=8A?= =?UTF-8?q?=E9=9B=86=E6=88=90=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增向量数据库管理器模块,支持文本的存储、检索和相似度查询 添加知识库插件和AI聊天插件,利用向量数据库实现记忆功能 优化跨平台翻译模块,集成向量数据库存储历史翻译记录 改进消息处理逻辑,优先使用用户显示名称 --- adapters/discord_adapter.py | 3 +- adapters/router.py | 3 +- core/managers/__init__.py | 2 + core/managers/vectordb_manager.py | 134 ++++++++++++++++++++++++++++ data/vectordb/chroma.sqlite3 | Bin 0 -> 188416 bytes main.py | 4 + plugins/ai_chat.py | 113 +++++++++++++++++++++++ plugins/discord-cross/handlers.py | 2 +- plugins/discord-cross/translator.py | 74 ++++++++++++++- plugins/knowledge_base.py | 86 ++++++++++++++++++ 10 files changed, 415 insertions(+), 6 deletions(-) create mode 100644 core/managers/vectordb_manager.py create mode 100644 data/vectordb/chroma.sqlite3 create mode 100644 plugins/ai_chat.py create mode 100644 plugins/knowledge_base.py diff --git a/adapters/discord_adapter.py b/adapters/discord_adapter.py index 6e8b33a..3809872 100644 --- a/adapters/discord_adapter.py +++ b/adapters/discord_adapter.py @@ -112,7 +112,8 @@ class DiscordAdapter(discord.Client if DISCORD_AVAILABLE else object): try: data = json.loads(message["data"]) if data.get("type") == "send_message": - await self.handle_send_message(data) + # 使用 asyncio.create_task 异步处理消息,避免阻塞订阅循环 + asyncio.create_task(self.handle_send_message(data)) except json.JSONDecodeError as e: self.logger.error(f"[DiscordAdapter] 解析 Redis 消息失败: {e}") except Exception as e: diff --git a/adapters/router.py b/adapters/router.py index e1c97ef..372540e 100644 --- a/adapters/router.py +++ b/adapters/router.py @@ -356,7 +356,8 @@ class DiscordToOneBotConverter: # 注入 Discord 特定信息(用于跨平台插件识别) discord_channel_id = discord_message.channel.id if not isinstance(discord_message.channel, discord.DMChannel) else None - discord_username = discord_message.author.name + # 使用 global_name (显示名称/昵称) 如果存在,否则使用 name (用户名) + discord_username = getattr(discord_message.author, 'global_name', None) or discord_message.author.name discord_discriminator = f"#{discord_message.author.discriminator}" if discord_message.author.discriminator != "0" else "" if is_private: diff --git a/core/managers/__init__.py b/core/managers/__init__.py index cdda6aa..4e88f1a 100644 --- a/core/managers/__init__.py +++ b/core/managers/__init__.py @@ -13,6 +13,7 @@ from .browser_manager import BrowserManager from .image_manager import ImageManager from .reverse_ws_manager import ReverseWSManager from .thread_manager import thread_manager +from .vectordb_manager import vectordb_manager # --- 实例化所有单例管理器 --- @@ -55,4 +56,5 @@ __all__ = [ "image_manager", "reverse_ws_manager", "thread_manager", + "vectordb_manager", ] diff --git a/core/managers/vectordb_manager.py b/core/managers/vectordb_manager.py new file mode 100644 index 0000000..3be5eb7 --- /dev/null +++ b/core/managers/vectordb_manager.py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- +""" +向量数据库管理器模块 + +该模块提供了一个基于 ChromaDB 的向量数据库管理器, +用于存储和检索文本向量,为大语言模型提供记忆能力。 +""" +import os +import json +from typing import List, Dict, Any, Optional +import chromadb +from chromadb.config import Settings +from core.utils.logger import ModuleLogger +from core.utils.singleton import Singleton + +logger = ModuleLogger("VectorDBManager") + +class VectorDBManager(Singleton): + """ + 向量数据库管理器(单例) + """ + _client = None + _collections = {} + + def __init__(self): + super().__init__() + self.db_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", "vectordb") + os.makedirs(self.db_path, exist_ok=True) + + def initialize(self): + """初始化 ChromaDB 客户端""" + if self._client is None: + try: + logger.info(f"正在初始化向量数据库,路径: {self.db_path}") + self._client = chromadb.PersistentClient( + path=self.db_path, + settings=Settings( + anonymized_telemetry=False, + allow_reset=True + ) + ) + logger.success("向量数据库初始化成功!") + except Exception as e: + logger.error(f"向量数据库初始化失败: {e}") + self._client = None + + def get_collection(self, name: str): + """获取或创建集合""" + if self._client is None: + self.initialize() + + if self._client is None: + return None + + if name not in self._collections: + try: + # 使用默认的 sentence-transformers 嵌入模型 + self._collections[name] = self._client.get_or_create_collection(name=name) + logger.debug(f"已获取/创建向量集合: {name}") + except Exception as e: + logger.error(f"获取向量集合 {name} 失败: {e}") + return None + + return self._collections[name] + + def add_texts(self, collection_name: str, texts: List[str], metadatas: List[Dict[str, Any]], ids: List[str]) -> bool: + """ + 向集合中添加文本 + + Args: + collection_name: 集合名称 + texts: 文本列表 + metadatas: 元数据列表(用于过滤和存储额外信息) + ids: 唯一ID列表 + """ + collection = self.get_collection(collection_name) + if collection is None: + return False + + try: + collection.add( + documents=texts, + metadatas=metadatas, + ids=ids + ) + logger.debug(f"成功向集合 {collection_name} 添加 {len(texts)} 条记录") + return True + except Exception as e: + logger.error(f"向集合 {collection_name} 添加记录失败: {e}") + return False + + def query_texts(self, collection_name: str, query_texts: List[str], n_results: int = 5, where: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + """ + 查询相似文本 + + Args: + collection_name: 集合名称 + query_texts: 查询文本列表 + n_results: 返回结果数量 + where: 过滤条件 + """ + collection = self.get_collection(collection_name) + if collection is None: + return {"documents": [], "metadatas": [], "distances": []} + + try: + results = collection.query( + query_texts=query_texts, + n_results=n_results, + where=where + ) + return results + except Exception as e: + logger.error(f"查询集合 {collection_name} 失败: {e}") + return {"documents": [], "metadatas": [], "distances": []} + + def delete_texts(self, collection_name: str, ids: Optional[List[str]] = None, where: Optional[Dict[str, Any]] = None) -> bool: + """ + 删除文本 + """ + collection = self.get_collection(collection_name) + if collection is None: + return False + + try: + collection.delete(ids=ids, where=where) + logger.debug(f"成功从集合 {collection_name} 删除记录") + return True + except Exception as e: + logger.error(f"从集合 {collection_name} 删除记录失败: {e}") + return False + +# 全局向量数据库管理器实例 +vectordb_manager = VectorDBManager() diff --git a/data/vectordb/chroma.sqlite3 b/data/vectordb/chroma.sqlite3 new file mode 100644 index 0000000000000000000000000000000000000000..c0ab1dd7daacc89e9757d0a41fe767179f910ce5 GIT binary patch literal 188416 zcmeI5TWlLynwUvlNQ=JE-JWia+wJkOyfYHVq|IXS+Fqk0OSFgEy0|5(-95Vj_ps`e z#4%YVvx>4cvmJnPcV~C92!brIK%Rok1lV0*k;S}ivIwxi0(qM}U&JDajgpM(z0tTdF$O|9t;B|M}0U63@nm%WaDh&FFQhMdZlENHiMxCxk>I zkr@2H1^?Yo0WPN88~7a!T#vhqMQ(k6XPPHYe#z5ZPkwpsZ_n+W{qEVziN8tw@l0dp z`k9}_|2+OA{$A`yF=hJCrgx`)f9mFBapLC_&n6b5KaJKRe_B?q2E|suProcq_J>sxklzjCz_i#*;ZT45R3J?8M3pb z8yWI&y}q)r{t5Y@_DL{0ohFa!jqj{&Hpu$gqx#Zdn#5Cg?nDRI$H}Hri;k$52VXr> zI-Y87zO9n_YNPg`wjLBKLs}g}c}(stuiZ;q*WxKjIx~1{j2H(caO(c3zTVheSat{< zOhz_0>Z=ckW|>8hBSTE9$IjyRpTV1Se5HGn2^wSx;d5_tiXt>)p5ggQv9WR30K+4vZ+lNH3)-k9X0b^uYga&#& zvDFQAr4woSox}9(%NRgretrMaWn0nDpM+7Fe)`mCH0D7h0l}Qs4t0Vs3$F^VlZ4|% zw+#0XQLfTs)M9EAOk$(HQrl=OtUUBO5m~C;U)Wr3kj2e)P*9`kB@vl7SL+{c)(AHk zP7lra8U#@QQj1r|^+G43*_7O?w+Gc!JazSIbiZa}bi>`>@esmUH+a@ZIC_j& zdpm4oFH(%r5sK02FvV&cy2uvjq<$3dq#mtA`tIAY)Q306HMUEkZ!(?g3Tt+ZEp8-2 z0kOebZ^ctLZ$<|&8o}@nMGxIhhd>NP+BC!n8#WhKyUR?Ac6a#r@MP!u1Q0#yH`wh# ziFEO6u~hxWxMuk@%w|4t^H%ZFakEK*Y+u`7d^4W9aU=SfnEnD$fy+yw*3SlF4(kad znx@3PG`@yFgooHcaA_(z(TlzY*CZh$yGq8o4jW@)07wuZkhvQkOcaU zo5;3rWNj6O*m4bK?8SwR#f7C>hA4)vwYQqzGY!27CQev3Vk+CvAN&AY4tMx*@iU%? zV%$k@#8S0u<8ML(zghQXft3By}+6M zY&@Ql<>+TooAQW!BZAI^SdU2H;10yN!~;glk3F5tefeO>XNbT)_|{}RH8&UCKWp=% z%9DP($C^)j;I)Ttrb3KDQ4SsMg}a}C-x|=n=t*uG{+q8=z+O|UZ@SZjH{04{&`q3{ zCl2#AyX`G8D}$5dUr+Aej@sVlXKA~2evE%L5o(toKB7@-TTC>bga_NR%X+~n?~s&Q zy94I%z>FGdG;cgCphkIQE*JhA4C?k#X#!dx^U#nCaYRjfHNvu*T;Xf;iR3T%5<8Rp zGWm_a z{1;3vjqSEcy7VzK-Nm1I%g;n~L$@<=C-GJyq-EDMHZ2Aoo>)J+13CaosP|Xm6W>WB z>Z=>I^#&gWYhHVVfE(caEH=ou$^G@U6))~iVrhNtp|eDHTXZL}u-t$h0kO5=hKgcf zZL+WeyB>b=cM`c`K`s_4)2gf_mvi}+RxB`@hdZfQk~L|+b_P_m^88bwqBC4Ym2AL| z&Vr8~R?9k5jGk(er`s@*yS@zQ$@AiU7uqBgyaVtY`F0aiZzVvh3l`zdH!(C~ue!j2 zggqFpYI_J1xUijvzR6T#8Kh@dM|P>w?{@=552-v*KVMeZN*6}zCAj{;k>*&LRb@$% zXih3us#I0AQlVOsl#-M$D}yRbOVJMpRB()Qj^$t%7Y=QhtQB)0R3Jhdo_IGxS9{7Xyb{zqi9k#24_XqoS zJ)!0d0Vk1ya?O6nf*X;RiqyQDwDn!1v&)=5qHjTBojrB>2-Q_+>&_nRNkP{UP9`zH z;N2p1%kDvNO&+&99j9WucwQ{E-4{l6Vh?-Z*zfAMz|=58cXm2$MJ)a7>5=z7Ub^U) zI%Cl3AqW;d!siwB3Slj_1vBd2I4ybpOHf+}9jTF{``CF4jL~wTRH@3vmXN^!#A|uF&%|*K;x3V_hJZwTzv%5?1WG9U~C%Sl&^A7SVb}*A89u zHU~hLppp%-0pg5gGcmTy+J?>Pvv$QJ8*Y*j=~|LpDl3(Isa4Thw5U}}TE5(3nyN@@ zA+P3tcx8$Y#6NL5^AvAnA?qv%vaY!@11$~B9QFJ5lwSL|zR}ol7Bqp4snNSC?h4di zM}Vb2q(o1ELuRGlh_&D0m;&~#X&pL+pB$E}3fn)jd*LV_>s9V@*fqPIXVmruKmwb! zXm3sum1z`=rMt@J1I}xL<6Eq)!vgW)^1@{X5ZYbFPW}mGhDeV91I`691-mkb@I&_UmiY0`6oN# zhaCYw<}$t@Zo$DYuPvURH7w47E)@`pTQ}G`ayPVwc9FOX?zm`&<(YG3J+vLd#|sD( za`QsrK~FCbVGju3*9*jen^LG$z&x^wrhqb8A(xkxvLxkLp&-k}sx*jC#!}HodtwHd z99#CnS1{G$8Z{`eP*0-vfd3M01x_`Y`{J#5Jw8U1N{^z*Az zu$5z@3ap1lZQI;QJ^9`X%u1G1+ zr^f2iM||W2J8m8AYi(#D9yF>WRODAZtffGVyI>rt`d{@9&_Pv&Bz1Rt?j+=jT+FNG ze37b>B9(IGf<_hoB>!v@ek5iR@bk^&#Yl24c`^CVl24Lq@=@}4l6RB8NdB+ne^36m z15z$>O$aV@_gWC;@sJ(%QH>^XU|Th zqEY+7cp~9n&&>GOXU_Q7@wk5-i}}~n)Bg3;lz%-r>0eJw%uJ-cA0&VTkN^@u0!RP}AOR$R1dsp{KmxBLfu|GGn;%}?EU7uIS}awv zvRck%;cL>>Y^5lv*=kXttfeTlBvtx7{XJzHzVDd6=X|Z>ZTCyCZ;S7$U#T?~mKGWd zGRv_XZB?=rxvXXjIa$t9Ny}x6N-3|v$#s-fa)W4edb53bQ&wf#(yD4!rZUSG;5)gq z6$W3q4d2sVEvl^4Dpg0-ybA|08NJ)vy7?5om0h8oEPS)N**4+es*m9N2I9R}|vha+we zJv{hiXN3l2u+M`Yc|asT8`L|L(mo{AIit6}+F0}2BLs8~m;JUl8`c#*bTF(R7(9;E z4Hd=sXTrMSa3E|EUTlH)O0+5*0bkDLTUxQeXddpQVoBDd{n{B&(aQ5rg^JE_6;-lb z+Sct8brlA0nrwyDvd$Ev2S=el-DbLX0x{^x%R&rzFU3AbU7QjOcbd`bGW8Ze&3FM$ zFouJ6iJ>`XyBm52P7?=Ji*wfD!q7W=q;E2nAGhAKt0TKq>G!*VqK8x-sJ{s!+XuwL z37$Zk1Wq{Rhr9CHO^ZUu;E}6q@PBi8na6l?hYy10+RO37Ygtv6B#Gvva-|9<)@!9g zwInGeDPL9w)i^(Pd_V=qIOkXnc5%hfwe}Fn4S1agI(MKyiBx?-4-O<&thS-kUYnWY zPL?-==kSW`&F`6p-ZZVA-Hba4NhyQ!TP-RVO7N}`l`3$WYpbkDO0`mLG4(l%@nQOf zb7Xpq%U%k~?sTWD`|4A;KWu!s+_vEGXakx81DTlpogJfR5g^pow}`UMl*i<0+uA0y zW3ir4bB0jp9H3kij=E4 zA3JY>F$#Ziu`bZc+DGYz z6}xW72n76uY=;)n;C(ndbkXQ^;GHz+qM%Dq$p+i%GTm}UvYAK(TjTd~lPs;TJ#;D9 z72gRZbnki;A`*3$S zXgzGpf~QV9Q0$p%ZIvw5mTQd~SzOpyTv)1!W3b(piKI}PILAHBMXb5wfWlaJPo>|6 zVQrglLl?ucMNQ@mLbBD~g(-s{=6vwf>@cQ4XR(hv7e{EjjR_j`lf!aVA#m9p+@8*Z zGXn7sg?GoO0OMvp6tJ7<(8Ho##2m}e!SnoRaR9y8W0V_Mrcp4KE@?jC+zJwoZ!vu4 zy`y1btN5dZ<;~g#nV)kC0;fTzx;2-+!x=WXISbbSxWSYTCnpA2zMh!bH#h4`=D5mT z;b8c<{RSp_w_B!(Cl4Q@{F5E=!;XL-a~WR{x8Puy*OtUMaSn8;fKXidz#bd}-2`hk zqy_>X$3;6V&zvjkq3sYpUOZnV?FHw;rn`_EV)9Z0_KrbGzFB&3c0+jlqD(0 z3I$m%R;59FGM0)y+7mOtB$rqY&h#FFz)UbkjG*7)Wpf9~odmbVc{Nuo7gQN6vs6{` z+=b$2$17Et(&rB*z)rvaMN)_|!9~ftYlE@o*_#>08w4?&`Q$!gG@IZxjEwM4k{9FK zrz(mW>*F7^WR8sfv@K+HgIyyQw8U<+4l~7%+1|(&Sv|*x;LR1ngC`#LD@3r?F{m4{ zUR$tTUJ~?<$-FP5f1-T8D#;3XjK!RsgQY~Z$aPpKRC02eRjN|{`ORoNHBK6zScs1JRw1&nm(u^zv^Kv1!7!9ja2=iCjD2Us*t3ocp(l|2y-a^Q_D-|FetlC5Zu}7fN z9r&vjvC`PKtR3^c+qYrOt~}-oJFsIU4OoI#E4LDx58Gmi_C4Dlf}VF4gXXYhm_VJZ76c1FOg(q%$Nd zkqnX2Y4Y2@O-L?nPc`HZUbK*Wppe2z3y}kbR8Cq*!7oJ0pR|x-ppeo@3n}@9Mj!7HSAaP9A+#3k)a7JaC^D8;)ExNP3e#k!RZ7K5kyZBZ zo(H9FJZFwlodui*k4|8V>zM_+c#oOh{ysaX){pAzjm?E+(pb2+Tni|S=jB;pnxs8t zn`~~>S050~GK=#;Ibr4Q6n%FN+P>AJUD!Bq*V^_o4Zjl%mpR`vz#V`oH9#%xE~!Vv zi8Zocp4(|dOJzUWt?o`jtCpz>PHurRN-G+yI<*|^*ymcMLZt<9&##^1{60@R{G3&% zS5(%mDZmf*MsSB)cxMN9xIJ>7VF)&L$4w2cbj5<$U+s=sTzZur*yD3^+ph7w z<+XdBB@;1Vtc2*%F z1q9dnN142!tvg_z?j&HntZ2o&QfRgErE-boCACy3!q0VIF~kN^@u0!RP}AOR$R1dsp{@Co4G z|NAWP3JD+qB!C2v01`j~NB{{S0VIF~kihFofIt6_CVv)zfA~QHNB{{S0VIF~kN^@u z0!RP}AOR$R1YQdQ7o!vD@YlWY=l{6=e=U@YjX(lO00|%gB!C2v01`j~NB{{S0VLoM zcw;g>6JGy|=l_2lN&YoLLIOwt2_OL^fCP{L5#N|KCTFe}CFqi1Hu-B!C2v01`j~NB{{S0VIF~kN^@u0!I;; zo7|icdjp5>4v6*tKSYxMa8!vHj|7ka5S}>Fm(^ z{_fL%_iX_C{r|Jm5%`B6B!C2v01`j~NB{{S0VMF7LEw*`MbB)064`v?U!Rv*j^${p zlC8*PHCxEZa+XS3E?ZPed4;JeWtE)Oqq^ClR@>0wocpfP+iOaaoXc0r`C_gp$;E2E zP?V4J^z8?q{$SSe$j#aJelRPJ{N`7hWj5Qo%AWBXrqfo3sk85Cv}3Z&tYYX|dkbPd z{WRm`fyBG4XEAj|F00oMXM$W>$DmfT(>5($!7fwaAo3Aehbx>lcc{W<-<$2^XEU?j z$?;9CuPdAfFU;E8;VB;L##0><>9oslon}HnNcwHvd}=pQYeG1*zweYI<#L%>eh9qI z+FRQ#qX&Sb64a?RO-6glHr(w8y$cM zfsn7qc#@_@dCMX2R{m4?0L~rSGg(s*;gW>{j)Uk>Dt%pWftp&~e9=*q&sW|9_mlOtJp|NhJA`*OLRb3JD+qB!C2v01`j~NB{{S0VIF~ zkib`x!0OZ|n=>!(4J5w)pGf`{eE%Q(zz-5Y0!RP}AOR$R1dsp{Kmter2_OL^a2g2A zPi{s_YEG*bOO>pwmUG#HTvW4_qNHZ4MTN4KqR^65nXsS#t1@kARW&P9nPm&Kkk3|F zu9PiORxPTm)GAdg{O|uG7f(aePzEG`1dsp{Kmter2_OL^fCP{L5xkvyBAOR$R1dsp{Kmter2_OL^fCNql0et>{I`n{2AOR$R1dsp{Kmter2_OL^ zfCP{L5_pva@cI9%q>E)E0VIF~kN^@u0!RP}AOR$R1dsp{I2{D=`TyzA14@AekN^@u z0!RP}AOR$R1dsp{KmthMRT9AG|F4oRmW>3E01`j~NB{{S0VIF~kN^@u0!ZL=5D2XQ zZ-7Mn|1A8!b~>a(DUbjXKmter2_OL^fCP{L52k#m9d z{|So2EJy$eAOR$R1dsp{Kmter2_OL^fCP}hSAhVo|Gx@aLK%?&5GI|GhnVJ#yi{UidfXe}3+7&+VQ4?%B(Uze)V@Ok?KynV-e~JpLs9 zUhGFPW%|#icc*@T>gMG2iJwnAn^=tgG+K-NMdaT>0fWoe~aR8y)(M#r4`kqedDF_m*p9?qH(WY|L)qtE_{IH#cvxt+tvW7VC90 zWM@k^GUVZUePv<&6Y@drlVEf@O&--7-&xyikoC1k^`*fyiKp(|i4LxhYrje@da!Lx z9(?sE0(Ppo`L;^xtBu-&+Imo|3~6-?CE7*F=8B)z^VJA`g&t? zVcF5b!DM7}qrUopXqH(7IWokwdR)D`w9{v4`5Up+gX_^qTUXgL^GT;|u_of?r@!eSJku*+NzvW$KQLdfle*6lD8hu_Jg6gI?nCEgx@`LeC3jzvV zuidY$*H#y68^lX)0_k+dZpve}CmQLtLj(u4V8@H#Hc+uO> zolz+Ry`I?WhWgWqwEWItdiLdRO8)iN_b*+x75)547@z5f$7%pow;3Br85 zD!fh-ju+iB+(SgUN{>;CsZDB;Mt!BW(O6h{=yf8pRJ*^hx!fR&o9m#oM$=0oGHec9e&Bo}4yT9WhgtKn& ztdDT?7_s(t*vOux7^5Q;qtjuE)i!jIEzoiODBf{BT8Z@Cw_~XfZ;oqhmqLGKI@J}{ z>=;|zNQ44ngSXy_r*7Vi4q!CG=o$C`o z^r+upw+AKC#jnLu^&8`w<iY(szW18h0m z;m5_#cp{2%C%q9%)vk@xnI}av?_ItrT|8bE$;(_F-wZuWbOSleJYDR@#atSz!lO=9Ss zJ@~;qeUPVZYa7yRwRf3r7m!G2FT_#{SH@|_5v?gX7s=l{A5UGm5*;LMyWoU6zb=Fb zJAsGr4YkZR(eIy&r8ctTgmElv({%zoQ|*PE-+%XPJeAExpFa+oY%hMegmWQ@hhrVl z-C)=0363Er?oi#0cO)D?fKL-*+`e2d!WTS4gf~$;6H9#}kK^l`E)Q5UU=O{(nf+`$ zo|5J0XHlE-hjtV92}J2j7~Er{?CO`)6%lRC&^G z_gM3354`r!%~Xg{D9WM3y>Ryv@MRA;X>J<+o3B;CUQ?@Yy3>R=+uCE$O`Mh|4)Zp< z?JY4YgOlW6PwwB2+TP}8X}fiPjDIx|YL_2AqETvFOf;T^Vq~}?^546x7o74ANvX9v zU=9z=sG&yl5@7)~$|K`Z?gtF&_EBj9S|Ib#kc=y8I#gr&{Mo(8rO01K&K44Xm-v@6 z|9hr!=ErBIRr5v*rp5b@f;01(Q@Ifw~ z+JF{ZI=Tg7ojR@&>Eky-6m-mX1XssyQ-Wck*1U5&p8DW!bWlCKHL!^RnjY5@cPHnF zZY2six)rX`-amUQp1ONC`dk>bakY;NzY#Kc<5D^1FJQ;rl}^yd@N(NTo-l_1WRx)q zp?-0bDhp1_{RFnd&Y3ufm_k%L7z!#$Yr$PRWp6HnDa z>L=r*4j7MvHC{VP=s@m6We%isMGgdohQ!>>c&Y{xvvCr8cHyAB?ym0Gk>KS$R9Y{U zE2|d}lJwnggE3u+4sP0$zAufp`)o4WQ?{F85qVVnd}zFk*pY_q%s_E-^N&ND31k<9 z4Q&pSbUd}T79HF^LOj(_%=Y(3?VdXoItk{EE1;Vm_G3I}E8rKvHtzSuzVYD7yb$r5 zM~DanVJ#Jyk47No3BGk)5uF0Z?`|u(^x)Q9JoVr~bnv@JNH_W+g98sDzj<7#e%Z$X z?|cged;J(1hy9cB-I4DL{MKizelgT=#Xj+HCoh;-C5180~7 z9~}fOCOl{{9y1*}{M|WtngH&6ckX155)9{2g5&dh&l~(l1~2JRg0rvn;?EHL4RLQ8 zfcHp0iP>;<(?}HzFrUyJ0FA%h}nc4k$c&veD$58~}2M3B3Jw1X09eEn~_y0}K{PW1!A1CVY3qMEz2_OL^fCP{L5E znj$%wPZ0RDRZ?GV)E?B<$;0*f%EJ05~Bayz7sAw&X z6*R3Svtn73)JjWJm13nR`C)bfqRTv zOl?w&H0mq0jmE;tL$bQoAgi0p%Veo`e_?aEK^8aH*K4bdrk6yt*4SY^YPAjB1LEcs z@s_dER%`}dks<;l;$((GcbQ@tJ#ue(?Vd=+bj9Gxv=fV#?fcGgym11)9Ua zmTa&c3hiUWqOA@i8py_;?Q{&4sl?7k+)~Ic?ev*>D*?G`RM~c-S%-C*Zjmi!nWPQ> zji);48Ba5W=};tZ8Pl}3(&mYN3;K>l+qy}hi}u=1X^m|Lm9|aGPVDyXrdP2LZT@N7 z+O}&5^hh$_Rx{)=+e@dpTs_vcOYiHJ(O0&48+4WMO4~ZfPAr3%fS(alp&i;g*tSGy zjMVSj+N*tB-)L<3BgT`gDF_`HGHwqc>$Uqe=v0ffKyDL6P76hY+@tyk3@RYew0Q@* zMO(L=FlT6rLbQ%S-3S=q%ObR87@crxK1AK-)mN8lACu!X4~A9~1ht3$+Ug+`Tut0L zQ7upfw}$ElQYwoKy=7;<}vPJuSvA%fQsjgeB Kr_;{tr~e-!F5<2L literal 0 HcmV?d00001 diff --git a/main.py b/main.py index a6793eb..e2a8433 100644 --- a/main.py +++ b/main.py @@ -111,6 +111,10 @@ async def main(): 2. 初始化 WebSocket 客户端 3. 建立连接并保持运行 """ + # 初始化向量数据库 + from core.managers.vectordb_manager import vectordb_manager + vectordb_manager.initialize() + # 首先加载所有插件 plugin_manager.load_all_plugins() diff --git a/plugins/ai_chat.py b/plugins/ai_chat.py new file mode 100644 index 0000000..1e94bcf --- /dev/null +++ b/plugins/ai_chat.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- +""" +AI 聊天插件,支持向量数据库记忆功能 +""" +import time +import uuid +from core.managers.command_manager import matcher +from models.events.message import GroupMessageEvent, PrivateMessageEvent +from core.managers.vectordb_manager import vectordb_manager +from core.utils.logger import ModuleLogger +from core.config_loader import global_config + +logger = ModuleLogger("AIChat") + +# 尝试导入 OpenAI 客户端 +try: + from openai import AsyncOpenAI + OPENAI_AVAILABLE = True +except ImportError: + OPENAI_AVAILABLE = False + +async def get_ai_response(user_id: int, group_id: int, user_message: str) -> str: + """获取 AI 回复,包含向量数据库记忆""" + if not OPENAI_AVAILABLE: + return "请先安装 openai 库: pip install openai" + + # 从配置中获取 DeepSeek API 配置(复用跨平台插件的配置或全局配置) + api_key = getattr(global_config.cross_platform, 'deepseek_api_key', None) or "your-api-key" + api_url = getattr(global_config.cross_platform, 'deepseek_api_url', "https://api.deepseek.com/v1") + model = getattr(global_config.cross_platform, 'deepseek_model', "deepseek-chat") + + if api_key == "your-api-key": + return "请先在配置中设置 DeepSeek API Key" + + # 1. 从向量数据库检索相关记忆 + collection_name = f"chat_memory_{user_id}" + memory_context = "" + + try: + results = vectordb_manager.query_texts( + collection_name=collection_name, + query_texts=[user_message], + n_results=3 + ) + + if results and results.get("documents") and results["documents"][0]: + memory_context = "\n\n相关历史记忆:\n" + for i, doc in enumerate(results["documents"][0], 1): + memory_context += f"{i}. {doc}\n" + except Exception as e: + logger.error(f"检索聊天记忆失败: {e}") + + # 2. 构建 Prompt + system_prompt = f"""你是一个友好的 AI 助手。请根据用户的输入进行回复。 +如果提供了相关历史记忆,请参考这些记忆来保持对话的连贯性。{memory_context}""" + + try: + client = AsyncOpenAI( + api_key=api_key, + base_url=api_url.replace("/chat/completions", "") + ) + + response = await client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_message} + ], + temperature=0.7, + max_tokens=1000 + ) + + ai_reply = response.choices[0].message.content + + # 3. 将本次对话存入向量数据库 + if ai_reply: + try: + doc_id = str(uuid.uuid4()) + text_to_embed = f"用户: {user_message}\nAI: {ai_reply}" + metadata = { + "user_id": user_id, + "group_id": group_id, + "timestamp": int(time.time()) + } + + vectordb_manager.add_texts( + collection_name=collection_name, + texts=[text_to_embed], + metadatas=[metadata], + ids=[doc_id] + ) + except Exception as e: + logger.error(f"保存聊天记忆失败: {e}") + + return ai_reply + except Exception as e: + logger.error(f"AI 聊天请求失败: {e}") + return f"请求失败: {str(e)}" + +@matcher.command("chat", "聊天") +async def chat_command(event: GroupMessageEvent | PrivateMessageEvent, args: list[str]): + """AI 聊天命令""" + if not args: + await event.reply("请提供要聊天的内容,例如:/chat 你好") + return + + user_message = " ".join(args) + user_id = event.user_id + group_id = getattr(event, 'group_id', 0) + + await event.reply("正在思考中...") + reply = await get_ai_response(user_id, group_id, user_message) + await event.reply(reply) diff --git a/plugins/discord-cross/handlers.py b/plugins/discord-cross/handlers.py index 1e13b51..bc92f9a 100644 --- a/plugins/discord-cross/handlers.py +++ b/plugins/discord-cross/handlers.py @@ -148,7 +148,7 @@ async def handle_qq_group_message(event: GroupMessageEvent): group_name = f"群{group_id}" await handle_qq_message( - nickname=event.sender.nickname or event.sender.card or str(event.user_id), + nickname=event.sender.card or event.sender.nickname or str(event.user_id), user_id=event.user_id, group_name=group_name, group_id=group_id, diff --git a/plugins/discord-cross/translator.py b/plugins/discord-cross/translator.py index 472c7bf..8b9cf55 100644 --- a/plugins/discord-cross/translator.py +++ b/plugins/discord-cross/translator.py @@ -2,8 +2,11 @@ """ 跨平台消息互通插件翻译模块 """ +import time +import uuid from typing import Dict, List from core.utils.logger import ModuleLogger +from core.managers.vectordb_manager import vectordb_manager from .config import config # 创建模块专用日志记录器 @@ -19,7 +22,7 @@ def get_translation_context(channel_id: int, direction: str) -> List[Dict[str, s return TRANSLATION_CONTEXT_CACHE.get(cache_key, []) def add_translation_context(channel_id: int, direction: str, original: str, translated: str): - """添加翻译到上下文缓存""" + """添加翻译到上下文缓存和向量数据库""" cache_key = f"{channel_id}_{direction}" if cache_key not in TRANSLATION_CONTEXT_CACHE: TRANSLATION_CONTEXT_CACHE[cache_key] = [] @@ -31,6 +34,59 @@ def add_translation_context(channel_id: int, direction: str, original: str, tran if len(TRANSLATION_CONTEXT_CACHE[cache_key]) > MAX_CONTEXT_MESSAGES: TRANSLATION_CONTEXT_CACHE[cache_key] = TRANSLATION_CONTEXT_CACHE[cache_key][-MAX_CONTEXT_MESSAGES:] + + # 将翻译记录保存到向量数据库 + try: + collection_name = f"translation_memory_{channel_id}" + doc_id = str(uuid.uuid4()) + + # 将原文和译文组合作为向量化文本 + text_to_embed = f"原文: {original}\n译文: {translated}" + + metadata = { + "channel_id": channel_id, + "direction": direction, + "original": original, + "translated": translated, + "timestamp": int(time.time()) + } + + vectordb_manager.add_texts( + collection_name=collection_name, + texts=[text_to_embed], + metadatas=[metadata], + ids=[doc_id] + ) + logger.debug(f"[CrossPlatform] 翻译记录已保存到向量数据库: {collection_name}") + except Exception as e: + logger.error(f"[CrossPlatform] 保存翻译记录到向量数据库失败: {e}") + +def get_similar_translations(channel_id: int, text: str, direction: str, limit: int = 3) -> str: + """从向量数据库检索相似的翻译记录""" + try: + collection_name = f"translation_memory_{channel_id}" + + # 检索相似文本 + results = vectordb_manager.query_texts( + collection_name=collection_name, + query_texts=[text], + n_results=limit, + where={"direction": direction} + ) + + if not results or not results.get("documents") or not results["documents"][0]: + return "" + + context_ref = "\n\n参考历史相似翻译(向量检索):\n" + for i, metadata in enumerate(results["metadatas"][0], 1): + original = metadata.get("original", "") + translated = metadata.get("translated", "") + context_ref += f"{i}. 原文: {original[:100]}\n 译文: {translated[:100]}\n" + + return context_ref + except Exception as e: + logger.error(f"[CrossPlatform] 从向量数据库检索翻译记录失败: {e}") + return "" async def translate_with_deepseek( text: str, @@ -51,11 +107,17 @@ async def translate_with_deepseek( messages = [] context_ref = "" if channel_id > 0: + # 1. 获取最近的上下文缓存 context = get_translation_context(channel_id, direction) if context: - context_ref = "\n\n参考之前的翻译:\n" + context_ref = "\n\n参考最近的翻译:\n" for i, ctx in enumerate(context[-5:], 1): context_ref += f"{i}. 原文: {ctx['original'][:100]}\n 译文: {ctx['translated'][:100]}\n" + + # 2. 从向量数据库检索相似的历史翻译 + similar_context = get_similar_translations(channel_id, text, direction) + if similar_context: + context_ref += similar_context system_prompt = f"""你是一个专业的翻译助手。请将以下文本翻译成{lang_name}。 只返回翻译后的文本,不要添加任何解释、注释或其他内容。避免翻译出仇视言论以及违反中国大陆相关法律法规的内容。如果有,请在翻译后有敏感的词语中把文本替换成井号(#) @@ -115,11 +177,17 @@ async def translate_with_deepseek_sync( context_ref = "" if channel_id > 0: + # 1. 获取最近的上下文缓存 context = get_translation_context(channel_id, direction) if context: - context_ref = "\n\n参考之前的翻译:\n" + context_ref = "\n\n参考最近的翻译:\n" for i, ctx in enumerate(context[-5:], 1): context_ref += f"{i}. 原文: {ctx['original'][:100]}\n 译文: {ctx['translated'][:100]}\n" + + # 2. 从向量数据库检索相似的历史翻译 + similar_context = get_similar_translations(channel_id, text, direction) + if similar_context: + context_ref += similar_context system_prompt = f"""你是一个专业的翻译助手。请将以下文本翻译成{lang_name}。 只返回翻译后的文本,不要添加任何解释、注释或其他内容。避免翻译出仇视言论以及违反中国大陆相关法律法规的内容。如果有,请在翻译后有敏感的词语中把文本替换成井号(#) diff --git a/plugins/knowledge_base.py b/plugins/knowledge_base.py new file mode 100644 index 0000000..88dd5e2 --- /dev/null +++ b/plugins/knowledge_base.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +""" +群聊知识库插件,支持向量数据库检索 +""" +import time +import uuid +from core.managers.command_manager import matcher +from models.events.message import GroupMessageEvent +from core.managers.vectordb_manager import vectordb_manager +from core.utils.logger import ModuleLogger +from core.permission import Permission + +logger = ModuleLogger("GroupKnowledgeBase") + +@matcher.command("kb_add", "添加知识库", permission=Permission.ADMIN) +async def kb_add_command(event: GroupMessageEvent, args: list[str]): + """添加知识库条目""" + if len(args) < 2: + await event.reply("用法: /kb_add <问题> <答案>") + return + + question = args[0] + answer = " ".join(args[1:]) + group_id = event.group_id + + try: + collection_name = f"knowledge_base_{group_id}" + doc_id = str(uuid.uuid4()) + + text_to_embed = f"问题: {question}\n答案: {answer}" + metadata = { + "group_id": group_id, + "question": question, + "answer": answer, + "added_by": event.user_id, + "timestamp": int(time.time()) + } + + success = vectordb_manager.add_texts( + collection_name=collection_name, + texts=[text_to_embed], + metadatas=[metadata], + ids=[doc_id] + ) + + if success: + await event.reply(f"知识库条目添加成功!\n问题: {question}") + else: + await event.reply("知识库条目添加失败,请查看日志。") + except Exception as e: + logger.error(f"添加知识库失败: {e}") + await event.reply(f"添加失败: {str(e)}") + +@matcher.command("kb_search", "搜索知识库") +async def kb_search_command(event: GroupMessageEvent, args: list[str]): + """搜索知识库条目""" + if not args: + await event.reply("用法: /kb_search <关键词>") + return + + query = " ".join(args) + group_id = event.group_id + + try: + collection_name = f"knowledge_base_{group_id}" + + results = vectordb_manager.query_texts( + collection_name=collection_name, + query_texts=[query], + n_results=3 + ) + + if not results or not results.get("documents") or not results["documents"][0]: + await event.reply("未找到相关的知识库条目。") + return + + reply_msg = f"为您找到以下相关知识:\n" + for i, metadata in enumerate(results["metadatas"][0], 1): + question = metadata.get("question", "") + answer = metadata.get("answer", "") + reply_msg += f"\n{i}. Q: {question}\n A: {answer}" + + await event.reply(reply_msg) + except Exception as e: + logger.error(f"搜索知识库失败: {e}") + await event.reply(f"搜索失败: {str(e)}")