jack 2 hari lalu
induk
melakukan
f3264df9d6
2 mengubah file dengan 89 tambahan dan 0 penghapusan
  1. 3 0
      app/api/v1/files.py
  2. 86 0
      app/core/tools/knowledge_search_tool.py

+ 3 - 0
app/api/v1/files.py

@@ -21,12 +21,15 @@ max_size = 512 * 1024 * 1024
 file_ext = [
     ".csv",
     ".docx",
+    ".doc",
     ".html",
     ".json",
     ".md",
     ".pdf",
+    ".ppt",
     ".pptx",
     ".txt",
+    ".xls",
     ".xlsx",
     ".gif",
     ".png",

+ 86 - 0
app/core/tools/knowledge_search_tool.py

@@ -0,0 +1,86 @@
+import logging
+from typing import Type, List
+
+from pydantic import BaseModel, Field
+from sqlalchemy.orm import Session
+
+from app.core.tools.base_tool import BaseTool
+from app.models.run import Run
+from app.services.file.file import FileService
+from app.services.assistant.assistant import AssistantService
+
+
+# return '## important:You can use the "retrieval" tool to search for relevant information.\n If you are asking about the content of the files, please specify any keywords, topics, or context you are looking for to help retrieve the most relevant content.'
+
+
+# query: str = Field(
+#    ...,
+#    description="query to look up in retrieval",
+# )
+# asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+# query: str = Field(..., description="query to look up in retrieval")
+class KnowledgeSearchToolInput(BaseModel):
+    query: str = Field(
+        ...,
+        description="query to look up in retrieval",
+    )
+
+
+class KnowledgeSearchTool(BaseTool):
+    name: str = "knowledge_search"
+    description: str = (
+        "Can be used to look up knowledge base or files content that was uploaded to this assistant."
+        + "If the user is retrieve specified content from the knowledge base or file content, that is often a good hint that information may be here."
+        + "Singleton operation: Strictly 1 invocation per API call"
+    )
+    args_schema: Type[BaseModel] = KnowledgeSearchToolInput
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.__filenames = []
+        self.__keys = []
+        self.__dirkeys = []
+        self.loop = None
+        self.index = 0
+
+    def configure(self, session: Session, run: Run, **kwargs):
+        # 获取当前事件循环
+        # document_id = []
+        print("====KnowledgeSearchToolKnowledgeSearchToolKnowledgeSearchTool====")
+        file_key = []
+        for key in run.file_ids:
+            if len(key) == 36:
+                self.__keys.append(key)  # 添加文件id 作为检索
+            else:
+                file_key.append(key)
+        print(file_key)
+        files = []
+        # 这种情况是uuid.ex 这种格式的在最早的时候存在的,后续要去掉
+        if len(file_key) > 0:
+            ## 获取文件信息
+            files = FileService.get_file_list_by_ids(session=session, file_ids=file_key)
+            for file in files:
+                self.__keys.append(file.key)
+        print(self.__keys)
+
+    def run(self, query: str) -> dict:
+        print(self.__keys)
+        files = []
+        ## 必须有总结的内容query和才能触发
+        if self.index == 0 and query:
+            files = FileService.search_in_files(
+                query=query, file_keys=self.__keys, folder_keys=self.__dirkeys
+            )
+            self.index = 1
+        # print(files)
+        return files
+
+    def instruction_supplement(self) -> str:
+        """
+        为 Retrieval 提供文件选择信息,用于 llm 调用抉择
+        """
+        if (self.__keys and len(self.__keys) > 0) or (
+            self.__dirkeys and len(self.__dirkeys) > 0
+        ):
+            return ""
+        else:
+            return ""