jack 1 день назад
Родитель
Commit
7b291a4a4a

+ 29 - 10
app/core/tools/file_allcontent.py

@@ -1,28 +1,47 @@
-from typing import Type
+from typing import Type, List
 
 from pydantic import BaseModel, Field
+from sqlalchemy.orm import Session
 
 from app.core.tools.base_tool import BaseTool
-from config.llm import tool_settings
-from sqlalchemy.orm import Session
 from app.models.run import Run
 from app.services.file.file import FileService
 
 
+class FileContnetToolInput(BaseModel):
+    """This tool requires no arguments."""
+    pass
+
+
 class FileContnetTool(BaseTool):
-    name: str = "file_content"
+    name: str = "read_full_file_content"
     description: str = (
-        "读取文件的所有或者全部内容并返回给用户,这里每一次只允许触发一次"
-        "只有提到读取全部内容的时候才会返回全部内容,其他时候这个工具不会调用"
-        "和file_search工具不会同时使用,用了此工具就不会调用file_search"
+        "ONLY trigger this tool when the user explicitly requests to read the COMPLETE or FULL content of an uploaded file. "
+        "Key trigger phrases include: '读取全部内容', '显示所有内容', '完整的文件', 'read the full content', 'read all content', 'complete file', 'entire content'. "
+        "Critical rules: "
+        "1. STRICTLY requires an explicit user command to read everything. Do NOT trigger for summaries, searches, or overviews. "
+        "2. This tool is mutually exclusive with 'file_search' or 'file_content_processor'. If this tool is triggered, those must NOT be used. "
+        "3. Operates as a strict singleton: ABSOLUTELY ONLY 1 invocation is permitted per user request. "
+        "4. MUST confirm that files are available before execution."
     )
+    args_schema: Type[BaseModel] = FileContnetToolInput
 
-    file_ids: list[str] = []
-    args_schema: Type[BaseModel] = {}
+    def __init__(self) -> None:
+        super().__init__()
+        self.file_ids = []
 
     def configure(self, session: Session, run: Run, **kwargs):
         if run.file_ids is not None and len(run.file_ids) > 0:
             self.file_ids = run.file_ids
 
     def run(self) -> dict:
-        return FileService.retrieve_documents(ids=self.file_ids)
+        files = []
+        if self.file_ids is not None and len(self.file_ids) > 0:
+            files = FileService.list_chunks(ids=self.file_ids)
+        return files
+
+    def instruction_supplement(self) -> str:
+        """
+        为 Retrieval 提供文件选择信息,用于 llm 调用抉择
+        """
+        return ""

+ 13 - 15
app/core/tools/file_search_tool.py

@@ -8,22 +8,16 @@ from app.models.run import Run
 from app.services.file.file import FileService
 from app.services.assistant.assistant import AssistantService
 
-
-# return '## important:You can use the "retrieval" tool to search for relevant information.\n If you are asking about the content of the files, please specify any keywords, topics, or context you are looking for to help retrieve the most relevant content.'
-
-
-# query: str = Field(
-#    ...,
-#    description="query to look up in retrieval",
-# )
-# asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
-# query: str = Field(..., description="query to look up in retrieval")
 class FileSearchToolInput(BaseModel):
     query: str = Field(
         ...,
         description="query to look up in retrieval",
     )
-
+    full_text_search: bool = Field(
+        default=False,
+        description="If True, performs full text search instead of snippet matching. "
+                   "Use when user requests complete content analysis rather than just keyword matching"
+    )
 
 class FileSearchTool(BaseTool):
     name: str = "file_content_processor"
@@ -103,7 +97,7 @@ class FileSearchTool(BaseTool):
         print(self.__keys)
 
     # indexes: List[int],
-    def run(self, query: str) -> dict:
+    def run(self, query: str,full_text_search:bool=False) -> dict:
         print(
             "file_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keysfile_keys"
         )
@@ -111,9 +105,13 @@ class FileSearchTool(BaseTool):
         print(self.__keys)
         print(self.__dirkeys)
         files = []
-        ## 必须有总结的内容query和才能触发
-        if self.index == 0 and query:
-            try:
+        if full_text_search:
+            files = FileService.list_chunks(ids=self.__keys)
+            pass
+        else:
+            ## 必须有总结的内容query和才能触发
+            if self.index == 0 and query:
+                try:
                 files = FileService.search_in_files(
                     query=query, file_keys=self.__keys, folder_keys=self.__dirkeys
                 )

+ 0 - 2
app/services/file/impl/r2r_file.py

@@ -197,8 +197,6 @@ class R2RFileService(OSSFileService):
 
             files = {}
             for doc in list_results:
-                # file_key = doc.metadata.file_key
-                # file_key = doc.metadata.title if file_key is None else file_key
                 text = doc.text
                 if "text" in files:
                     files["text"] += f"\n\n{text}"