jack 3 ay önce
ebeveyn
işleme
5c1a3382b9

+ 10 - 3
app/core/tools/file_search_tool.py

@@ -9,7 +9,9 @@ from app.services.file.file import FileService
 
 
 class FileSearchToolInput(BaseModel):
-    indexes: List[int] = Field(..., description="file index list to look up in retrieval")
+    indexes: List[int] = Field(
+        ..., description="file index list to look up in retrieval"
+    )
     query: str = Field(..., description="query to look up in retrieval")
 
 
@@ -33,7 +35,9 @@ class FileSearchTool(BaseTool):
         """
         files = FileService.get_file_list_by_ids(session=session, file_ids=run.file_ids)
         # pre-cache data to prevent thread conflicts that may occur later on.
-        print("---------ssssssssssss-----------------sssssssssssss---------------ssssssssssssss-------------sssssssssssss-------------ss-------")
+        print(
+            "---------ssssssssssss-----------------sssssssssssss---------------ssssssssssssss-------------sssssssssssss-------------ss-------"
+        )
         print(files)
         for file in files:
             self.__filenames.append(file.filename)
@@ -57,7 +61,10 @@ class FileSearchTool(BaseTool):
         if len(self.__filenames) == 0:
             return ""
         else:
-            filenames_info = [f"({index}){filename}" for index, filename in enumerate(self.__filenames)]
+            filenames_info = [
+                f"({index}){filename}"
+                for index, filename in enumerate(self.__filenames)
+            ]
             return (
                 'You can use the "retrieval" tool to retrieve relevant context from the following attached files. '
                 + 'Each line represents a file in the format "(index)filename":\n'

+ 77 - 0
app/core/tools/file_search_tool_.py

@@ -0,0 +1,77 @@
+import fnmatch
+import os
+from typing import List, Optional, Type
+
+from langchain_community.tools.file_management.utils import (
+    INVALID_PATH_TEMPLATE,
+    BaseFileToolMixin,
+    FileValidationError,
+)
+from langchain_core.callbacks import CallbackManagerForToolRun
+from langchain_core.tools import BaseTool
+from pydantic import BaseModel, Field
+
+from app.services.file.file import FileService
+
+
+class FileSearchInput(BaseModel):
+    """Input for FileSearchTool."""
+
+    # Optional: falls back to "." when the caller supplies no directory.
+    dir_path: str = Field(default=".", description="Subdirectory to search in.")
+    # Required: `...` marks the field as having no default.
+    pattern: str = Field(
+        ..., description="Unix shell regex, where * matches everything."
+    )
+
+
+class FileSearchTool(BaseFileToolMixin, BaseTool):  # type: ignore[override, override]
+    """Tool that searches for files in a subdirectory that match a regex pattern."""
+
+    # NOTE(review): the metadata below describes a directory/pattern search
+    # (`FileSearchInput` has `dir_path` and `pattern`), while `run` accepts
+    # `indexes` and `query` -- confirm which contract this tool should expose.
+    name: str = "file_search"
+    args_schema: Type[BaseModel] = FileSearchInput
+    description: str = (
+        "Recursively search for files in a subdirectory that match the regex pattern"
+    )
+
+    def run(self, indexes: List[int], query: str) -> dict:
+        """Search the files selected by `indexes` for `query`.
+
+        Args:
+            indexes: Positions into `self.__keys` identifying the files to search.
+            query: Search text forwarded to `FileService.search_in_files`.
+
+        Returns:
+            The result of `FileService.search_in_files` for the selected keys,
+            passed through unchanged.
+
+        Raises:
+            Whatever `self.__keys[index]` raises for an unknown index
+            (`IndexError` if the keys are held in a list).
+        """
+        # NOTE(review): `self.__keys` is never assigned anywhere in this file, so
+        # this lookup presumably fails with AttributeError until a setup step
+        # populates it -- confirm where the keys are meant to come from.
+        # NOTE(review): this method shares its name with `BaseTool.run`; verify
+        # whether `_run` was the intended override.
+        file_keys = [self.__keys[index] for index in indexes]
+        return FileService.search_in_files(query=query, file_keys=file_keys)
+
+
+"""
+    def _run(
+        self,
+        pattern: str,
+        dir_path: str = ".",
+        run_manager: Optional[CallbackManagerForToolRun] = None,
+    ) -> str:
+        try:
+            dir_path_ = self.get_relative_path(dir_path)
+        except FileValidationError:
+            return INVALID_PATH_TEMPLATE.format(arg_name="dir_path", value=dir_path)
+        matches = []
+        try:
+            for root, _, filenames in os.walk(dir_path_):
+                for filename in fnmatch.filter(filenames, pattern):
+                    absolute_path = os.path.join(root, filename)
+                    relative_path = os.path.relpath(absolute_path, dir_path_)
+                    matches.append(relative_path)
+            if matches:
+                return "\n".join(matches)
+            else:
+                return f"No files found for pattern {pattern} in directory {dir_path}"
+        except Exception as e:
+            return "Error: " + str(e)
+
+    # TODO: Add aiofiles method
+"""

+ 7 - 3
app/providers/storage.py

@@ -19,7 +19,7 @@ class Storage:
             service_name="s3",
             aws_access_key_id=s3_settings.S3_ACCESS_KEY,
             aws_secret_access_key=s3_settings.S3_SECRET_KEY,
-            endpoint_url=s3_settings.S3_ENDPOINT,
+            # endpoint_url=s3_settings.S3_ENDPOINT,
             region_name=s3_settings.S3_REGION,
         )
 
@@ -27,7 +27,9 @@ class Storage:
         self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data)
 
     def save_from_path(self, filename, local_file_path):
-        self.client.upload_file(Filename=local_file_path, Bucket=self.bucket_name, Key=filename)
+        self.client.upload_file(
+            Filename=local_file_path, Bucket=self.bucket_name, Key=filename
+        )
 
     def load(self, filename: str, stream: bool = False) -> Union[bytes, Generator]:
         if stream:
@@ -38,7 +40,9 @@ class Storage:
     def load_once(self, filename: str) -> bytes:
         try:
             with closing(self.client) as client:
-                data = client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read()
+                data = client.get_object(Bucket=self.bucket_name, Key=filename)[
+                    "Body"
+                ].read()
         except ClientError as ex:
             if ex.response["Error"]["Code"] == "NoSuchKey":
                 raise ResourceNotFoundError("File not found")

+ 2 - 2
docker-compose.yml

@@ -52,7 +52,7 @@ services:
       #S3_ACCESS_KEY: minioadmin
       #S3_SECRET_KEY: minioadmin
       #S3_REGION: us-east-1
-      S3_ENDPOINT: None
+      #S3_ENDPOINT: None
       S3_BUCKET_NAME: ccrb
       S3_ACCESS_KEY: Q2SQw37HfolS7yeaR1Ndpy9Jl4E2YZKUuuy2muZR
       S3_SECRET_KEY: AKIATLPEDU37QV5CHLMH
@@ -119,7 +119,7 @@ services:
       #S3_ACCESS_KEY: minioadmin
       #S3_SECRET_KEY: minioadmin
       #S3_REGION: us-east-1
-      S3_ENDPOINT: None
+      #S3_ENDPOINT: None
       S3_BUCKET_NAME: ccrb
       S3_ACCESS_KEY: Q2SQw37HfolS7yeaR1Ndpy9Jl4E2YZKUuuy2muZR
       S3_SECRET_KEY: AKIATLPEDU37QV5CHLMH