jack 1 week ago
parent
commit
1d01ac61a4
1 changed file with 22 additions and 5 deletions
  1. 22 5
      app/services/file/impl/r2r_file.py

+ 22 - 5
app/services/file/impl/r2r_file.py

@@ -6,7 +6,7 @@ import aiofiles
 import aiofiles.os
 import aiofiles.os
 from fastapi import UploadFile
 from fastapi import UploadFile
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.ext.asyncio import AsyncSession
-
+import os
 from app.models import File
 from app.models import File
 from app.providers.r2r import R2R
 from app.providers.r2r import R2R
 from app.providers.storage import storage
 from app.providers.storage import storage
@@ -15,9 +15,10 @@ from app.services.file.impl.oss_file import OSSFileService
 # import asyncio
 # import asyncio
 from pathlib import Path
 from pathlib import Path
 
 
+allowed_formats = [".bmp", ".heic", ".jpeg", ".png", ".tiff"]
 
 
-class R2RFileService(OSSFileService):
 
 
+class R2RFileService(OSSFileService):
     @staticmethod
     @staticmethod
     async def create_file(
     async def create_file(
         *, session: AsyncSession, purpose: str, file: UploadFile
         *, session: AsyncSession, purpose: str, file: UploadFile
@@ -131,10 +132,26 @@ class R2RFileService(OSSFileService):
             return files
             return files
 
 
         for doc in search_results:
         for doc in search_results:
+            file_extension = os.path.splitext(doc.metadata["title"])[1].lower()
+            if file_extension in allowed_formats:
+                files.append(
+                    {
+                        "id": str(doc.id),
+                        "text": doc.text,
+                        "title": doc.metadata["title"],
+                        "url": "https://r2r.s3.cn-north-1.amazonaws.com.cn/r2r/documents/"
+                        + doc.document_id,
+                    }
+                )
             # print(doc.metadata)
             # print(doc.metadata)
-            files.append(
-                {"id": str(doc.id), "text": doc.text, "title": doc.metadata["title"]}
-            )
+            else:
+                files.append(
+                    {
+                        "id": str(doc.id),
+                        "text": doc.text,
+                        "title": doc.metadata["title"],
+                    }
+                )
             # file_key = doc.metadata.file_key
             # file_key = doc.metadata.file_key
             # file_key = doc.metadata.title if file_key is None else file_key
             # file_key = doc.metadata.title if file_key is None else file_key
             # text = doc.text
             # text = doc.text