jack 1 settimana fa
parent
commit
60f37280af
1 file modificato con 25 aggiunte e 1 eliminazione
  1. 25 1
      app/services/file/impl/r2r_file.py

+ 25 - 1
app/services/file/impl/r2r_file.py

@@ -213,13 +213,37 @@ class R2RFileService(OSSFileService):
             print("list_chunkslist_chunkslist_chunkslist_chunkslist_chunkslist_chunks")
             list_results = r2r.list_chunks(ids=ids)
             print(list_results)
-            files = {}
+            files = []
+
+            for doc in list_results:
+                file_extension = os.path.splitext(doc.metadata["title"])[1].lower()
+                if file_extension in allowed_formats:
+                    files.append(
+                        {
+                            "id": str(doc.id),
+                            "text": doc.text,
+                            "title": doc.metadata["title"],
+                            "url": "https://r2r.s3.cn-north-1.amazonaws.com.cn/r2r/documents/"
+                            + str(doc.document_id),
+                        }
+                    )
+                # print(doc.metadata)
+                else:
+                    files.append(
+                        {
+                            "id": str(doc.id),
+                            "text": doc.text,
+                            "title": doc.metadata["title"],
+                        }
+                    )
+            '''
             for doc in list_results:
                 text = doc.text
                 if "text" in files:
                     files["text"] += f"\n\n{text}"
                 else:
                     files["text"] = text
+            '''
             print(files)
             return files
         else: