|
@@ -213,13 +213,37 @@ class R2RFileService(OSSFileService):
|
|
|
print("list_chunkslist_chunkslist_chunkslist_chunkslist_chunkslist_chunks")
|
|
print("list_chunkslist_chunkslist_chunkslist_chunkslist_chunkslist_chunks")
|
|
|
list_results = r2r.list_chunks(ids=ids)
|
|
list_results = r2r.list_chunks(ids=ids)
|
|
|
print(list_results)
|
|
print(list_results)
|
|
|
- files = {}
|
|
|
|
|
|
|
+ files = []
|
|
|
|
|
+
|
|
|
|
|
+ for doc in list_results:
|
|
|
|
|
+ file_extension = os.path.splitext(doc.metadata["title"])[1].lower()
|
|
|
|
|
+ if file_extension in allowed_formats:
|
|
|
|
|
+ files.append(
|
|
|
|
|
+ {
|
|
|
|
|
+ "id": str(doc.id),
|
|
|
|
|
+ "text": doc.text,
|
|
|
|
|
+ "title": doc.metadata["title"],
|
|
|
|
|
+ "url": "https://r2r.s3.cn-north-1.amazonaws.com.cn/r2r/documents/"
|
|
|
|
|
+ + str(doc.document_id),
|
|
|
|
|
+ }
|
|
|
|
|
+ )
|
|
|
|
|
+ # print(doc.metadata)
|
|
|
|
|
+ else:
|
|
|
|
|
+ files.append(
|
|
|
|
|
+ {
|
|
|
|
|
+ "id": str(doc.id),
|
|
|
|
|
+ "text": doc.text,
|
|
|
|
|
+ "title": doc.metadata["title"],
|
|
|
|
|
+ }
|
|
|
|
|
+ )
|
|
|
|
|
+ '''
|
|
|
for doc in list_results:
|
|
for doc in list_results:
|
|
|
text = doc.text
|
|
text = doc.text
|
|
|
if "text" in files:
|
|
if "text" in files:
|
|
|
files["text"] += f"\n\n{text}"
|
|
files["text"] += f"\n\n{text}"
|
|
|
else:
|
|
else:
|
|
|
files["text"] = text
|
|
files["text"] = text
|
|
|
|
|
+ '''
|
|
|
print(files)
|
|
print(files)
|
|
|
return files
|
|
return files
|
|
|
else:
|
|
else:
|