jack 1 일 전
부모
커밋
774b31a845
1개의 변경된 파일, 9개의 추가 그리고 7개의 삭제
  1. 9 7
      py/core/providers/ingestion/unstructured/base.py

+ 9 - 7
py/core/providers/ingestion/unstructured/base.py

@@ -308,7 +308,8 @@ class UnstructuredIngestionProvider(IngestionProvider):
         # TODO - Remove code duplication between Unstructured & R2R
         logger.info(f"Parser overrides: {parser_overrides}")
         logger.info(f"R2R fallback parsers is: {document.document_type.value in parser_overrides or document.document_type.value in self.EXTRA_PARSERS.keys()}")
-        if document.document_type.value in parser_overrides or document.document_type.value in self.EXTRA_PARSERS.keys(): ##  or document.document_type.value in self.EXTRA_PARSERS.keys()
+        if document.document_type.value in self.EXTRA_PARSERS.keys():
+            '''
             logger.info(
                 f"Using parser_override for {document.document_type} with input value {parser_overrides[document.document_type.value]}"
             )
@@ -332,16 +333,17 @@ class UnstructuredIngestionProvider(IngestionProvider):
                         f"Using OCR parser_override for {document.document_type}"
                     )
                     elements.append(element)
-            else:
-                async for element in self.parse_fallback(
+            '''
+
+            async for element in self.parse_fallback(
                     file_content,
                     ingestion_config=ingestion_config,
                     parser_name=f"ocr_{DocumentType.PDF.value}",
-                ):
-                    logger.warning(
+            ):
+                logger.warning(
                         f"Using OCR parser_override for {document.document_type}"
-                    )
-                    elements.append(element)
+                )
+                elements.append(element)
 
         elif document.document_type in self.R2R_FALLBACK_PARSERS.keys():
             logger.info(