jack 1 месяц назад
Родитель
Commit
6181b81a05
Изменён 1 файл: 3 добавлено и 3 удалено
  1. 3 3
      py/core/providers/ingestion/unstructured/base.py

+ 3 - 3
py/core/providers/ingestion/unstructured/base.py

@@ -302,10 +302,10 @@ class UnstructuredIngestionProvider(IngestionProvider):
 
         t0 = time.time()
         parser_overrides = ingestion_config_override.get(
-            "parser_overrides", {"pdf": "ocr"}
+            "parser_overrides", {}
         )
         elements = []
-        parser_overrides = {"pdf": "unstructured"}
+        #parser_overrides = {"pdf": "unstructured"}
 
         # TODO - Cleanup this approach to be less hardcoded
         # TODO - Remove code duplication between Unstructured & R2R
@@ -315,7 +315,7 @@ class UnstructuredIngestionProvider(IngestionProvider):
         logger.info(f"R2R fallback parsers is: {document.document_type.value in self.EXTRA_PARSERS.keys()}")
         logger.info(f"R2R fallback parsers is: {document.document_type.value in parser_overrides}")
         #if document.document_type.value in parser_overrides:
-        if document.document_type.value in self.EXTRA_PARSERS.keys():
+        if document.document_type.value in parser_overrides:
             logger.info(
                 f"Using parser_override for {document.document_type} with input value {parser_overrides[document.document_type.value]}"
             )