jack 1 week ago
parent
commit
3383a7e452
1 changed file with 2 additions and 0 deletions
  1. 2 0
      py/core/providers/ingestion/unstructured/base.py

+ 2 - 0
py/core/providers/ingestion/unstructured/base.py

@@ -304,6 +304,7 @@ class UnstructuredIngestionProvider(IngestionProvider):
             "parser_overrides", {"pdf": "ocr"}
         )
         elements = []
+        parser_overrides = {"pdf": "ocr"}
 
         # TODO - Cleanup this approach to be less hardcoded
         # TODO - Remove code duplication between Unstructured & R2R
@@ -311,6 +312,7 @@ class UnstructuredIngestionProvider(IngestionProvider):
         logger.info(f"R2R fallback parsers is: {document.document_type.value}")
         logger.info(f"R2R fallback parsers is: {self.EXTRA_PARSERS.keys()}")
         logger.info(f"R2R fallback parsers is: {document.document_type.value in self.EXTRA_PARSERS.keys()}")
+        logger.info(f"R2R fallback parsers is: {document.document_type.value in parser_overrides}")
         #if document.document_type.value in parser_overrides:
         if document.document_type.value in self.EXTRA_PARSERS.keys():
             logger.info(