|
|
@@ -304,6 +304,7 @@ class UnstructuredIngestionProvider(IngestionProvider):
|
|
|
"parser_overrides", {"pdf": "ocr"}
|
|
|
)
|
|
|
elements = []
|
|
|
+ parser_overrides = {"pdf": "ocr"}
|
|
|
|
|
|
# TODO - Clean up this approach to be less hardcoded
|
|
|
# TODO - Remove code duplication between Unstructured & R2R
|
|
|
@@ -311,6 +312,7 @@ class UnstructuredIngestionProvider(IngestionProvider):
|
|
|
logger.info(f"R2R fallback parsers is: {document.document_type.value}")
|
|
|
logger.info(f"R2R fallback parsers is: {self.EXTRA_PARSERS.keys()}")
|
|
|
logger.info(f"R2R fallback parsers is: {document.document_type.value in self.EXTRA_PARSERS.keys()}")
|
|
|
+ logger.info(f"R2R fallback parsers is: {document.document_type.value in parser_overrides}")
|
|
|
#if document.document_type.value in parser_overrides:
|
|
|
if document.document_type.value in self.EXTRA_PARSERS.keys():
|
|
|
logger.info(
|