|
@@ -302,10 +302,10 @@ class UnstructuredIngestionProvider(IngestionProvider):
|
|
|
|
|
|
|
|
t0 = time.time()
|
|
t0 = time.time()
|
|
|
parser_overrides = ingestion_config_override.get(
|
|
parser_overrides = ingestion_config_override.get(
|
|
|
- "parser_overrides", {"pdf": "ocr"}
|
|
|
|
|
|
|
+ "parser_overrides", {}
|
|
|
)
|
|
)
|
|
|
elements = []
|
|
elements = []
|
|
|
- parser_overrides = {"pdf": "unstructured"}
|
|
|
|
|
|
|
+ #parser_overrides = {"pdf": "unstructured"}
|
|
|
|
|
|
|
|
# TODO - Cleanup this approach to be less hardcoded
|
|
# TODO - Cleanup this approach to be less hardcoded
|
|
|
# TODO - Remove code duplication between Unstructured & R2R
|
|
# TODO - Remove code duplication between Unstructured & R2R
|
|
@@ -315,7 +315,7 @@ class UnstructuredIngestionProvider(IngestionProvider):
|
|
|
logger.info(f"R2R fallback parsers is: {document.document_type.value in self.EXTRA_PARSERS.keys()}")
|
|
logger.info(f"R2R fallback parsers is: {document.document_type.value in self.EXTRA_PARSERS.keys()}")
|
|
|
logger.info(f"R2R fallback parsers is: {document.document_type.value in parser_overrides}")
|
|
logger.info(f"R2R fallback parsers is: {document.document_type.value in parser_overrides}")
|
|
|
#if document.document_type.value in parser_overrides:
|
|
#if document.document_type.value in parser_overrides:
|
|
|
- if document.document_type.value in self.EXTRA_PARSERS.keys():
|
|
|
|
|
|
|
+ if document.document_type.value in parser_overrides:
|
|
|
logger.info(
|
|
logger.info(
|
|
|
f"Using parser_override for {document.document_type} with input value {parser_overrides[document.document_type.value]}"
|
|
f"Using parser_override for {document.document_type} with input value {parser_overrides[document.document_type.value]}"
|
|
|
)
|
|
)
|