|
@@ -301,11 +301,11 @@ class UnstructuredIngestionProvider(IngestionProvider):
|
|
|
ingestion_config.pop("excluded_parsers", None)
|
|
ingestion_config.pop("excluded_parsers", None)
|
|
|
|
|
|
|
|
t0 = time.time()
|
|
t0 = time.time()
|
|
|
- parser_overrides = ingestion_config_override.get(
|
|
|
|
|
- "parser_overrides", {"pdf": "ocr"}
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ #parser_overrides = ingestion_config_override.get(
|
|
|
|
|
+ # "parser_overrides", {"pdf": "ocr"}
|
|
|
|
|
+ #)
|
|
|
elements = []
|
|
elements = []
|
|
|
- parser_overrides = {"pdf": "ocr"}
|
|
|
|
|
|
|
+ #parser_overrides = {"pdf": "ocr"}
|
|
|
|
|
|
|
|
# TODO - Cleanup this approach to be less hardcoded
|
|
# TODO - Cleanup this approach to be less hardcoded
|
|
|
# TODO - Remove code duplication between Unstructured & R2R
|
|
# TODO - Remove code duplication between Unstructured & R2R
|