|
@@ -101,13 +101,13 @@ class UnstructuredIngestionProvider(IngestionProvider):
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
EXTRA_PARSERS = {
|
|
EXTRA_PARSERS = {
|
|
|
- DocumentType.CSV: {"advanced": parsers.CSVParserAdvanced}, # type: ignore
|
|
|
|
|
|
|
+ #DocumentType.CSV: {"advanced": parsers.CSVParserAdvanced}, # type: ignore
|
|
|
DocumentType.PDF: {
|
|
DocumentType.PDF: {
|
|
|
"ocr": parsers.OCRPDFParser, # type: ignore
|
|
"ocr": parsers.OCRPDFParser, # type: ignore
|
|
|
"unstructured": parsers.VLMPDFParser, # type: ignore
|
|
"unstructured": parsers.VLMPDFParser, # type: ignore
|
|
|
"zerox": parsers.VLMPDFParser, # type: ignore
|
|
"zerox": parsers.VLMPDFParser, # type: ignore
|
|
|
},
|
|
},
|
|
|
- DocumentType.XLSX: {"advanced": parsers.XLSXParserAdvanced}, # type: ignore
|
|
|
|
|
|
|
+ #DocumentType.XLSX: {"advanced": parsers.XLSXParserAdvanced}, # type: ignore
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
IMAGE_TYPES = {
|
|
IMAGE_TYPES = {
|