|
@@ -103,11 +103,11 @@ class UnstructuredIngestionProvider(IngestionProvider):
|
|
|
|
|
|
|
|
EXTRA_PARSERS = {
|
|
EXTRA_PARSERS = {
|
|
|
DocumentType.CSV: {"advanced": parsers.CSVParserAdvanced}, # type: ignore
|
|
DocumentType.CSV: {"advanced": parsers.CSVParserAdvanced}, # type: ignore
|
|
|
- #DocumentType.PDF: {
|
|
|
|
|
- # "ocr": parsers.OCRPDFParser, # type: ignore
|
|
|
|
|
- # "unstructured": parsers.VLMPDFParser, # type: ignore
|
|
|
|
|
- # "zerox": parsers.VLMPDFParser, # type: ignore
|
|
|
|
|
- #},
|
|
|
|
|
|
|
+ DocumentType.PDF: {
|
|
|
|
|
+ "ocr": parsers.OCRPDFParser, # type: ignore
|
|
|
|
|
+ "unstructured": parsers.PDFParserUnstructured, # type: ignore
|
|
|
|
|
+ "zerox": parsers.VLMPDFParser, # type: ignore
|
|
|
|
|
+ },
|
|
|
DocumentType.XLSX: {"advanced": parsers.XLSXParserAdvanced}, # type: ignore
|
|
DocumentType.XLSX: {"advanced": parsers.XLSXParserAdvanced}, # type: ignore
|
|
|
}
|
|
}
|
|
|
|
|
|