|
|
@@ -102,13 +102,13 @@ class UnstructuredIngestionProvider(IngestionProvider):
|
|
|
}
|
|
|
|
|
|
EXTRA_PARSERS = {
|
|
|
- DocumentType.CSV: {"advanced": parsers.CSVParserAdvanced}, # type: ignore
|
|
|
- DocumentType.PDF: {
|
|
|
- "ocr": parsers.OCRPDFParser, # type: ignore
|
|
|
- "unstructured": parsers.PDFParserUnstructured, # type: ignore
|
|
|
- "zerox": parsers.VLMPDFParser, # type: ignore
|
|
|
- },
|
|
|
- DocumentType.XLSX: {"advanced": parsers.XLSXParserAdvanced}, # type: ignore
|
|
|
+ #DocumentType.CSV: {"advanced": parsers.CSVParserAdvanced}, # type: ignore
|
|
|
+ #DocumentType.PDF: {
|
|
|
+ # "ocr": parsers.OCRPDFParser, # type: ignore
|
|
|
+ # "unstructured": parsers.PDFParserUnstructured, # type: ignore
|
|
|
+ # "zerox": parsers.VLMPDFParser, # type: ignore
|
|
|
+ #},
|
|
|
+ #DocumentType.XLSX: {"advanced": parsers.XLSXParserAdvanced}, # type: ignore
|
|
|
}
|
|
|
|
|
|
IMAGE_TYPES = {
|