jack 2 mesiacov pred
rodič
commit
0c4182bf2d

+ 5 - 5
py/core/providers/ingestion/unstructured/base.py

@@ -103,11 +103,11 @@ class UnstructuredIngestionProvider(IngestionProvider):
 
     EXTRA_PARSERS = {
         DocumentType.CSV: {"advanced": parsers.CSVParserAdvanced},  # type: ignore
-        #DocumentType.PDF: {
-        #    "ocr": parsers.OCRPDFParser,  # type: ignore
-        #    "unstructured": parsers.VLMPDFParser,  # type: ignore
-        #    "zerox": parsers.VLMPDFParser,  # type: ignore
-        #},
+        DocumentType.PDF: {
+            "ocr": parsers.OCRPDFParser,  # type: ignore
+            "unstructured": parsers.PDFParserUnstructured,  # type: ignore
+            "zerox": parsers.VLMPDFParser,  # type: ignore
+        },
         DocumentType.XLSX: {"advanced": parsers.XLSXParserAdvanced},  # type: ignore
     }