jack 1 month ago
parent
commit
d6c0f78f7f

+ 1 - 0
py/core/parsers/structured/csv_parser.py

@@ -92,6 +92,7 @@ class CSVParserAdvanced(AsyncParser[str | bytes]):
 
         chunk_rows = []
         for row_num, row in enumerate(csv_reader):
+            print(row)
             chunk_rows.append(row)
             if row_num % num_rows == 0:
                 yield (

+ 2 - 2
py/core/providers/ingestion/unstructured/base.py

@@ -98,7 +98,7 @@ class UnstructuredIngestionProvider(IngestionProvider):
         DocumentType.XLSX: [parsers.XLSXParser],  # type: ignore
         #DocumentType.DOC: [parsers.DOCParser],  # type: ignore
         DocumentType.PPT: [parsers.PPTParser],  # type: ignore
-        #DocumentType.CSV: [parsers.CSVParser],  # type: ignore
+        DocumentType.CSV: [parsers.CSVParserAdvanced],  # type: ignore
     }
 
     EXTRA_PARSERS = {
@@ -176,7 +176,7 @@ class UnstructuredIngestionProvider(IngestionProvider):
 
         self.parsers: dict[DocumentType, AsyncParser] = {}
         self._initialize_parsers()
-
+ 
     def _initialize_parsers(self):
         for doc_type, parsers in self.R2R_FALLBACK_PARSERS.items():
             for parser in parsers: