jack 3 달 전
부모
커밋
d6c0f78f7f
2개의 변경된 파일, 3개의 추가 그리고 2개의 삭제
  1. 1 0
      py/core/parsers/structured/csv_parser.py
  2. 2 2
      py/core/providers/ingestion/unstructured/base.py

+ 1 - 0
py/core/parsers/structured/csv_parser.py

@@ -92,6 +92,7 @@ class CSVParserAdvanced(AsyncParser[str | bytes]):
 
 
         chunk_rows = []
         chunk_rows = []
         for row_num, row in enumerate(csv_reader):
         for row_num, row in enumerate(csv_reader):
+            print(row)
             chunk_rows.append(row)
             chunk_rows.append(row)
             if row_num % num_rows == 0:
             if row_num % num_rows == 0:
                 yield (
                 yield (

+ 2 - 2
py/core/providers/ingestion/unstructured/base.py

@@ -98,7 +98,7 @@ class UnstructuredIngestionProvider(IngestionProvider):
         DocumentType.XLSX: [parsers.XLSXParser],  # type: ignore
         DocumentType.XLSX: [parsers.XLSXParser],  # type: ignore
         #DocumentType.DOC: [parsers.DOCParser],  # type: ignore
         #DocumentType.DOC: [parsers.DOCParser],  # type: ignore
         DocumentType.PPT: [parsers.PPTParser],  # type: ignore
         DocumentType.PPT: [parsers.PPTParser],  # type: ignore
-        #DocumentType.CSV: [parsers.CSVParser],  # type: ignore
+        DocumentType.CSV: [parsers.CSVParserAdvanced],  # type: ignore
     }
     }
 
 
     EXTRA_PARSERS = {
     EXTRA_PARSERS = {
@@ -176,7 +176,7 @@ class UnstructuredIngestionProvider(IngestionProvider):
 
 
         self.parsers: dict[DocumentType, AsyncParser] = {}
         self.parsers: dict[DocumentType, AsyncParser] = {}
         self._initialize_parsers()
         self._initialize_parsers()
-
+ 
     def _initialize_parsers(self):
     def _initialize_parsers(self):
         for doc_type, parsers in self.R2R_FALLBACK_PARSERS.items():
         for doc_type, parsers in self.R2R_FALLBACK_PARSERS.items():
             for parser in parsers:
             for parser in parsers: