jack 1 month ago
parent
commit
8d43e7b53f
1 changed file with 13 additions and 2 deletions
  1. 13 2
      py/core/parsers/structured/csv_parser.py

+ 13 - 2
py/core/parsers/structured/csv_parser.py

@@ -32,7 +32,6 @@ class CSVParser(AsyncParser[str | bytes]):
         self, data: str | bytes, *args, **kwargs
     ) -> AsyncGenerator[str, None]:
         """Ingest CSV data and yield text from each row."""
-        print(data)
         if isinstance(data, bytes):
             data = data.decode("utf-8")
         csv_reader = self.csv.reader(self.StringIO(data))
@@ -80,8 +79,20 @@ class CSVParserAdvanced(AsyncParser[str | bytes]):
         **kwargs,
     ) -> AsyncGenerator[str, None]:
         """Ingest CSV data and yield text from each row."""
+        print(data)
         if isinstance(data, bytes):
-            data = data.decode("utf-8")
+            try:
+                data = data.decode("utf-8")
+            except UnicodeDecodeError:
+                # 尝试其他常见编码
+                for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
+                    try:
+                        data = data.decode(encoding)
+                        break
+                    except UnicodeDecodeError:
+                        continue
+                else:
+                    raise ValueError("Unable to decode the provided byte data with any supported encoding")
         # let the first row be the header
         delimiter = self.get_delimiter(file=self.StringIO(data))