|
|
@@ -32,7 +32,6 @@ class CSVParser(AsyncParser[str | bytes]):
|
|
|
self, data: str | bytes, *args, **kwargs
|
|
|
) -> AsyncGenerator[str, None]:
|
|
|
"""Ingest CSV data and yield text from each row."""
|
|
|
- print(data)
|
|
|
if isinstance(data, bytes):
|
|
|
data = data.decode("utf-8")
|
|
|
csv_reader = self.csv.reader(self.StringIO(data))
|
|
|
@@ -80,8 +79,20 @@ class CSVParserAdvanced(AsyncParser[str | bytes]):
|
|
|
**kwargs,
|
|
|
) -> AsyncGenerator[str, None]:
|
|
|
"""Ingest CSV data and yield text from each row."""
|
|
|
+ print(data)
|
|
|
if isinstance(data, bytes):
|
|
|
- data = data.decode("utf-8")
|
|
|
+ try:
|
|
|
+ data = data.decode("utf-8")
|
|
|
+ except UnicodeDecodeError:
|
|
|
+ # Try other common encodings
|
|
|
+ for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
|
|
|
+ try:
|
|
|
+ data = data.decode(encoding)
|
|
|
+ break
|
|
|
+ except UnicodeDecodeError:
|
|
|
+ continue
|
|
|
+ else:
|
|
|
+ raise ValueError("Unable to decode the provided byte data with any supported encoding")
|
|
|
# let the first row be the header
|
|
|
delimiter = self.get_delimiter(file=self.StringIO(data))
|
|
|
|