From 941b88f26b6b36c34a4968d1289c18a38a796a7e Mon Sep 17 00:00:00 2001 From: yunfan Date: Mon, 26 Nov 2018 22:01:57 +0800 Subject: [PATCH] fix dataset.read_csv --- fastNLP/core/dataset.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/fastNLP/core/dataset.py b/fastNLP/core/dataset.py index e2a990ca..4fea967a 100644 --- a/fastNLP/core/dataset.py +++ b/fastNLP/core/dataset.py @@ -304,23 +304,18 @@ class DataSet(object): with open(csv_path, 'r') as f: start_idx = 0 if headers is None: - headers = f.readline() + headers = f.readline().rstrip('\r\n') headers = headers.split(sep) start_idx += 1 else: - assert isinstance(headers, list), "headers should be list, not {}.".format(type(headers)) + assert isinstance(headers, (list, tuple)), "headers should be list or tuple, not {}.".format(type(headers)) _dict = {} for col in headers: _dict[col] = [] for line_idx, line in enumerate(f, start_idx): - contents = line.split(sep) - if len(contents)!=len(headers): - if dropna: - continue - else: - #TODO change error type - raise ValueError("Line {} has {} parts, while header has {} parts."\ - .format(line_idx, len(contents), len(headers))) + contents = line.rstrip('\r\n').split(sep) + assert len(contents)==len(headers), "Line {} has {} parts, while header has {}."\ + .format(line_idx, len(contents), len(headers)) for header, content in zip(headers, contents): _dict[header].append(content) - return cls(_dict) \ No newline at end of file + return cls(_dict)