fix dataset.read_csv

6 years ago · 941b88f26b
--- a/fastNLP/core/dataset.py
+++ b/fastNLP/core/dataset.py
@@ -304,23 +304,18 @@ class DataSet(object):
         with open(csv_path, 'r') as f:
             start_idx = 0
             if headers is None:
-                headers = f.readline()
+                headers = f.readline().rstrip('\r\n')
                 headers = headers.split(sep)
                 start_idx += 1
             else:
-                assert isinstance(headers, list), "headers should be list, not {}.".format(type(headers))
+                assert isinstance(headers, (list, tuple)), "headers should be list or tuple, not {}.".format(type(headers))
             _dict = {}
             for col in headers:
                 _dict[col] = []
             for line_idx, line in enumerate(f, start_idx):
-                contents = line.split(sep)
-                if len(contents)!=len(headers):
-                    if dropna:
-                        continue
-                    else:
-                        #TODO change error type
-                        raise ValueError("Line {} has {} parts, while header has {} parts."\
-                            .format(line_idx, len(contents), len(headers)))
+                contents = line.rstrip('\r\n').split(sep)
+                assert len(contents)==len(headers), "Line {} has {} parts, while header has {}."\
+                    .format(line_idx, len(contents), len(headers))
                 for header, content in zip(headers, contents):
                     _dict[header].append(content)
-        return cls(_dict)
+        return cls(_dict)