Browse Source

fix dataset.read_csv

tags/v0.2.0
yunfan 6 years ago
parent
commit
941b88f26b
1 changed file with 6 additions and 11 deletions
  1. +6
    -11
      fastNLP/core/dataset.py

+ 6
- 11
fastNLP/core/dataset.py View File

@@ -304,23 +304,18 @@ class DataSet(object):
with open(csv_path, 'r') as f:
start_idx = 0
if headers is None:
headers = f.readline()
headers = f.readline().rstrip('\r\n')
headers = headers.split(sep)
start_idx += 1
else:
assert isinstance(headers, list), "headers should be list, not {}.".format(type(headers))
assert isinstance(headers, (list, tuple)), "headers should be list or tuple, not {}.".format(type(headers))
_dict = {}
for col in headers:
_dict[col] = []
for line_idx, line in enumerate(f, start_idx):
contents = line.split(sep)
if len(contents)!=len(headers):
if dropna:
continue
else:
#TODO change error type
raise ValueError("Line {} has {} parts, while header has {} parts."\
.format(line_idx, len(contents), len(headers)))
contents = line.rstrip('\r\n').split(sep)
assert len(contents)==len(headers), "Line {} has {} parts, while header has {}."\
.format(line_idx, len(contents), len(headers))
for header, content in zip(headers, contents):
_dict[header].append(content)
return cls(_dict)
return cls(_dict)

Loading…
Cancel
Save