|
|
@@ -304,23 +304,18 @@ class DataSet(object): |
|
|
|
with open(csv_path, 'r') as f: |
|
|
|
start_idx = 0 |
|
|
|
if headers is None: |
|
|
|
headers = f.readline() |
|
|
|
headers = f.readline().rstrip('\r\n') |
|
|
|
headers = headers.split(sep) |
|
|
|
start_idx += 1 |
|
|
|
else: |
|
|
|
assert isinstance(headers, list), "headers should be list, not {}.".format(type(headers)) |
|
|
|
assert isinstance(headers, (list, tuple)), "headers should be list or tuple, not {}.".format(type(headers)) |
|
|
|
_dict = {} |
|
|
|
for col in headers: |
|
|
|
_dict[col] = [] |
|
|
|
for line_idx, line in enumerate(f, start_idx): |
|
|
|
contents = line.split(sep) |
|
|
|
if len(contents)!=len(headers): |
|
|
|
if dropna: |
|
|
|
continue |
|
|
|
else: |
|
|
|
#TODO change error type |
|
|
|
raise ValueError("Line {} has {} parts, while header has {} parts."\ |
|
|
|
.format(line_idx, len(contents), len(headers))) |
|
|
|
contents = line.rstrip('\r\n').split(sep) |
|
|
|
assert len(contents)==len(headers), "Line {} has {} parts, while header has {}."\ |
|
|
|
.format(line_idx, len(contents), len(headers)) |
|
|
|
for header, content in zip(headers, contents): |
|
|
|
_dict[header].append(content) |
|
|
|
return cls(_dict) |
|
|
|
return cls(_dict) |