From 53bcc0b26a9b4e5560946ef2a4b7134bc589a7e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=A6=E4=B9=A0=E7=9A=84=E8=8F=9C=E9=B8=A1=E7=BA=A2?= =?UTF-8?q?=E7=91=9E?= Date: Sun, 8 Sep 2019 16:28:15 +0800 Subject: [PATCH] loader-pr --- fastNLP/io/file_reader.py | 40 +++++++++++++++++++-------------------- fastNLP/io/test.csv | 6 ++++++ 2 files changed, 26 insertions(+), 20 deletions(-) create mode 100644 fastNLP/io/test.csv diff --git a/fastNLP/io/file_reader.py b/fastNLP/io/file_reader.py index 0ae0a319..17a0a6ca 100644 --- a/fastNLP/io/file_reader.py +++ b/fastNLP/io/file_reader.py @@ -2,6 +2,7 @@ 此模块用于给其它模块提供读取文件的函数,没有为用户提供 API """ import json +import csv def _read_csv(path, encoding='utf-8', headers=None, sep=',', dropna=True): @@ -16,27 +17,26 @@ def _read_csv(path, encoding='utf-8', headers=None, sep=',', dropna=True): :if False, raise ValueError when reading invalid data. default: True :return: generator, every time yield (line number, csv item) """ - with open(path, 'r', encoding=encoding) as f: - start_idx = 0 - if headers is None: - headers = f.readline().rstrip('\r\n') - headers = headers.split(sep) - start_idx += 1 - elif not isinstance(headers, (list, tuple)): - raise TypeError("headers should be list or tuple, not {}." \ + f = csv.reader(open(path, encoding=encoding), delimiter=sep) + start_idx = 0 + if headers is None: + headers = next(f) + start_idx += 1 + elif not isinstance(headers, (list, tuple)): + raise TypeError("headers should be list or tuple, not {}." \ .format(type(headers))) - for line_idx, line in enumerate(f, start_idx): - contents = line.rstrip('\r\n').split(sep) - if len(contents) != len(headers): - if dropna: - continue - else: - raise ValueError("Line {} has {} parts, while header has {} parts." \ - .format(line_idx, len(contents), len(headers))) - _dict = {} - for header, content in zip(headers, contents): - _dict[header] = content - yield line_idx, _dict + for line_idx, line in enumerate(f, start_idx): + contents = line + if len(contents) != len(headers): + if dropna: + continue + else: + raise ValueError("Line {} has {} parts, while header has {} parts." \ + .format(line_idx, len(contents), len(headers))) + _dict = {} + for header, content in zip(headers, contents): + _dict[header] = content + yield line_idx, _dict def _read_json(path, encoding='utf-8', fields=None, dropna=True): diff --git a/fastNLP/io/test.csv b/fastNLP/io/test.csv new file mode 100644 index 00000000..88293b2f --- /dev/null +++ b/fastNLP/io/test.csv @@ -0,0 +1,6 @@ +a b +1 "Contrary to other reviews, I have zero complaints about the service or the prices. I have been getting tire service here for the past 5 years now, and compared to my experience with places like Pep Boys, these guys are experienced and know what they're doing. \nAlso, this is one place that I do not feel like I am being taken advantage of, just because of my gender. Other auto mechanics have been notorious for capitalizing on my ignorance of cars, and have sucked my bank account dry. But here, my service and road coverage has all been well explained - and let up to me to decide. \nAnd they just renovated the waiting room. It looks a lot better than it did in previous years." +2 "Last summer I had an appointment to get new tires and had to wait a super long time. I also went in this week for them to fix a minor problem with a tire they put on. They \""fixed\"" it for free, and the very next morning I had the same issue. I called to complain, and the \""manager\"" didn't even apologize!!! So frustrated. Never going back. They seem overpriced, too." +3 "Friendly staff, same starbucks fair you get anywhere else. Sometimes the lines can get long." +4 "The food is good. Unfortunately the service is very hit or miss. The main issue seems to be with the kitchen, the waiters and waitresses are often very apologetic for the long waits and it's pretty obvious that some of them avoid the tables after taking the initial order to avoid hearing complaints." +5 "Even when we didn't have a car Filene's Basement was worth the bus trip to the Waterfront. I always find something (usually I find 3-4 things and spend about $60) and better still, I am always still wearing the clothes and shoes 3 months later. \n\nI kind of suspect this is the best shopping in Pittsburgh; it's much better than the usual department stores, better than Marshall's and TJ Maxx and better than the Saks downtown, even when it has a sale. Selection, bargains AND quality.\n\nI like this Filene's better than Gabriel Brothers, which are harder to get to. Gabriel Brothers are a real discount shopper's challenge and I'm afraid I didn't live in Pittsburgh long enough to develop the necessary skills . . . Filene's was still up and running in June 2007 when I left town." \ No newline at end of file