@@ -0,0 +1,75 @@ | |||
from fastNLP.io.embed_loader import EmbeddingOption, EmbedLoader | |||
from fastNLP.core.vocabulary import VocabularyOption | |||
from fastNLP.io.base_loader import DataSetLoader, DataInfo | |||
from typing import Union, Dict, List, Iterator | |||
from fastNLP import DataSet | |||
from fastNLP import Instance | |||
from fastNLP import Vocabulary | |||
from fastNLP import Const | |||
from reproduction.utils import check_dataloader_paths | |||
from functools import partial | |||
class MTL16Loader(DataSetLoader):
    """Load the MTL16 text-classification dataset.

    Each DataSet produced by ``_load`` contains the following fields:

        words: list(str), the tokens of the text to classify
        target: str, the label of the text

    Data source: https://pan.baidu.com/s/1c2L6vdA
    """

    def __init__(self):
        super(MTL16Loader, self).__init__()

    def _load(self, path):
        """Read one MTL16 file into a DataSet.

        Every non-blank line is expected to hold a label, a tab character,
        and then the text; the text is split on whitespace into tokens.

        :param path: path of a UTF-8 encoded MTL16 file.
        :return: a DataSet with one ``Instance(words=..., target=...)`` per
            non-blank line.
        :raises ValueError: if a non-blank line contains no tab separator.
        :raises RuntimeError: if the file yields no instance at all.
        """
        dataset = DataSet()
        with open(path, 'r', encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                # Split on the FIRST tab only so that text which itself
                # contains tabs is kept whole instead of being truncated.
                target, sep, text = line.partition('\t')
                if not sep:
                    # Fail with file/line context instead of a bare
                    # IndexError from indexing the split result.
                    raise ValueError(
                        f"{path}:{line_no}: expected '<label>\\t<text>', "
                        f"got {line!r}"
                    )
                dataset.append(Instance(words=text.split(), target=target))
        if len(dataset) == 0:
            raise RuntimeError(f"{path} has no valid data.")
        return dataset

    def process(self,
                paths: Union[str, Dict[str, str]],
                src_vocab_opt: VocabularyOption = None,
                tgt_vocab_opt: VocabularyOption = None,
                src_embed_opt: EmbeddingOption = None):
        """Load every split, build and apply vocabularies, optionally load embeddings.

        :param paths: a single path or a mapping of split name to path; it is
            normalised by ``check_dataloader_paths``. The result must contain
            a ``'train'`` entry because both vocabularies are built from it
            (presumably guaranteed by ``check_dataloader_paths`` -- confirm).
        :param src_vocab_opt: keyword options for the ``words`` Vocabulary;
            a default Vocabulary is used when None.
        :param tgt_vocab_opt: keyword options for the ``target`` Vocabulary;
            when None a Vocabulary without unknown/padding entries is used.
        :param src_embed_opt: keyword options forwarded to
            ``EmbedLoader.load_with_vocab``; no embedding is loaded when None.
        :return: a DataInfo whose ``datasets`` maps split name to DataSet,
            whose ``vocabs`` holds the ``"words"`` and ``"target"``
            vocabularies, and whose ``embeddings['words']`` is set only when
            ``src_embed_opt`` is given.
        """
        paths = check_dataloader_paths(paths)
        datasets = {}
        info = DataInfo()
        for name, path in paths.items():
            # ``load`` is not defined in this class; presumably the base-class
            # entry point that dispatches to ``_load`` -- confirm.
            dataset = self.load(path)
            datasets[name] = dataset

        # Vocabularies are built from the training split only, then applied
        # to every split.
        src_vocab = Vocabulary() if src_vocab_opt is None else Vocabulary(**src_vocab_opt)
        src_vocab.from_dataset(datasets['train'], field_name='words')
        src_vocab.index_dataset(*datasets.values(), field_name='words')

        # Labels need neither an unknown nor a padding entry by default.
        tgt_vocab = Vocabulary(unknown=None, padding=None) \
            if tgt_vocab_opt is None else Vocabulary(**tgt_vocab_opt)
        tgt_vocab.from_dataset(datasets['train'], field_name='target')
        tgt_vocab.index_dataset(*datasets.values(), field_name='target')

        info.vocabs = {
            "words": src_vocab,
            "target": tgt_vocab
        }
        info.datasets = datasets

        if src_embed_opt is not None:
            embed = EmbedLoader.load_with_vocab(**src_embed_opt, vocab=src_vocab)
            info.embeddings['words'] = embed

        return info
@@ -0,0 +1,10 @@ | |||
1 the only thing better than these sunglasses is the customer service i got , after i dropped and broke the lenses on these i called 80 's purple and they actually sent me out a replacement free of charge . i was blown away | |||
0 this light worked for one day . i should have known better because in the past , i bought a tap light , and it worked for only a few days , too . do n't waste your money | |||
1 i 've tried 6 different nursing bras . this one , with the center snap closure , is the easiest to use . it is also the lightest and most comfortable , while providing good support . my only complaint is that after about 50 washes the underwire begins to poke free from the fabric . even when i try to sew it back into place , it breaks loose after a few washes . perhaps if i handwashed the bra instead of using a machine , it would last longer . this bra is less durabe than my other nursing bras ( particularly the leading lady bra , which seems to be indestructible ) , but it is well worth the sacrifice for comfort , lightness , and ease of use . it is by far my favorite | |||
0 i have had my bag for a couple of months . the liner on the inside has already ripped | |||
0 the photo is quite deceiving . this suit is made out of cheap polyester fabric that looks cheap , shiny , and is horrible to the touch . my three year olds hate the uncomfortable stiffness . spend the extra money for a decent fabric that is actually practical for a toddler if they really need a suit | |||
1 i had bought a bra of this model at a discount store , just got lucky . it quickly became my favorite , and i was glad to find it at amazon . | |||
0 lookslike it would be a nice product , but it 's only for very small babies up to 12 pounds and 23 inches . my baby is very long and just does n't fit - wish target/amazon would have been more upfront with the sizing | |||
0 i purchased the non-premium kit ( $ 9.99 ) with a silicone skin case cover and 2 screen protectors ( one for each screen ) , but it is the same case . the problem is that the silicone skin cover is slippery , twice as slippery as the nintendo lite without the cover . we thought that washing them in dove dish soap would wash away the slipperyness , but that did n't work . after handling the cover , your hands have a slippery residue on them . the other issue is that the cover is so thin that it is little more than scratch protection , not impact protection . the screen covers that come with the non-premium kit are ok , i guess , but one of them had 2 defect particles that were raised ( trust me , the screen was clean ) . i purchased 2 kits , and i had one screen protector defect and my wife accidentally broke one of the silicone covers hinge straps with little effort . i do not recommend this product at all | |||
1 good quality jeans at an affordable price . size is just right , quite comfortable | |||
0 not the best fabric , scratchy and see thru . you get what you pay for on these |
@@ -0,0 +1,10 @@ | |||
import unittest | |||
from reproduction.text_classification.data.MTL16Loader import MTL16Loader | |||
class TestDataLoader(unittest.TestCase):
    """Smoke tests for the text-classification data loaders."""

    def test_MTL16Loader(self):
        """Process the sample file and check that data and vocabularies come back."""
        loader = MTL16Loader()
        # NOTE(review): the path is relative to the current working
        # directory, so the test must be run from the directory that
        # contains the sample file.
        data = loader.process('sample_MTL16.txt')
        print(data.datasets)

        # Previously the test only printed; assert on what process() sets.
        self.assertGreater(len(data.datasets), 0)
        for dataset in data.datasets.values():
            self.assertGreater(len(dataset), 0)
        self.assertIn('words', data.vocabs)
        self.assertIn('target', data.vocabs)