@@ -0,0 +1,75 @@ | |||||
from fastNLP.io.embed_loader import EmbeddingOption, EmbedLoader | |||||
from fastNLP.core.vocabulary import VocabularyOption | |||||
from fastNLP.io.base_loader import DataSetLoader, DataInfo | |||||
from typing import Union, Dict, List, Iterator | |||||
from fastNLP import DataSet | |||||
from fastNLP import Instance | |||||
from fastNLP import Vocabulary | |||||
from fastNLP import Const | |||||
from reproduction.utils import check_dataloader_paths | |||||
from functools import partial | |||||
class MTL16Loader(DataSetLoader):
    """
    Loader for the MTL16 text-classification dataset.

    Every non-empty line of an input file is expected to hold a label and a
    text separated by a tab character. The loaded DataSet contains the
    following fields:

        words: list(str), the whitespace-tokenized text to classify
        target: str, the label of the text

    Data source: https://pan.baidu.com/s/1c2L6vdA
    """

    def __init__(self):
        super().__init__()

    def _load(self, path):
        """
        Read one MTL16 file into a DataSet.

        :param path: path of a UTF-8 text file with one tab-separated
            ``label``/``text`` pair per line; blank lines are skipped.
        :return: a DataSet with one Instance (``words``, ``target``) per
            non-blank line.
        :raises IndexError: if a non-blank line contains no tab separator.
        :raises RuntimeError: if the file yields no instance at all.
        """
        dataset = DataSet()
        with open(path, 'r', encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                parts = line.split('\t')
                if len(parts) < 2:
                    # Previously this surfaced as a bare "list index out of
                    # range"; the exception type is kept for compatibility,
                    # but the message now says where the bad line is.
                    raise IndexError(
                        f"{path}, line {line_no}: expected a tab-separated "
                        f"label and text, but found no tab separator."
                    )
                target = parts[0]
                # Only the second column is used as text; any further
                # tab-separated columns are ignored.
                words = parts[1].split()
                dataset.append(Instance(words=words, target=target))
        if len(dataset) == 0:
            raise RuntimeError(f"{path} has no valid data.")
        return dataset

    def process(self,
                paths: Union[str, Dict[str, str]],
                src_vocab_opt: VocabularyOption = None,
                tgt_vocab_opt: VocabularyOption = None,
                src_embed_opt: EmbeddingOption = None):
        """
        Load the given files, build vocabularies and index every dataset.

        :param paths: a single path or a ``{name: path}`` mapping; it is
            passed through ``check_dataloader_paths`` first.
            NOTE(review): presumably that helper returns a ``{name: path}``
            dict that always has a ``'train'`` entry -- confirm.
        :param src_vocab_opt: keyword arguments for the ``words`` Vocabulary;
            a default Vocabulary is used when None.
        :param tgt_vocab_opt: keyword arguments for the ``target`` Vocabulary;
            when None the Vocabulary is created with ``unknown=None`` and
            ``padding=None``.
        :param src_embed_opt: keyword arguments forwarded to
            ``EmbedLoader.load_with_vocab``; no embedding is loaded when None.
        :return: a DataInfo whose ``datasets`` holds the indexed datasets,
            whose ``vocabs`` holds the ``"words"`` and ``"target"``
            vocabularies and, if requested, whose ``embeddings['words']``
            holds the loaded embedding.
        """
        paths = check_dataloader_paths(paths)
        datasets = {}
        info = DataInfo()
        for name, path in paths.items():
            dataset = self.load(path)
            datasets[name] = dataset

        # Both vocabularies are built from the training split only and then
        # used to index every loaded split.
        src_vocab = Vocabulary() if src_vocab_opt is None else Vocabulary(**src_vocab_opt)
        src_vocab.from_dataset(datasets['train'], field_name='words')
        src_vocab.index_dataset(*datasets.values(), field_name='words')

        tgt_vocab = Vocabulary(unknown=None, padding=None) \
            if tgt_vocab_opt is None else Vocabulary(**tgt_vocab_opt)
        tgt_vocab.from_dataset(datasets['train'], field_name='target')
        tgt_vocab.index_dataset(*datasets.values(), field_name='target')

        info.vocabs = {
            "words": src_vocab,
            "target": tgt_vocab
        }

        info.datasets = datasets

        if src_embed_opt is not None:
            embed = EmbedLoader.load_with_vocab(**src_embed_opt, vocab=src_vocab)
            info.embeddings['words'] = embed

        return info
@@ -0,0 +1,10 @@ | |||||
1 the only thing better than these sunglasses is the customer service i got , after i dropped and broke the lenses on these i called 80 's purple and they actually sent me out a replacement free of charge . i was blown away | |||||
0 this light worked for one day . i should have known better because in the past , i bought a tap light , and it worked for only a few days , too . do n't waste your money | |||||
1 i 've tried 6 different nursing bras . this one , with the center snap closure , is the easiest to use . it is also the lightest and most comfortable , while providing good support . my only complaint is that after about 50 washes the underwire begins to poke free from the fabric . even when i try to sew it back into place , it breaks loose after a few washes . perhaps if i handwashed the bra instead of using a machine , it would last longer . this bra is less durabe than my other nursing bras ( particularly the leading lady bra , which seems to be indestructible ) , but it is well worth the sacrifice for comfort , lightness , and ease of use . it is by far my favorite | |||||
0 i have had my bag for a couple of months . the liner on the inside has already ripped | |||||
0 the photo is quite deceiving . this suit is made out of cheap polyester fabric that looks cheap , shiny , and is horrible to the touch . my three year olds hate the uncomfortable stiffness . spend the extra money for a decent fabric that is actually practical for a toddler if they really need a suit | |||||
1 i had bought a bra of this model at a discount store , just got lucky . it quickly became my favorite , and i was glad to find it at amazon . | |||||
0 lookslike it would be a nice product , but it 's only for very small babies up to 12 pounds and 23 inches . my baby is very long and just does n't fit - wish target/amazon would have been more upfront with the sizing | |||||
0 i purchased the non-premium kit ( $ 9.99 ) with a silicone skin case cover and 2 screen protectors ( one for each screen ) , but it is the same case . the problem is that the silicone skin cover is slippery , twice as slippery as the nintendo lite without the cover . we thought that washing them in dove dish soap would wash away the slipperyness , but that did n't work . after handling the cover , your hands have a slippery residue on them . the other issue is that the cover is so thin that it is little more than scratch protection , not impact protection . the screen covers that come with the non-premium kit are ok , i guess , but one of them had 2 defect particles that were raised ( trust me , the screen was clean ) . i purchased 2 kits , and i had one screen protector defect and my wife accidentally broke one of the silicone covers hinge straps with little effort . i do not recommend this product at all | |||||
1 good quality jeans at an affordable price . size is just right , quite comfortable | |||||
0 not the best fabric , scratchy and see thru . you get what you pay for on these |
@@ -0,0 +1,10 @@ | |||||
import unittest | |||||
from reproduction.text_classification.data.MTL16Loader import MTL16Loader | |||||
class TestDataLoader(unittest.TestCase):
    """Smoke test for MTL16Loader using the 'sample_MTL16.txt' sample file."""

    def test_MTL16Loader(self):
        """process() must return non-empty datasets and both vocabularies."""
        loader = MTL16Loader()
        # NOTE(review): the path is relative, so the test must be run from
        # the directory that contains 'sample_MTL16.txt'.
        data = loader.process('sample_MTL16.txt')
        print(data.datasets)

        # Without assertions the test passed whenever process() did not raise.
        self.assertTrue(data.datasets)
        for name, dataset in data.datasets.items():
            self.assertGreater(len(dataset), 0, f"dataset '{name}' is empty")
        self.assertEqual(set(data.vocabs), {"words", "target"})