@@ -201,19 +201,6 @@ class TorchLoaderIter(BatchIter):
        self.batch_size = dataset.batch_size


class OnlineDataGettter:
    # TODO
    pass


class OnlineDataIter(BatchIter):
    # TODO
    def __init__(self, dataset, batch_size=1, buffer_size=10000, sampler=None, as_numpy=False,
                 num_workers=0, pin_memory=False, drop_last=False,
                 timeout=0, worker_init_fn=None, **kwargs):
        super().__init__()


def _to_tensor(batch, field_dtype):
    """
@@ -1,6 +1,5 @@
"""
.. todo::
    doc
r"""
Field names used throughout the fastNLP package follow a fixed naming convention, which is defined by the fastNLP.Const class.
"""
__all__ = [
@@ -50,11 +49,13 @@ class Const:
    @staticmethod
    def RAW_WORDS(i):
        """Get the name of the i-th ``RAW_WORDS`` field."""
        i = int(i) + 1
        return Const.RAW_WORD + str(i)

    @staticmethod
    def RAW_CHARS(i):
        """Get the name of the i-th ``RAW_CHARS`` field."""
        i = int(i) + 1
        return Const.RAW_CHAR + str(i)
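For illustration only (not part of the diff), a quick sketch of what the 1-based naming above yields, assuming ``Const.RAW_WORD == 'raw_words'`` and ``Const.RAW_CHAR == 'raw_chars'`` as defined earlier in this class:

```python
from fastNLP import Const

# With `i = int(i) + 1`, indices are mapped to 1-based name suffixes.
print(Const.RAW_WORDS(0))  # 'raw_words1' (assuming Const.RAW_WORD == 'raw_words')
print(Const.RAW_CHARS(1))  # 'raw_chars2' (assuming Const.RAW_CHAR == 'raw_chars')
```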
@@ -352,82 +352,3 @@ def _prepare_losser(losser):
        return losser
    else:
        raise TypeError(f"Type of loss should be `fastNLP.LossBase`, got {type(losser)}")


def squash(predict, truth, **kwargs):
    """To reshape tensors in order to fit loss functions in PyTorch.

    :param predict: Tensor, model output
    :param truth: Tensor, truth from dataset
    :param kwargs: extra arguments
    :return predict, truth: predict & truth after processing
    """
    return predict.view(-1, predict.size()[-1]), truth.view(-1, )


def unpad(predict, truth, **kwargs):
    """To process padded sequence output to get the true loss.

    :param predict: Tensor, [batch_size, max_len, tag_size]
    :param truth: Tensor, [batch_size, max_len]
    :param kwargs: kwargs["lens"] is a list or LongTensor of size [batch_size]; the i-th element is the true length of the i-th sequence.
    :return predict, truth: predict & truth after processing
    """
    if kwargs.get("lens") is None:
        return predict, truth
    lens = torch.LongTensor(kwargs["lens"])
    lens, idx = torch.sort(lens, descending=True)
    predict = torch.nn.utils.rnn.pack_padded_sequence(predict[idx], lens, batch_first=True).data
    truth = torch.nn.utils.rnn.pack_padded_sequence(truth[idx], lens, batch_first=True).data
    return predict, truth


def unpad_mask(predict, truth, **kwargs):
    """To process padded sequence output to get the true loss.

    :param predict: Tensor, [batch_size, max_len, tag_size]
    :param truth: Tensor, [batch_size, max_len]
    :param kwargs: kwargs["lens"] is a list or LongTensor of size [batch_size]; the i-th element is the true length of the i-th sequence.
    :return predict, truth: predict & truth after processing
    """
    if kwargs.get("lens") is None:
        return predict, truth
    mas = make_mask(kwargs["lens"], truth.size()[1])
    return mask(predict, truth, mask=mas)


def mask(predict, truth, **kwargs):
    """To select specific elements from a Tensor. This method calls ``squash()``.

    :param predict: Tensor, [batch_size, max_len, tag_size]
    :param truth: Tensor, [batch_size, max_len]
    :param kwargs: extra arguments; kwargs["mask"]: ByteTensor, [batch_size, max_len], the mask Tensor. Positions that are 1 will be selected.
    :return predict, truth: predict & truth after processing
    """
    if kwargs.get("mask") is None:
        return predict, truth
    mask = kwargs["mask"]
    predict, truth = squash(predict, truth)
    mask = mask.view(-1, )
    predict = torch.masked_select(predict.permute(1, 0), mask).view(predict.size()[-1], -1).permute(1, 0)
    truth = torch.masked_select(truth, mask)
    return predict, truth


def make_mask(lens, tar_len):
    """To generate a mask over a sequence.

    :param lens: list or LongTensor, [batch_size]
    :param tar_len: int
    :return mask: ByteTensor
    """
    lens = torch.LongTensor(lens)
    mask = [torch.ge(lens, i + 1) for i in range(tar_len)]
    mask = torch.stack(mask, 1)
    return mask
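The helpers removed above reshaped model output and targets so that standard PyTorch losses could be applied to (possibly padded) sequence predictions. A minimal sketch of the two common cases, illustrative only and not part of the diff; shapes and variable names are assumptions:

```python
import torch
import torch.nn.functional as F

batch_size, max_len, tag_size = 2, 4, 5
predict = torch.randn(batch_size, max_len, tag_size)        # model output
truth = torch.randint(0, tag_size, (batch_size, max_len))   # gold tags
lens = [4, 2]                                               # true sequence lengths

# squash(): flatten to [batch_size * max_len, tag_size] and [batch_size * max_len]
flat_predict = predict.view(-1, predict.size(-1))
flat_truth = truth.view(-1)
loss_all = F.cross_entropy(flat_predict, flat_truth)

# unpad(): drop padded positions via pack_padded_sequence before computing the loss
lens_t, idx = torch.sort(torch.LongTensor(lens), descending=True)
packed_predict = torch.nn.utils.rnn.pack_padded_sequence(predict[idx], lens_t, batch_first=True).data
packed_truth = torch.nn.utils.rnn.pack_padded_sequence(truth[idx], lens_t, batch_first=True).data
loss_unpadded = F.cross_entropy(packed_predict, packed_truth)
```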
@@ -33,8 +33,9 @@ class Optimizer(object):
    def construct_from_pytorch(self, model_params):
        raise NotImplementedError

    def _get_require_grads_param(self, params):
    @staticmethod
    def _get_require_grads_param(params):
        """
        Drop the parameters in ``params`` that do not require gradients.
@@ -43,6 +44,7 @@ class Optimizer(object):
        """
        return [param for param in params if param.requires_grad]
class NullOptimizer(Optimizer):
    """
    Pass this optimizer when you do not want the Trainer to perform optimizer updates, but make sure the parameters are updated through a callback instead.
@@ -113,7 +115,8 @@ class Adam(Optimizer):
class AdamW(TorchOptimizer):
    r"""
    An implementation of AdamW that was expected to appear in a later PyTorch release, https://github.com/pytorch/pytorch/pull/21250; added here in advance.
    An implementation of AdamW, which has been available since PyTorch 1.2.0, https://github.com/pytorch/pytorch/pull/21250.
    It is kept here to support older PyTorch versions.

    .. todo::
        Translate into Chinese
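As the updated docstring says, this class back-ports ``torch.optim.AdamW`` (merged upstream in PyTorch 1.2.0) so that older PyTorch installations still work. A minimal usage sketch, consistent with how the optimizer is constructed later in this diff:

```python
import torch
from fastNLP import AdamW  # bundled implementation, usable on PyTorch < 1.2.0 as well

model = torch.nn.Linear(10, 3)
optimizer = AdamW(params=model.parameters(), lr=2e-3, weight_decay=0.01)

# one illustrative update step
loss = model(torch.randn(4, 10)).sum()
loss.backward()
optimizer.step()
optimizer.zero_grad()
```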
@@ -51,7 +51,7 @@ class MatchingBertPipe(Pipe):
        super().__init__()
        self.lower = bool(lower)
        self.tokenizer = get_tokenizer(tokenizer=tokenizer)
        self.tokenizer = get_tokenizer(tokenize_method=tokenizer)

    def _tokenize(self, data_bundle, field_names, new_field_names):
        """
@@ -191,7 +191,7 @@ class MatchingPipe(Pipe):
        super().__init__()
        self.lower = bool(lower)
        self.tokenizer = get_tokenizer(tokenizer=tokenizer)
        self.tokenizer = get_tokenizer(tokenize_method=tokenizer)

    def _tokenize(self, data_bundle, field_names, new_field_names):
        """
@@ -65,27 +65,36 @@ def iob2bioes(tags: List[str]) -> List[str]:
    return new_tags


def get_tokenizer(tokenizer: str, lang='en'):
def get_tokenizer(tokenize_method: str, lang='en'):
    """
    :param str tokenizer: the tokenization method to use
    :param str tokenize_method: the tokenization method to use
    :param str lang: language; currently only ``en`` is supported
    :return: the tokenize function
    """
    if tokenizer == 'spacy':
    tokenizer_dict = {
        'spacy': None,
        'raw': _raw_split,
        'cn-char': _cn_char_split,
    }
    if tokenize_method == 'spacy':
        import spacy
        spacy.prefer_gpu()
        if lang != 'en':
            raise RuntimeError("Spacy only supports en right now.")
        en = spacy.load(lang)
        tokenizer = lambda x: [w.text for w in en.tokenizer(x)]
    elif tokenizer == 'raw':
        tokenizer = _raw_split
    elif tokenize_method in tokenizer_dict:
        tokenizer = tokenizer_dict[tokenize_method]
    else:
        raise RuntimeError("Only support `spacy`, `raw` tokenizer.")
        raise RuntimeError(f"Only support {tokenizer_dict.keys()} tokenizer.")
    return tokenizer


def _cn_char_split(sent):
    return [chars for chars in sent]


def _raw_split(sent):
    return sent.split()
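A short usage sketch of the refactored function, including the newly supported ``cn-char`` option (illustrative only; the import path is an assumption based on the module this hunk belongs to):

```python
from fastNLP.io.pipe.utils import get_tokenizer  # assumed module path

tokenize = get_tokenizer('raw')
print(tokenize('fastNLP is easy to use'))  # ['fastNLP', 'is', 'easy', 'to', 'use']

cn_tokenize = get_tokenizer('cn-char')
print(cn_tokenize('自然语言处理'))  # ['自', '然', '语', '言', '处', '理']
```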
@@ -8,8 +8,7 @@ from fastNLP.core.optimizer import AdamW
from fastNLP.embeddings import BertEmbedding
from fastNLP.io.pipe.matching import SNLIBertPipe, RTEBertPipe, MNLIBertPipe, \
    QNLIBertPipe, QuoraBertPipe
from reproduction.matching.model.bert import BertForNLI
from fastNLP.models.bert import BertForSentenceMatching

# define hyper-parameters
@@ -65,7 +64,7 @@ print(data_bundle)  # print details in data_bundle
embed = BertEmbedding(data_bundle.vocabs[Const.INPUT], model_dir_or_name=arg.bert_model_dir_or_name)

# define model
model = BertForNLI(embed, class_num=len(data_bundle.vocabs[Const.TARGET]))
model = BertForSentenceMatching(embed, num_labels=len(data_bundle.vocabs[Const.TARGET]))

# define optimizer and callback
optimizer = AdamW(lr=arg.lr, params=model.parameters())
@@ -76,11 +75,11 @@ if arg.task in ['snli']:
    # evaluate test set in every epoch if task is snli.

# define trainer
trainer = Trainer(train_data=data_bundle.datasets[arg.train_dataset_name], model=model,
trainer = Trainer(train_data=data_bundle.get_dataset(arg.train_dataset_name), model=model,
                  optimizer=optimizer,
                  batch_size=torch.cuda.device_count() * arg.batch_size_per_gpu,
                  n_epochs=arg.n_epochs, print_every=-1,
                  dev_data=data_bundle.datasets[arg.dev_dataset_name],
                  dev_data=data_bundle.get_dataset(arg.dev_dataset_name),
                  metrics=AccuracyMetric(), metric_key='acc',
                  device=[i for i in range(torch.cuda.device_count())],
                  check_code_level=-1,
@@ -92,7 +91,7 @@ trainer.train(load_best_model=True)

# define tester
tester = Tester(
    data=data_bundle.datasets[arg.test_dataset_name],
    data=data_bundle.get_dataset(arg.test_dataset_name),
    model=model,
    metrics=AccuracyMetric(),
    batch_size=torch.cuda.device_count() * arg.batch_size_per_gpu,
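The remaining edits in this script replace direct indexing of ``data_bundle.datasets`` with the ``get_dataset`` accessor. A small equivalence sketch, illustrative only; the constructor call is an assumption about the DataBundle API:

```python
from fastNLP import DataSet
from fastNLP.io import DataBundle

ds = DataSet({'words': [['a', 'b'], ['c']], 'target': [0, 1]})
data_bundle = DataBundle(datasets={'train': ds})  # assumed constructor signature

# old style: index the internal dict directly
train_old = data_bundle.datasets['train']
# new style used in the diff: go through the accessor
train_new = data_bundle.get_dataset('train')
assert train_old is train_new
```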
@@ -4,7 +4,6 @@ import torch
import torch.nn.functional as F

import fastNLP as loss
from fastNLP.core.losses import squash, unpad


class TestLoss(unittest.TestCase):
@@ -73,15 +72,3 @@ class TestLosserError(unittest.TestCase):
        with self.assertRaises(Exception):
            ans = l1({"my_predict": a}, {"truth": b, "my": a})


class TestLossUtils(unittest.TestCase):
    def test_squash(self):
        a, b = squash(torch.randn(3, 5), torch.randn(3, 5))
        self.assertEqual(tuple(a.size()), (3, 5))
        self.assertEqual(tuple(b.size()), (15,))

    def test_unpad(self):
        a, b = unpad(torch.randn(5, 8, 3), torch.randn(5, 8))
        self.assertEqual(tuple(a.size()), (5, 8, 3))
        self.assertEqual(tuple(b.size()), (5, 8))
@@ -2,7 +2,7 @@ import unittest
import torch

from fastNLP import SGD, Adam
from fastNLP import SGD, Adam, AdamW


class TestOptim(unittest.TestCase):
@@ -52,3 +52,12 @@ class TestOptim(unittest.TestCase):
        self.assertEqual(optim.__dict__["settings"]["lr"], 0.001)
        res = optim.construct_from_pytorch(torch.nn.Linear(10, 3).parameters())
        self.assertTrue(isinstance(res, torch.optim.Adam))

    def test_AdamW(self):
        optim = AdamW(params=torch.nn.Linear(10, 3).parameters())
        self.assertTrue('lr' in optim.defaults)
        self.assertTrue('weight_decay' in optim.defaults)
        optim = AdamW(params=torch.nn.Linear(10, 3).parameters(), lr=0.002, weight_decay=0.989)
        self.assertEqual(optim.defaults['lr'], 0.002)
        self.assertEqual(optim.defaults['weight_decay'], 0.989)
@@ -1,3 +0,0 @@
index sentence1 sentence2 label
0 Dana Reeve, the widow of the actor Christopher Reeve, has died of lung cancer at age 44, according to the Christopher Reeve Foundation. Christopher Reeve had an accident. not_entailment
1 Yet, we now are discovering that antibiotics are losing their effectiveness against illness. Disease-causing bacteria are mutating faster than we can come up with new antibiotics to fight the new variations. Bacteria is winning the war against antibiotics. entailment
@@ -1,3 +0,0 @@
index sentence1 sentence2
0 Mangla was summoned after Madhumita's sister Nidhi Shukla, who was the first witness in the case. Shukla is related to Mangla.
1 Authorities in Brazil say that more than 200 people are being held hostage in a prison in the country's remote, Amazonian-jungle state of Rondonia. Authorities in Brazil hold 200 people as hostage.
@@ -1,4 +0,0 @@
index sentence1 sentence2 label
0 No Weapons of Mass Destruction Found in Iraq Yet. Weapons of Mass Destruction Found in Iraq. not_entailment
1 A place of sorrow, after Pope John Paul II died, became a place of celebration, as Roman Catholic faithful gathered in downtown Chicago to mark the installation of new Pope Benedict XVI. Pope Benedict XVI is the new leader of the Roman Catholic Church. entailment
2 Herceptin was already approved to treat the sickest breast cancer patients, and the company said, Monday, it will discuss with federal regulators the possibility of prescribing the drug for more breast cancer patients. Herceptin can be used to treat breast cancer. entailment