
Add some tests

tags/v0.4.10
yh 5 years ago
parent
commit
5ebce3176f
6 changed files with 132 additions and 20 deletions
  1. fastNLP/core/batch.py (+17, -0)
  2. fastNLP/core/callback.py (+15, -16)
  3. test/core/test_callbacks.py (+55, -1)
  4. test/core/test_utils.py (+26, -2)
  5. test/embeddings/test_bert_embedding.py (+10, -1)
  6. test/modules/test_utils.py (+9, -0)

fastNLP/core/batch.py (+17, -0)

@@ -122,6 +122,14 @@ class BatchIter:

@staticmethod
def get_num_batches(num_samples, batch_size, drop_last):
"""
计算batch的数量。

:param int num_samples:
:param int batch_size:
:param bool drop_last: 如果最后一个batch没有batch_size这么多,是否就丢掉。
:return:
"""
num_batches = num_samples // batch_size
if not drop_last and (num_samples % batch_size > 0):
num_batches += 1
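
A quick illustration of the drop_last semantics above (direct calls to the static method shown in this hunk; the numbers are arbitrary):

from fastNLP.core.batch import BatchIter

BatchIter.get_num_batches(10, 3, drop_last=False)  # -> 4: the trailing partial batch is counted
BatchIter.get_num_batches(10, 3, drop_last=True)   # -> 3: the trailing partial batch is dropped
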
@@ -134,6 +142,11 @@ class BatchIter:
yield batch_x, batch_y

def get_batch_indices(self):
"""
获取当前已经输出的batch的index。

:return:
"""
return self.cur_batch_indices

def __len__(self):
@@ -193,6 +206,10 @@ class DataSetIter(BatchIter):


class TorchLoaderIter(BatchIter):
"""
与DataSetIter类似,但用于pytorch的DataSet对象。通过使用TorchLoaderIter封装pytorch的DataSet,然后将其传入到Trainer中。

"""
def __init__(self, dataset):
super().__init__()
assert isinstance(dataset, torch.utils.data.DataLoader)
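
A minimal usage sketch of the wrapper above, assuming a standard torch DataLoader whose batches already match what Trainer expects (my_torch_dataset and the Trainer wiring are placeholders, not part of this commit):

import torch
from fastNLP.core.batch import TorchLoaderIter

loader = torch.utils.data.DataLoader(my_torch_dataset, batch_size=32, shuffle=True)
batch_iter = TorchLoaderIter(loader)  # __init__ above asserts a torch.utils.data.DataLoader
# batch_iter can then be handed to Trainer in place of a DataSetIter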


fastNLP/core/callback.py (+15, -16)

@@ -590,7 +590,7 @@ class FitlogCallback(Callback):
try:
eval_result = tester.test()
if self.verbose != 0:
self.pbar.write("Evaluation on DataSet {}:".format(key))
self.pbar.write("FitlogCallback evaluation on {}:".format(key))
self.pbar.write(tester._format_eval_results(eval_result))
fitlog.add_metric(eval_result, name=key, step=self.step, epoch=self.epoch)
if better_result:
@@ -609,14 +609,16 @@ class FitlogCallback(Callback):

class EvaluateCallback(Callback):
"""
该callback用于扩展Trainer训练过程中只能对dev数据进行验证的问题。
通过使用该Callback可以使得Trainer在evaluate dev之外还可以evaluate其它数据集,比如测试集。每一次验证dev之前都会先验证EvaluateCallback
中的数据。
"""

def __init__(self, data=None, tester=None):
"""
:param ~fastNLP.DataSet,Dict[~fastNLP.DataSet] data: 传入DataSet对象,会使用多个Trainer中的metric对数据进行验证。如果需要传入多个
:param ~fastNLP.DataSet,Dict[~fastNLP.DataSet] data: 传入DataSet对象,会使用Trainer中的metric对数据进行验证。如果需要传入多个
DataSet请通过dict的方式传入。
:param ~fastNLP.Tester,Dict[~fastNLP.DataSet] tester: Tester对象,将在on_valid_end时调用。
:param ~fastNLP.Tester,Dict[~fastNLP.DataSet] tester: Tester对象, 通过使用Tester对象,可以使得验证的metric与Trainer中
的metric不一样。
"""
super().__init__()
self.datasets = {}
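
A hedged sketch of the dict form described in the docstring above (test_data and ood_data are placeholder DataSets; the single-DataSet form is exercised in the test added further down):

from fastNLP.core.callback import EvaluateCallback

# evaluate both extra datasets with the Trainer's metric, right before each dev evaluation
evaluate_cb = EvaluateCallback(data={'test': test_data, 'ood': ood_data})
# pass callbacks=evaluate_cb (or a list of callbacks) to Trainer
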
@@ -659,13 +661,10 @@ class EvaluateCallback(Callback):
for key, tester in self.testers.items():
try:
eval_result = tester.test()
# self.pbar.write("Evaluation on {}:".format(key))
self.logger.info("Evaluation on {}:".format(key))
# self.pbar.write(tester._format_eval_results(eval_result))
self.logger.info("EvaluateCallback evaluation on {}:".format(key))
self.logger.info(tester._format_eval_results(eval_result))
except Exception:
# self.pbar.write("Exception happens when evaluate on DataSet named `{}`.".format(key))
self.logger.info("Exception happens when evaluate on DataSet named `{}`.".format(key))
self.logger.error("Exception happens when evaluate on DataSet named `{}`.".format(key))


class LRScheduler(Callback):
@@ -872,15 +871,16 @@ class TensorboardCallback(Callback):

class WarmupCallback(Callback):
"""
Adjusts the learning rate on a fixed schedule
The learning rate rises from 0 to the configured learning rate at a fixed rate
"""
def __init__(self, warmup=0.1, schedule='constant'):
"""
:param int,float warmup: if warmup is an int, the learning rate follows the schedule policy up to that step; if warmup is a float,
e.g. 0.1, the learning rate is adjusted according to the schedule policy for the first 10% of the steps.
:param str schedule: how to adjust. linear: rise to the specified learning rate (taken from the optimizer in the Trainer) during the warmup
steps, then decay to 0 over the remaining steps; constant: rise to the specified learning rate during the warmup steps, then keep it afterwards.
:param str schedule: how to adjust.
linear: rise to the specified learning rate (taken from the optimizer in the Trainer) during the warmup steps, then decay to 0 over the remaining steps;
constant: rise to the specified learning rate during the warmup steps, then keep that learning rate for the remaining steps.
"""
super().__init__()
self.warmup = max(warmup, 0.)
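
A short sketch of the two warmup modes documented above (constructor calls only; hooking the callback into Trainer follows the test added below):

from fastNLP.core.callback import WarmupCallback

# float warmup: the first 10% of all training steps ramp the lr up from 0, then hold it constant
warmup_cb = WarmupCallback(warmup=0.1, schedule='constant')

# int warmup: ramp up for exactly 1000 steps, then decay the lr linearly to 0
warmup_cb = WarmupCallback(warmup=1000, schedule='linear')
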
@@ -935,15 +935,14 @@ class SaveModelCallback(Callback):
def __init__(self, save_dir, top=3, only_param=False, save_on_exception=False):
"""
:param str save_dir: directory in which to store models; a subdirectory named with a timestamp is created under it to hold the models
:param str save_dir: directory in which to store models; a subdirectory named with a timestamp is created under it to hold the models. If save_dir does not exist it is created automatically
:param int top: keep the models with the top-N dev performance. -1 keeps all models.
:param bool only_param: whether to save only the model weights.
:param bool only_param: whether to save only the model weights.
:param save_on_exception: whether to save a copy of the model when an exception occurs. The model is named epoch:x_step:x_Exception:{exception_name}.
"""
super().__init__()

if not os.path.isdir(save_dir):
raise IsADirectoryError("{} is not a directory.".format(save_dir))
os.makedirs(save_dir, exist_ok=True)
self.save_dir = save_dir
if top < 0:
self.top = sys.maxsize
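
For reference, a construction sketch covering the parameters documented above (the directory name is a placeholder; the Trainer wiring mirrors the test added below):

from fastNLP.core.callback import SaveModelCallback

# keep the 3 best dev checkpoints, store only state_dicts, and also snapshot the model if training crashes
saver = SaveModelCallback('checkpoints/', top=3, only_param=True, save_on_exception=True)
# 'checkpoints/' is created automatically if missing (os.makedirs in the hunk above)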


test/core/test_callbacks.py (+55, -1)

@@ -2,6 +2,8 @@ import unittest

import numpy as np
import torch
import os
import shutil

from fastNLP.core.callback import EarlyStopCallback, GradientClipCallback, LRScheduler, ControlC, \
LRFinder, TensorboardCallback
@@ -13,7 +15,8 @@ from fastNLP import SGD
from fastNLP import Trainer
from fastNLP.models.base_model import NaiveClassifier
from fastNLP.core.callback import EarlyStopError

from fastNLP.core.callback import EvaluateCallback, FitlogCallback, SaveModelCallback
from fastNLP.core.callback import WarmupCallback

def prepare_env():
def prepare_fake_dataset():
@@ -113,3 +116,54 @@ class TestCallback(unittest.TestCase):
check_code_level=2)
trainer.train()
assert passed_epochs == list(range(1, total_epochs + 1))

def test_evaluate_callback(self):
data_set, model = prepare_env()
from fastNLP import Tester
tester = Tester(data=data_set, model=model, metrics=AccuracyMetric(pred="predict", target="y"))
evaluate_callback = EvaluateCallback(data_set, tester)

trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=False,
callbacks=evaluate_callback, check_code_level=2)
trainer.train()

def test_fitlog_callback(self):
import fitlog
os.makedirs('logs/')
fitlog.set_log_dir('logs/')
data_set, model = prepare_env()
from fastNLP import Tester
tester = Tester(data=data_set, model=model, metrics=AccuracyMetric(pred="predict", target="y"))
fitlog_callback = FitlogCallback(data_set, tester)

trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
callbacks=fitlog_callback, check_code_level=2)
trainer.train()
shutil.rmtree('logs/')

def test_save_model_callback(self):
data_set, model = prepare_env()
top = 3
save_model_callback = SaveModelCallback('save_models/', top=top)
trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
callbacks=save_model_callback, check_code_level=2)
trainer.train()

timestamp = os.listdir('save_models')[0]
self.assertEqual(len(os.listdir(os.path.join('save_models', timestamp))), top)
shutil.rmtree('save_models/')

def test_warmup_callback(self):
data_set, model = prepare_env()
warmup_callback = WarmupCallback()
trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
batch_size=32, n_epochs=5, print_every=50, dev_data=data_set,
metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=True,
callbacks=warmup_callback, check_code_level=2)
trainer.train()

test/core/test_utils.py (+26, -2)

@@ -10,7 +10,8 @@ import torch
from torch import nn
from fastNLP.core.utils import _move_model_to_device, _get_model_device
import numpy as np
from fastNLP.core.utils import seq_len_to_mask
from fastNLP.core.utils import seq_len_to_mask, get_seq_len
from fastNLP.core.utils import iob2, iob2bioes

class Model(nn.Module):
def __init__(self):
@@ -263,4 +264,27 @@ class TestSeqLenToMask(unittest.TestCase):
# 3. pad to the specified length
seq_len = torch.randint(1, 10, size=(10, ))
mask = seq_len_to_mask(seq_len, 100)
self.assertEqual(100, mask.size(1))
self.assertEqual(100, mask.size(1))


class TestUtils(unittest.TestCase):
def test_get_seq_len(self):
seq_len = torch.randint(1, 10, size=(10, ))
mask = seq_len_to_mask(seq_len)
new_seq_len = get_seq_len(mask)
self.assertSequenceEqual(seq_len.tolist(), new_seq_len.tolist())

def test_iob2(self):
tags = ['B-NP', 'O', 'B-NP', 'B-VP', 'B-NP', 'I-NP', 'O', 'B-NP', 'B-PP', 'B-NP', 'I-NP', 'O', 'B-NP', 'I-NP', 'B-NP', 'O', 'B-NP', 'I-NP', 'I-NP']
convert_tags = ['B-NP', 'O', 'B-NP', 'B-VP', 'B-NP', 'I-NP', 'O', 'B-NP', 'B-PP', 'B-NP', 'I-NP', 'O', 'B-NP', 'I-NP', 'B-NP', 'O', 'B-NP', 'I-NP', 'I-NP']
self.assertSequenceEqual(convert_tags, iob2(tags))

tags = ['I-NP', 'O', 'I-NP', 'I-VP', 'B-NP', 'I-NP', 'O', 'I-NP', 'I-PP', 'B-NP', 'I-NP', 'O', 'B-NP', 'I-NP', 'B-NP', 'O', 'B-NP', 'I-NP', 'I-NP']
self.assertSequenceEqual(convert_tags, iob2(tags))

def test_iob2bioes(self):
tags = ['B-NP', 'O', 'B-NP', 'B-VP', 'B-NP', 'I-NP', 'O', 'B-NP', 'B-PP', 'B-NP', 'I-NP', 'O', 'B-NP', 'I-NP', 'B-NP', 'O', 'B-NP', 'I-NP', 'I-NP']
convert_tags = ['S-NP', 'O', 'S-NP', 'S-VP', 'B-NP', 'E-NP', 'O', 'S-NP', 'S-PP', 'B-NP', 'E-NP', 'O', 'B-NP', 'E-NP', 'S-NP', 'O', 'B-NP', 'I-NP', 'E-NP']

self.assertSequenceEqual(convert_tags, iob2bioes(tags))


test/embeddings/test_bert_embedding.py (+10, -1)

@@ -1,6 +1,6 @@
import unittest
from fastNLP import Vocabulary
from fastNLP.embeddings import BertEmbedding
from fastNLP.embeddings import BertEmbedding, BertWordPieceEncoder
import torch
import os

@@ -37,3 +37,12 @@ class TestBertEmbedding(unittest.TestCase):
words = torch.LongTensor([[2, 3, 4, 0]])
result = embed(words)
self.assertEqual(result.size(), (1, 4, 16))


class TestBertWordPieceEncoder(unittest.TestCase):
def test_bert_word_piece_encoder(self):
embed = BertWordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1)
from fastNLP import DataSet
ds = DataSet({'words': ["this is a test . [SEP]".split()]})
embed.index_datasets(ds, field_name='words')
self.assertTrue(ds.has_field('word_pieces'))

test/modules/test_utils.py (+9, -0)

@@ -0,0 +1,9 @@
import unittest
import torch
from fastNLP.modules.utils import get_dropout_mask

class TestUtil(unittest.TestCase):
def test_get_dropout_mask(self):
tensor = torch.randn(3, 4)
mask = get_dropout_mask(0.3, tensor)
self.assertSequenceEqual(mask.size(), torch.Size([3, 4]))
