
- fix callback & tests

tags/v0.4.10
yunfan committed 6 years ago
commit f4e64906d4
4 changed files with 40 additions and 115 deletions
  1. fastNLP/core/callback.py      +33   -1
  2. test/core/test_callbacks.py    +6    -2
  3. test/core/test_dataset.py      +1    -0
  4. test/models/test_enas.py       +0    -112

fastNLP/core/callback.py  (+33, -1)

@@ -29,7 +29,7 @@ class Callback(object):
     @property
     def n_steps(self):
         """total number of steps for training"""
-        return self.n_steps
+        return self._trainer.n_steps
 
     @property
     def batch_size(self):
@@ -124,6 +124,21 @@ class Callback(object):
         pass
 
 
+def transfer(func):
+    """Decorator: forwards a call on CallbackManager to each Callback subclass.
+    :param func:
+    :return:
+    """
+
+    def wrapper(manager, *arg):
+        returns = []
+        for callback in manager.callbacks:
+            returns.append(getattr(callback, func.__name__)(*arg))
+        return returns
+
+    return wrapper
+
+
 class CallbackManager(Callback):
     """A manager for all callbacks passed into Trainer.
     It collects resources inside Trainer and raise callbacks.
@@ -150,42 +165,59 @@ class CallbackManager(Callback):
         else:
             raise TypeError(f"Expect callbacks in CallbackManager(callbacks) to be list. Got {type(callbacks)}.")
 
+        for env_name, env_val in env.items():
+            for callback in self.callbacks:
+                setattr(callback, '_'+env_name, env_val)  # Callback.trainer
+
+    @transfer
     def on_train_begin(self):
         pass
 
+    @transfer
     def on_epoch_begin(self):
         pass
 
+    @transfer
     def on_batch_begin(self, batch_x, batch_y, indices):
         pass
 
+    @transfer
     def on_loss_begin(self, batch_y, predict_y):
         pass
 
+    @transfer
     def on_backward_begin(self, loss):
         pass
 
+    @transfer
     def on_backward_end(self):
         pass
 
+    @transfer
     def on_step_end(self):
         pass
 
+    @transfer
     def on_batch_end(self):
         pass
 
+    @transfer
     def on_valid_begin(self):
         pass
 
+    @transfer
     def on_valid_end(self, eval_result, metric_key):
         pass
 
+    @transfer
     def on_epoch_end(self):
         pass
 
+    @transfer
     def on_train_end(self):
         pass
 
+    @transfer
     def on_exception(self, exception):
         pass
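The two changes to callback.py work together: the old `n_steps` property returned `self.n_steps`, which re-enters the property and recurses forever, while the new `transfer` decorator lets `CallbackManager` fan a single call out to every registered callback after binding trainer resources onto them. Below is a minimal standalone sketch of that pattern, with simplified names (FakeTrainer, Printer) rather than the real fastNLP classes:

# Minimal sketch of the dispatch + delegation pattern in this commit (not the fastNLP source).

def transfer(func):
    """Forward a call on the manager to every registered callback."""
    def wrapper(manager, *args):
        return [getattr(cb, func.__name__)(*args) for cb in manager.callbacks]
    return wrapper


class Callback:
    def __init__(self):
        self._trainer = None  # bound later by the manager

    @property
    def n_steps(self):
        # The old body `return self.n_steps` re-enters this property and
        # raises RecursionError; delegating to the trainer is the fix.
        return self._trainer.n_steps

    def on_epoch_begin(self):
        pass


class CallbackManager(Callback):
    def __init__(self, env, callbacks):
        super().__init__()
        self.callbacks = callbacks
        for env_name, env_val in env.items():
            for cb in self.callbacks:
                setattr(cb, '_' + env_name, env_val)  # e.g. cb._trainer = trainer

    @transfer
    def on_epoch_begin(self):
        pass


class FakeTrainer:
    n_steps = 100


class Printer(Callback):
    def on_epoch_begin(self):
        print("epoch begins, total steps:", self.n_steps)


manager = CallbackManager(env={'trainer': FakeTrainer()}, callbacks=[Printer()])
manager.on_epoch_begin()  # dispatches to Printer, which reads n_steps from the fake trainer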




test/core/test_callbacks.py  (+6, -2)

@@ -139,11 +139,14 @@ class TestCallback(unittest.TestCase):
 
     def test_readonly_property(self):
         from fastNLP.core.callback import Callback
+        passed_epochs = []
+        total_epochs = 5
 
         class MyCallback(Callback):
             def __init__(self):
                 super(MyCallback, self).__init__()
 
-            def on_epoch_begin(self, cur_epoch, total_epoch):
+            def on_epoch_begin(self):
+                passed_epochs.append(self.epoch)
                 print(self.n_epochs, self.n_steps, self.batch_size)
                 print(self.model)
                 print(self.optimizer)
@@ -151,7 +154,7 @@ class TestCallback(unittest.TestCase):
         data_set, model = prepare_env()
         trainer = Trainer(data_set, model,
                           loss=BCELoss(pred="predict", target="y"),
-                          n_epochs=5,
+                          n_epochs=total_epochs,
                           batch_size=32,
                           print_every=50,
                           optimizer=SGD(lr=0.1),
@@ -161,3 +164,4 @@ class TestCallback(unittest.TestCase):
                           metrics=AccuracyMetric(pred="predict", target="y"),
                           callbacks=[MyCallback()])
         trainer.train()
+        assert passed_epochs == list(range(1, total_epochs+1))
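The rewritten test records which epochs the callback sees: `on_epoch_begin` closes over the local `passed_epochs` list, and `self.epoch` comes from the trainer environment bound onto the callback. A rough sketch of the same idea without a real Trainer (hypothetical driver function, not the fastNLP API):

# Sketch: a callback defined inside a test closes over a local list, so the
# test can assert on exactly which epochs the (fake) training loop reported.

def run_fake_training(callback, total_epochs):
    for epoch in range(1, total_epochs + 1):
        callback.epoch = epoch  # in fastNLP this value comes from the Trainer
        callback.on_epoch_begin()


def test_epochs_are_seen():
    passed_epochs = []
    total_epochs = 5

    class MyCallback:
        def on_epoch_begin(self):
            passed_epochs.append(self.epoch)  # closure over the test's local list

    run_fake_training(MyCallback(), total_epochs)
    assert passed_epochs == list(range(1, total_epochs + 1))


test_epochs_are_seen()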

test/core/test_dataset.py  (+1, -0)

@@ -217,6 +217,7 @@ class TestDataSetMethods(unittest.TestCase):
         self.assertTrue(len(ds) > 0)
 
     def test_add_null(self):
+        # TODO test failed because 'fastNLP\core\fieldarray.py:143: RuntimeError'
         ds = DataSet()
         ds.add_field('test', [])
         ds.set_target('test')
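The TODO above keeps a known-failing case in the suite as a plain comment. If the RuntimeError is expected for now, one option (an alternative to the comment, not what this commit does) is unittest's expectedFailure marker; a sketch, assuming the reported failure still reproduces:

import unittest

from fastNLP import DataSet


class TestAddNull(unittest.TestCase):

    @unittest.expectedFailure  # the TODO reports a RuntimeError from fieldarray.py for empty fields
    def test_add_null(self):
        ds = DataSet()
        ds.add_field('test', [])
        ds.set_target('test')


if __name__ == '__main__':
    unittest.main()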


test/models/test_enas.py  (+0, -112)

@@ -1,112 +0,0 @@
-import unittest
-
-from fastNLP import DataSet
-from fastNLP import Instance
-from fastNLP import Vocabulary
-from fastNLP.core.losses import CrossEntropyLoss
-from fastNLP.core.metrics import AccuracyMetric
-
-
-class TestENAS(unittest.TestCase):
-    def testENAS(self):
-        # read data from csv into a DataSet
-        sample_path = "tutorials/sample_data/tutorial_sample_dataset.csv"
-        dataset = DataSet.read_csv(sample_path, headers=('raw_sentence', 'label'),
-                                   sep='\t')
-        print(len(dataset))
-        print(dataset[0])
-        print(dataset[-3])
-
-        dataset.append(Instance(raw_sentence='fake data', label='0'))
-        # lowercase the raw sentences
-        dataset.apply(lambda x: x['raw_sentence'].lower(), new_field_name='raw_sentence')
-        # convert label to int
-        dataset.apply(lambda x: int(x['label']), new_field_name='label')
-
-        # split sentences on whitespace
-        def split_sent(ins):
-            return ins['raw_sentence'].split()
-
-        dataset.apply(split_sent, new_field_name='words')
-
-        # add sequence length information
-        dataset.apply(lambda x: len(x['words']), new_field_name='seq_len')
-        print(len(dataset))
-        print(dataset[0])
-
-        # DataSet.drop(func) filters out data
-        dataset.drop(lambda x: x['seq_len'] <= 3)
-        print(len(dataset))
-
-        # specify which fields in the DataSet should be converted to tensors
-        # set target: the gold labels used by loss or evaluate, for computing loss and evaluating the model
-        dataset.set_target("label")
-        # set input: used in the model's forward pass
-        dataset.set_input("words", "seq_len")
-
-        # split into test set and training set
-        test_data, train_data = dataset.split(0.5)
-        print(len(test_data))
-        print(len(train_data))
-
-        # build the vocabulary, Vocabulary.add(word)
-        vocab = Vocabulary(min_freq=2)
-        train_data.apply(lambda x: [vocab.add(word) for word in x['words']])
-        vocab.build_vocab()
-
-        # index the sentences, Vocabulary.to_index(word)
-        train_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='words')
-        test_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='words')
-        print(test_data[0])
-
-        # these preprocessing utilities can also be used for projects such as reinforcement learning or GANs
-        from fastNLP.core.batch import Batch
-        from fastNLP.core.sampler import RandomSampler
-
-        batch_iterator = Batch(dataset=train_data, batch_size=2, sampler=RandomSampler())
-        for batch_x, batch_y in batch_iterator:
-            print("batch_x has: ", batch_x)
-            print("batch_y has: ", batch_y)
-            break
-
-        from fastNLP.models.enas_model import ENASModel
-        from fastNLP.models.enas_controller import Controller
-        model = ENASModel(embed_num=len(vocab), num_classes=5)
-        controller = Controller()
-
-        from fastNLP.models.enas_trainer import ENASTrainer
-        from copy import deepcopy
-
-        # rename the corresponding DataSet fields so they match the parameter names of the model's forward
-        train_data.rename_field('words', 'word_seq')  # input field must match the forward parameter
-        train_data.rename_field('label', 'label_seq')
-        test_data.rename_field('words', 'word_seq')
-        test_data.rename_field('label', 'label_seq')
-
-        loss = CrossEntropyLoss(pred="output", target="label_seq")
-        metric = AccuracyMetric(pred="predict", target="label_seq")
-
-        trainer = ENASTrainer(model=model, controller=controller, train_data=train_data, dev_data=test_data,
-                              loss=CrossEntropyLoss(pred="output", target="label_seq"),
-                              metrics=AccuracyMetric(pred="predict", target="label_seq"),
-                              check_code_level=-1,
-                              save_path=None,
-                              batch_size=32,
-                              print_every=1,
-                              n_epochs=3,
-                              final_epochs=1)
-        trainer.train()
-        print('Train finished!')
-
-        # use Tester to evaluate on test_data
-        from fastNLP import Tester
-
-        tester = Tester(data=test_data, model=model, metrics=AccuracyMetric(pred="predict", target="label_seq"),
-                        batch_size=4)
-
-        acc = tester.test()
-        print(acc)
-
-
-if __name__ == '__main__':
-    unittest.main()
