Browse Source

Trainer支持fp16

tags/v1.0.0alpha
yh_cc 3 years ago
parent
commit
2e8350cd16
15 changed files with 466 additions and 62 deletions
  1. +1
    -1
      docs/source/tutorials/tutorial_2_vocabulary.rst
  2. +2
    -1
      fastNLP/core/__init__.py
  3. +20
    -16
      fastNLP/core/callback.py
  4. +0
    -1
      fastNLP/core/dist_trainer.py
  5. +20
    -0
      fastNLP/core/losses.py
  6. +103
    -2
      fastNLP/core/sampler.py
  7. +18
    -9
      fastNLP/core/tester.py
  8. +58
    -12
      fastNLP/core/trainer.py
  9. +92
    -11
      fastNLP/core/utils.py
  10. +5
    -5
      fastNLP/core/vocabulary.py
  11. +1
    -1
      fastNLP/embeddings/bert_embedding.py
  12. +3
    -1
      fastNLP/embeddings/static_embedding.py
  13. +53
    -0
      fastNLP/models/base_model.py
  14. +8
    -0
      tests/core/test_sampler.py
  15. +82
    -2
      tests/core/test_trainer.py

+ 1
- 1
docs/source/tutorials/tutorial_2_vocabulary.rst View File

@@ -86,7 +86,7 @@ fastNLP中的Vocabulary
# 将验证集或者测试集在建立词表是放入no_create_entry_dataset这个参数中。
vocab.from_dataset(tr_data, field_name='chars', no_create_entry_dataset=[dev_data])

:class:`~fastNLP.Vocabulary` 中的 `no_create_entry` , 建议在添加来自于测试集和验证集的词的时候将该参数置为True, 或将验证集和测试集
:class:`~fastNLP.Vocabulary` 中的 `no_create_entry` ,如果您并不关心具体的原理,您可以直接采取以下的建议:在添加来自于非训练集的词的时候将该参数置为True, 或将非训练集数据
传入 `no_create_entry_dataset` 参数。它们的意义是在接下来的模型会使用pretrain的embedding(包括glove, word2vec, elmo与bert)且会finetune的
情况下,如果仅使用来自于train的数据建立vocabulary,会导致只出现在test与dev中的词语无法充分利用到来自于预训练embedding的信息(因为他们
会被认为是unk),所以在建立词表的时候将test与dev考虑进来会使得最终的结果更好。


+ 2
- 1
fastNLP/core/__init__.py View File

@@ -62,6 +62,7 @@ __all__ = [
"CrossEntropyLoss",
"L1Loss",
"BCELoss",
"BCEWithLogits",
"NLLLoss",
"LossInForward",
"CMRC2018Loss",
@@ -98,7 +99,7 @@ from .dataset import DataSet
from .field import FieldArray, Padder, AutoPadder, EngChar2DPadder
from .instance import Instance
from .losses import LossFunc, CrossEntropyLoss, L1Loss, BCELoss, NLLLoss, \
LossInForward, CMRC2018Loss, LossBase, MSELoss
LossInForward, CMRC2018Loss, LossBase, MSELoss, BCEWithLogits
from .metrics import AccuracyMetric, SpanFPreRecMetric, CMRC2018Metric, ClassifyFPreRecMetric, MetricBase,\
ConfusionMatrixMetric
from .optimizer import Optimizer, SGD, Adam, AdamW


+ 20
- 16
fastNLP/core/callback.py View File

@@ -86,7 +86,6 @@ except:
from .dataset import DataSet
from .tester import Tester
from ._logger import logger
from .utils import _check_fp16
from ._parallel_utils import _model_contains_inner_module

try:
@@ -94,11 +93,6 @@ try:
except:
pass

try:
from apex import amp
except:
amp = None


class Callback(object):
r"""
@@ -123,6 +117,20 @@ class Callback(object):
该属性可以通过self.trainer获取到,一般情况下不需要使用这个属性。
"""
return self._trainer

@property
def grad_scaler(self):
    r"""
    Gradient scaler used for float16 (fp16) training, proxied from the trainer.
    """
    return self._trainer.grad_scaler

@property
def auto_cast(self):
    r"""
    Autocast context manager used for float16 (fp16) training, proxied from the trainer.
    """
    return self._trainer.auto_cast
@property
def step(self):
@@ -472,14 +480,9 @@ class GradientClipCallback(Callback):
def on_backward_end(self):
if self.step%self.update_every==0:
if self.parameters is None:
if getattr(self.trainer, 'fp16', ''):
_check_fp16()
self.clip_fun(amp.master_params(self.optimizer), self.clip_value)
else:
self.clip_fun(self.model.parameters(), self.clip_value)
else:
self.clip_fun(self.parameters, self.clip_value)
if self.trainer.fp16:
self.grad_scaler.unscale_(self.optimizer)
self.clip_fun(self.parameters, self.clip_value)


class EarlyStopCallback(Callback):
@@ -569,10 +572,10 @@ class FitlogCallback(Callback):
if len(self.datasets) > 0:
for key, data in self.datasets.items():
tester = Tester(data=data, model=self.model,
batch_size=self.trainer.kwargs.get('dev_batch_size', self.batch_size),
batch_size=self.trainer.kwargs.get('dev_batch_size', self.trainer.batch_size),
metrics=self.trainer.metrics,
verbose=0,
use_tqdm=self.trainer.test_use_tqdm,
use_tqdm=self.trainer.kwargs.get('test_use_tqdm', self.trainer.use_tqdm),
sampler=self.trainer.kwargs.get('test_sampler', None))
self.testers[key] = tester
fitlog.add_progress(total_steps=self.n_steps)
@@ -948,6 +951,7 @@ class CheckPointCallback(Callback):
model = model.module
model.load_state_dict(states['model'])
self.optimizer.load_state_dict(states['optimizer'])
self.grad_scaler.load_state_dict(states['grad_scaler'])
self.trainer.epoch = states['epoch'] + 1 # 因为是结束储存的,所以需要从下一个epoch开始
self.trainer.step = states['step']
if 'best_dev_epoch' in states:


+ 0
- 1
fastNLP/core/dist_trainer.py View File

@@ -29,7 +29,6 @@ from .dataset import DataSet
from .losses import _prepare_losser
from .optimizer import Optimizer
from .utils import _build_args
from .utils import _check_fp16
from .utils import _get_func_signature
from .utils import _move_dict_value_to_device



+ 20
- 0
fastNLP/core/losses.py View File

@@ -10,6 +10,7 @@ __all__ = [
"CrossEntropyLoss",
"BCELoss",
"BCEWithLogits",
"L1Loss",
"NLLLoss",
"MSELoss",
@@ -311,6 +312,25 @@ class BCELoss(LossBase):
return F.binary_cross_entropy(input=pred, target=target, reduction=self.reduction)


class BCEWithLogits(LossBase):
    r"""
    Binary cross entropy loss that applies sigmoid internally, so predictions
    must NOT be passed through sigmoid before being handed to this loss.

    :param pred: mapping for `pred` in the parameter map; None means `pred` -> `pred`
    :param target: mapping for `target` in the parameter map; None means `target` -> `target`
    :param str reduction: one of `mean`, `sum`, `none`.
    """

    def __init__(self, pred=None, target=None, reduction='mean'):
        super().__init__()
        self._init_param_map(pred=pred, target=target)
        assert reduction in ('mean', 'sum', 'none')
        self.reduction = reduction

    def get_loss(self, pred, target):
        # sigmoid + BCE fused in one numerically stable call
        return F.binary_cross_entropy_with_logits(input=pred, target=target, reduction=self.reduction)


class NLLLoss(LossBase):
r"""
负对数似然损失函数


+ 103
- 2
fastNLP/core/sampler.py View File

@@ -112,6 +112,108 @@ class BucketSampler(Sampler):
return list(chain(*batchs))


class ConstTokenNumSampler(Sampler):
    """
    Sampler that keeps the total number of tokens in each batch as close as
    possible to ``max_token``.

    Usage::

        >>> # assume tr_data exists and has a field `seq_len` holding each instance's token count
        >>> from fastNLP import DataSetIter, Trainer
        >>> sampler = ConstTokenNumSampler('src_seq_len', max_token=4096)
        >>>
        >>> # pass the sampler directly to Trainer; the batch_size argument is then ignored
        >>> trainer = Trainer(tr_data, model, optimizer=optimizer, loss=TranslationLoss(),
        >>>             batch_size=1, sampler=sampler, drop_last=False, update_every=1)
    """

    def __init__(self, seq_len_field_name, max_token=4096, max_sentence=-1, need_be_multiple_of=1, num_bucket=-1):
        """
        :param str seq_len_field_name: name of the field that stores each sample's length
        :param int max_token: maximum total token budget (max_len * batch_size) per batch
        :param int max_sentence: maximum number of instances per batch; -1 means limited only by max_token
        :param int need_be_multiple_of: each generated batch size must be a multiple of this value
            (useful under DataParallel)
        :param int num_bucket: split the data into this many buckets by length; samples are combined
            within a bucket as much as possible to reduce padding
        """
        assert (max_sentence != -1 and max_sentence >= need_be_multiple_of) or max_sentence < 1
        self.seq_len_field_name = seq_len_field_name
        self.num_bucket = num_bucket
        self.max_token = max_token
        self._max_sentence = max_sentence
        self.need_be_multiple_of = need_be_multiple_of

    def __call__(self, data_set):
        """Read lengths from ``data_set``, bucket the indices by length, and build the first batch order."""
        assert len(data_set) > self.num_bucket, "The number of samples should be larger than buckets."
        seq_len = data_set.get_field(self.seq_len_field_name)
        self.seq_len = seq_len
        seq_len_indice = [(length, i) for i, length in enumerate(seq_len)]
        seq_len_indice.sort(key=lambda x: x[0])
        indice_in_buckets = []
        if self.num_bucket > 0:
            sample_per_bucket = len(seq_len_indice) // self.num_bucket
            # Slice consecutive (sorted-by-length) runs into buckets until every
            # sample has been placed. NOTE: the original loop compared the bucket
            # count against the sample count, which appended O(N) empty buckets;
            # counting placed samples avoids that.
            placed = 0
            i = 0
            while placed < len(seq_len_indice):
                bucket = seq_len_indice[i * sample_per_bucket:(i + 1) * sample_per_bucket]
                indice_in_buckets.append(bucket)
                placed += len(bucket)
                i += 1
        else:
            indice_in_buckets = [seq_len_indice]
        self.indice_in_buckets = indice_in_buckets
        self.get_new_order()

    @property
    def max_sentence(self):
        # -1 (or any value < 1) means "no per-batch sentence limit"
        if self._max_sentence < 1:
            return 100000000
        return self._max_sentence

    @max_sentence.setter
    def max_sentence(self, max_sentence):
        self._max_sentence = max_sentence

    def get_new_order(self):
        """Reshuffle buckets and greedily pack indices into batches under the token/sentence budget."""
        np.random.shuffle(self.indice_in_buckets)
        for bucket in self.indice_in_buckets:
            np.random.shuffle(bucket)
        indices = list(chain(*self.indice_in_buckets))
        batches = []
        cur_max_len = 0
        batch = []
        for length, i in indices:
            max_len = max(length, cur_max_len)
            if max_len * (len(batch) + 1) > self.max_token or len(batch) >= self.max_sentence:
                # emit the current batch, trimming its tail so its size is a
                # multiple of need_be_multiple_of; trimmed samples are carried over
                left_sample = len(batch) % self.need_be_multiple_of
                add_samples = batch.copy()
                cur_max_len = length
                if left_sample != 0:
                    add_samples = add_samples[:-left_sample]
                    batch = batch[-left_sample:]
                    # `batch` holds sample *indices*; look up their true lengths.
                    # (The original used max(batch) — the maximum index — which
                    # under-/over-estimated the carried-over batch's max length.)
                    cur_max_len = max(cur_max_len, max(self.seq_len[j] for j in batch))
                else:
                    batch = []
                if len(add_samples) == 0:
                    raise RuntimeError(f"The sample `{i}` is too long to make a batch with {self.need_be_multiple_of} samples.")
                batches.append(add_samples)
            else:
                cur_max_len = max_len
            batch.append(i)
        if batch:
            left_sample = len(batch) % self.need_be_multiple_of
            add_samples = batch.copy()
            if left_sample != 0:
                # drop the tail that cannot form a full multiple
                add_samples = add_samples[:-left_sample].copy()
            if add_samples:
                batches.append(add_samples)
        np.random.shuffle(batches)
        self.batches = batches

    def __iter__(self):
        for batch in self.batches:
            yield batch
        # prepare a fresh shuffled order for the next epoch
        self.get_new_order()

    def __len__(self):
        return len(self.batches)


class ConstantTokenNumSampler:
"""
尽量保证每个batch的输入token数量是接近的。
@@ -119,7 +221,7 @@ class ConstantTokenNumSampler:
使用示例
>>> # 假设已经有了tr_data并有一个field叫做seq_len保存了每个instance的token数量
>>> from fastNLP import DataSetIter, Trainer
>>> sampler = BatchSampler(tr_data.get_field('seq_len').content, max_token=4096)
>>> sampler = ConstantTokenNumSampler(tr_data.get_field('seq_len').content, max_token=4096)
>>> tr_iter = DataSetIter(tr_data,
>>> batch_size=1, sampler=None, as_numpy=False, num_workers=0, pin_memory=False,
>>> drop_last=False, timeout=0, worker_init_fn=None,
@@ -128,7 +230,6 @@ class ConstantTokenNumSampler:
>>> # 直接将tr_iter传入Trainer中,此时batch_size参数的值会被忽略
>>> trainer = Trainer(tr_iter, model, optimizer=optimizer, loss=TranslationLoss(),
>>> batch_size=1, sampler=None, drop_last=False, update_every=1)

"""
def __init__(self, seq_len, max_token=4096, max_sentence=-1, need_be_multiple_of=1, num_bucket=-1):
"""


+ 18
- 9
fastNLP/core/tester.py View File

@@ -53,6 +53,8 @@ from .utils import _move_dict_value_to_device
from .utils import _get_func_signature
from .utils import _get_model_device
from .utils import _move_model_to_device
from .utils import _build_fp16_env
from .utils import _can_use_fp16
from ._parallel_utils import _data_parallel_wrapper
from ._parallel_utils import _model_contains_inner_module
from functools import partial
@@ -70,7 +72,7 @@ class Tester(object):
"""
def __init__(self, data, model, metrics, batch_size=16, num_workers=0, device=None, verbose=1, use_tqdm=True,
**kwargs):
fp16=False, **kwargs):
r"""
:param ~fastNLP.DataSet,~fastNLP.BatchIter data: 需要测试的数据集
@@ -93,7 +95,9 @@ class Tester(object):
如果模型是通过predict()进行预测的话,那么将不能使用多卡(DataParallel)进行验证,只会使用第一张卡上的模型。
:param int verbose: 如果为0不输出任何信息; 如果为1,打印出验证结果。
:param bool use_tqdm: 是否使用tqdm来显示测试进度; 如果为False,则不会显示任何内容。
:param kwargs: 支持传入sampler控制测试顺序
:param bool fp16: 是否使用float16进行验证
:param kwargs:
Sampler sampler: 支持传入sampler控制测试顺序
"""
super(Tester, self).__init__()

@@ -147,7 +151,11 @@ class Tester(object):
else:
self._predict_func = self._model.forward
self._predict_func_wrapper = self._model.forward

if fp16:
_can_use_fp16(model=model, device=device, func=self._predict_func)
self.auto_cast, _grad_scaler = _build_fp16_env(not fp16)

def test(self):
r"""开始进行验证,并返回验证结果。

@@ -172,12 +180,13 @@ class Tester(object):

for batch_x, batch_y in data_iterator:
_move_dict_value_to_device(batch_x, batch_y, device=self._model_device)
pred_dict = self._data_forward(self._predict_func, batch_x)
if not isinstance(pred_dict, dict):
raise TypeError(f"The return value of {_get_func_signature(self._predict_func)} "
f"must be `dict`, got {type(pred_dict)}.")
for metric in self.metrics:
metric(pred_dict, batch_y)
with self.auto_cast():
pred_dict = self._data_forward(self._predict_func, batch_x)
if not isinstance(pred_dict, dict):
raise TypeError(f"The return value of {_get_func_signature(self._predict_func)} "
f"must be `dict`, got {type(pred_dict)}.")
for metric in self.metrics:
metric(pred_dict, batch_y)

if self.use_tqdm:
pbar.update()


+ 58
- 12
fastNLP/core/trainer.py View File

@@ -342,7 +342,7 @@ from .losses import _prepare_losser
from .metrics import _prepare_metrics
from .optimizer import Optimizer
from .sampler import Sampler
from .sampler import RandomSampler
from .sampler import RandomSampler, ConstTokenNumSampler
from .tester import Tester
from .utils import _CheckError
from .utils import _build_args
@@ -352,6 +352,8 @@ from .utils import _move_dict_value_to_device
from .utils import _get_func_signature
from .utils import _get_model_device
from .utils import _move_model_to_device
from .utils import _build_fp16_env
from .utils import _can_use_fp16
from ._parallel_utils import _model_contains_inner_module
from ._logger import logger

@@ -373,7 +375,7 @@ class Trainer(object):
num_workers=0, n_epochs=10, print_every=5,
dev_data=None, metrics=None, metric_key=None,
validate_every=-1, save_path=None, use_tqdm=True, device=None,
callbacks=None, check_code_level=0, **kwargs):
callbacks=None, check_code_level=0, fp16=False, **kwargs):
r"""
:param train_data: 训练集, :class:`~fastNLP.DataSet` 类型或 :class:`~fastNLP.BatchIter` 的子类
:param nn.modules model: 待训练的模型
@@ -422,9 +424,14 @@ class Trainer(object):
报告警告信息; 2: 有任何field没有被使用都报错. 检查的原理是通过使用很小的batch(默认2个sample)来运行代码,但是
这个过程理论上不会修改任何参数,只是会检查能否运行。但如果(1)模型中存在将batch_size写为某个固定值的情况;
(2)模型中存在累加前向计算次数的,可能会多计算1次。以上情况建议将check_code_level设置为-1。
:param bool fp16: 是否使用fp16进行训练。
:param kwargs: 支持配置可选参数
bool test_use_tqdm: 在dev上验证的时候是否开启tqdm
Sampler test_sampler: 在evaluate的时候使用的sampler
bool test_use_fp16: evalute的时候是否使用fp16测试,默认与fp16相同的取值。
bool set_grad_to_none: 在zero_grad的时候是否将gradient设置为None,而不是设置为zero
GradScaler grad_scaler: 仅在fp16为True时有效,如果不使用torch.cuda.amp.GradScaler的初始化参数,可传入一个已经初始化后的
grad_scaler。
"""
super(Trainer, self).__init__()
if not isinstance(model, nn.Module):
@@ -488,6 +495,15 @@ class Trainer(object):
sampler = RandomSampler()
elif hasattr(sampler, 'set_batch_size'):
sampler.set_batch_size(batch_size)
if isinstance(sampler, ConstTokenNumSampler): # 直接使用固定token数量的Sampler
assert isinstance(train_data,
DataSet), f"When sampler is `ConstTokenNumSampler`, the train_data must" \
f" be `DataSet`."
sampler(train_data)
train_data = DataSetIter(train_data,
batch_size=1, sampler=None, as_numpy=False, num_workers=num_workers,
pin_memory=False, drop_last=drop_last, timeout=0, worker_init_fn=None,
batch_sampler=sampler)

if isinstance(train_data, DataSet):
self.data_iterator = DataSetIter(dataset=train_data, batch_size=batch_size, sampler=sampler,
@@ -505,6 +521,21 @@ class Trainer(object):
self._forward_func = self.model.module.forward
else:
self._forward_func = self.model.forward

self.fp16 = fp16

# check fp16相关的设置
self.auto_cast, _grad_scaler = _build_fp16_env(dummy=not fp16)
if self.fp16:
_can_use_fp16(device=device, model=model, func=self._forward_func)
grad_scaler = kwargs.get('grad_scaler', None)
if grad_scaler is not None:
self.grad_scaler = grad_scaler
else:
self.grad_scaler = _grad_scaler()
self.test_use_fp16 = kwargs.get('test_use_fp16', fp16)
self.set_grad_to_none = kwargs.get('set_grad_to_none', True)

if check_code_level > -1:
# _check_code 是 fastNLP 帮助你检查代码是否正确的方法 。如果你在错误栈中看到这行注释,请认真检查你的field名与模型的输入
# 名是否匹配
@@ -553,10 +584,7 @@ class Trainer(object):
self.logger = logger

self.use_tqdm = use_tqdm
if 'test_use_tqdm' in kwargs:
self.test_use_tqdm = kwargs.get('test_use_tqdm')
else:
self.test_use_tqdm = self.use_tqdm
self.test_use_tqdm = kwargs.get('test_use_tqdm', self.use_tqdm)
self.pbar = None
self.print_every = abs(self.print_every)
self.kwargs = kwargs
@@ -568,7 +596,8 @@ class Trainer(object):
device=None, # 由上面的部分处理device
verbose=0,
use_tqdm=self.test_use_tqdm,
sampler=kwargs.get('test_sampler', None))
sampler=kwargs.get('test_sampler', None),
fp16=self.test_use_fp16)

self.start_time = None # start timestamp

@@ -677,7 +706,8 @@ class Trainer(object):

# edit prediction
self.callback_manager.on_loss_begin(batch_y, prediction)
loss = self._compute_loss(prediction, batch_y).mean()
with self.auto_cast():
loss = self._compute_loss(prediction, batch_y).mean()
loss = loss / self.update_every
avg_loss += loss.item()

@@ -762,11 +792,13 @@ class Trainer(object):

"""
if self.step % self.update_every == 0:
self.optimizer.step()
self.grad_scaler.step(self.optimizer)
self.grad_scaler.update()

def _data_forward(self, network, x):
x = _build_args(self._forward_func, **x)
y = network(**x)
with self.auto_cast():
y = network(**x)
if not isinstance(y, dict):
raise TypeError(
f"The return value of {_get_func_signature(self._forward_func)} should be dict, got {type(y)}.")
@@ -780,8 +812,22 @@ class Trainer(object):
For PyTorch, just do "loss.backward()"
"""
if (self.step-1) % self.update_every == 0:
self.model.zero_grad()
loss.backward()
self._clear_grad(self.optimizer, self.set_grad_to_none)
self.grad_scaler.scale(loss).backward()

def _clear_grad(self, optimizer, set_to_none=True):
param_groups = optimizer.param_groups
for group in param_groups:
for p in group['params']:
if p.grad is not None:
if set_to_none:
p.grad = None
else:
if p.grad.grad_fn is not None:
p.grad.detach_()
else:
p.grad.requires_grad_(False)
p.grad.zero_()

def _compute_loss(self, predict, truth):
r"""Compute loss given prediction and ground truth.


+ 92
- 11
fastNLP/core/utils.py View File

@@ -12,23 +12,20 @@ import inspect
import os
import warnings
from collections import Counter, namedtuple
from copy import deepcopy
from typing import List

import _pickle
import numpy as np
import torch
import torch.nn as nn
from prettytable import PrettyTable

from ._logger import logger
from ._parallel_utils import _model_contains_inner_module
# from .vocabulary import Vocabulary
import torch
import contextlib
from pkg_resources import parse_version

try:
from apex import amp
except:
amp = None

_CheckRes = namedtuple('_CheckRes', ['missing', 'unused', 'duplicated', 'required', 'all_needed',
'varargs'])
@@ -1032,8 +1029,92 @@ def sub_column(string: str, c: int, c_size: int, title: str) -> str:
return res


def _check_fp16():
if amp is None:
raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
if not torch.backends.cudnn.enabled:
raise RuntimeError("Amp requires cudnn backend to be enabled.")
def _is_function_contains_autocast(func):
"""
检查func是否包含autocast,(1)是否使用了autocast的修饰器或, (2)使用使用with autocast()环境

:param func: 待检查的函数
"""
import re
source = inspect.getsource(func)
lines = source.split('\n')
for line in lines:
line = line.strip()
if re.search(r'@[\w\.]*autocast\(\)', line):
raise RuntimeError("Please do not use `autocast()` decorator, use `with autocast():` instead. Please refer to"
" https://pytorch.org/docs/stable/notes/amp_examples.html#dataparallel-in-a-single-process ")
if re.search(r'with [\w\.]*autocast\(\):', line):
return True
return False


class DummyGradScaler:
    """
    No-op stand-in for ``torch.cuda.amp.GradScaler`` used when fp16 is disabled,
    so calling code can invoke scaler methods unconditionally instead of
    branching on the fp16 flag everywhere.
    """

    def __init__(self, *args, **kwargs):
        pass

    def is_enabled(self):
        return False

    def get_scale(self):
        # a scale factor of 1.0 means "no scaling applied"
        return 1.0

    def scale(self, outputs):
        # pass the loss/outputs through untouched
        return outputs

    def unscale_(self, optimizer):
        pass

    def step(self, optimizer, *args, **kwargs):
        # without scaling, stepping the optimizer directly is the correct behavior
        optimizer.step(*args, **kwargs)

    def update(self, new_scale=None):
        pass

    def state_dict(self):
        return {}

    def load_state_dict(self, state_dict):
        pass


def _build_fp16_env(dummy=False):
    """
    Return an ``(autocast, GradScaler)`` pair for fp16 training.

    :param bool dummy: when True, return no-op substitutes (``contextlib.ExitStack``
        as the autocast context and :class:`DummyGradScaler`) so callers never
        need to branch on whether fp16 is enabled.
    :raises RuntimeError: if fp16 is requested but cuda is unavailable, or the
        installed torch predates the amp API (< 1.6)
    """
    if dummy:
        return contextlib.ExitStack, DummyGradScaler
    if not torch.cuda.is_available():
        raise RuntimeError("No cuda")
    if torch.cuda.get_device_capability(0)[0] < 7:
        # pre-Volta GPUs lack tensor cores, so fp16 gives no speedup
        warnings.warn(
            "NOTE: your device does NOT support faster training with fp16, "
            "please switch to FP32 which is likely to be faster"
        )
    try:
        from torch.cuda.amp import autocast, GradScaler
    except ImportError:
        raise RuntimeError("torch version too low (less than 1.6)")
    return autocast, GradScaler


def _can_use_fp16(device, model, func):
    """
    Validate that fp16 training/evaluation is possible for the given setup.

    :param device: target device as passed to Trainer/Tester (None, str, torch.device, int or list)
    :param model: the model to run
    :param func: the forward/predict function that will be executed
    :raises RuntimeError: if torch < 1.6, if the effective device is cpu, or if
        parallel (DataParallel-style) execution is requested without
        ``with autocast():`` inside ``func``
    """
    if parse_version(torch.__version__) < parse_version('1.6'):
        raise RuntimeError("Pytorch supports float16 after version 1.6, please upgrade your pytorch version.")
    model_device = _get_model_device(model)
    # any of these means the computation would effectively run on cpu
    runs_on_cpu = (
        (device is None and model_device is not None and model_device.type != 'cuda')
        or (isinstance(device, str) and device == 'cpu')
        or (isinstance(device, torch.device) and device.type == 'cpu')
    )
    if runs_on_cpu:
        raise RuntimeError("You have to run in cuda device to use fp16.")

    # DataParallel replicas require autocast() inside the forward function itself
    if _model_contains_inner_module(model) or (isinstance(device, list) and len(device) > 1):
        if not _is_function_contains_autocast(func):
            raise RuntimeError("When use fp16 in Parallel Training, you have to set autocast() in your forward "
                               "function as described in "
                               "https://pytorch.org/docs/stable/notes/amp_examples.html#dataparallel-in-a-single-process")

+ 5
- 5
fastNLP/core/vocabulary.py View File

@@ -125,7 +125,7 @@ class Vocabulary(object):
r"""依次增加序列中词在词典中的出现频率

:param list word_lst: a list of strings
:param bool no_create_entry: 在使用fastNLP.TokenEmbedding加载预训练模型时,没有从预训练词表中找到这个词的处理方式。
:param bool no_create_entry: 如果词语来自于非训练集建议设置为True。在使用fastNLP.TokenEmbedding加载预训练模型时,没有从预训练词表中找到这个词的处理方式。
如果为True,则不会有这个词语创建一个单独的entry,它将一直被指向unk的表示; 如果为False,则为这个词创建一个单独
的entry。如果这个word来自于dev或者test,一般设置为True,如果来自与train一般设置为False。以下两种情况: 如果新
加入一个word,且no_create_entry为True,但这个词之前已经在Vocabulary中且并不是no_create_entry的,则还是会为这
@@ -142,7 +142,7 @@ class Vocabulary(object):
增加一个新词在词典中的出现频率

:param str word: 新词
:param bool no_create_entry: 在使用fastNLP.TokenEmbedding加载预训练模型时,没有从预训练词表中找到这个词的处理方式。
:param bool no_create_entry: 如果词语来自于非训练集建议设置为True。在使用fastNLP.TokenEmbedding加载预训练模型时,没有从预训练词表中找到这个词的处理方式。
如果为True,则不会有这个词语创建一个单独的entry,它将一直被指向unk的表示; 如果为False,则为这个词创建一个单独
的entry。如果这个word来自于dev或者test,一般设置为True,如果来自与train一般设置为False。以下两种情况: 如果新
加入一个word,且no_create_entry为True,但这个词之前已经在Vocabulary中且并不是no_create_entry的,则还是会为这
@@ -175,7 +175,7 @@ class Vocabulary(object):
增加一个新词在词典中的出现频率

:param str word: 新词
:param bool no_create_entry: 在使用fastNLP.TokenEmbedding加载预训练模型时,没有从预训练词表中找到这个词的处理方式。
:param bool no_create_entry: 如果词语来自于非训练集建议设置为True。在使用fastNLP.TokenEmbedding加载预训练模型时,没有从预训练词表中找到这个词的处理方式。
如果为True,则不会有这个词语创建一个单独的entry,它将一直被指向unk的表示; 如果为False,则为这个词创建一个单独
的entry。如果这个word来自于dev或者test,一般设置为True,如果来自与train一般设置为False。以下两种情况: 如果新
加入一个word,且no_create_entry为True,但这个词之前已经在Vocabulary中且并不是no_create_entry的,则还是会为这
@@ -190,7 +190,7 @@ class Vocabulary(object):
依次增加序列中词在词典中的出现频率

:param list[str] word_lst: 词的序列
:param bool no_create_entry: 在使用fastNLP.TokenEmbedding加载预训练模型时,没有从预训练词表中找到这个词的处理方式。
:param bool no_create_entry: 如果词语来自于非训练集建议设置为True。在使用fastNLP.TokenEmbedding加载预训练模型时,没有从预训练词表中找到这个词的处理方式。
如果为True,则不会有这个词语创建一个单独的entry,它将一直被指向unk的表示; 如果为False,则为这个词创建一个单独
的entry。如果这个word来自于dev或者test,一般设置为True,如果来自与train一般设置为False。以下两种情况: 如果新
加入一个word,且no_create_entry为True,但这个词之前已经在Vocabulary中且并不是no_create_entry的,则还是会为这
@@ -344,7 +344,7 @@ class Vocabulary(object):
:param str,List[str] field_name: 可为 ``str`` 或 ``List[str]`` .
构建词典所使用的 field(s), 支持一个或多个field,若有多个 DataSet, 每个DataSet都必须有这些field. 目前支持的field结构
: ``str`` , ``List[str]``
:param no_create_entry_dataset: 可以传入DataSet, List[DataSet]或者None(默认)该选项用在接下来的模型会使用pretrain
:param no_create_entry_dataset: 可以传入DataSet, List[DataSet]或者None(默认), 建议直接将非训练数据都传入到这个参数。该选项用在接下来的模型会使用pretrain
的embedding(包括glove, word2vec, elmo与bert)且会finetune的情况。如果仅使用来自于train的数据建立vocabulary,会导致test与dev
中的数据无法充分利用到来自于预训练embedding的信息,所以在建立词表的时候将test与dev考虑进来会使得最终的结果更好。
如果一个词出现在了train中,但是没在预训练模型中,embedding会为它用unk初始化,但它是单独的一个vector,如果


+ 1
- 1
fastNLP/embeddings/bert_embedding.py View File

@@ -108,7 +108,7 @@ class BertEmbedding(ContextualEmbedding):
self._word_sep_index = vocab['[SEP]']
self._word_cls_index = -100
if '[CLS]' in vocab:
self._word_cls_index = vocab['CLS']
self._word_cls_index = vocab['[CLS]']

min_freq = kwargs.get('min_freq', 1)
self._min_freq = min_freq


+ 3
- 1
fastNLP/embeddings/static_embedding.py View File

@@ -281,7 +281,9 @@ class StaticEmbedding(TokenEmbedding):
if word in vocab:
index = vocab.to_index(word)
if index in matrix:
warnings.warn(f"Word:{word} occurs again in line:{idx}(starts from 0)")
warnings.warn(f"Word has more than one vector in embedding file. Set logger level to "
f"DEBUG for detail.")
logger.debug(f"Word:{word} occurs again in line:{idx}(starts from 0)")
matrix[index] = torch.from_numpy(np.fromstring(' '.join(nums), sep=' ', dtype=dtype, count=dim))
if self.only_norm_found_vector:
matrix[index] = matrix[index] / np.linalg.norm(matrix[index])


+ 53
- 0
fastNLP/models/base_model.py View File

@@ -34,3 +34,56 @@ class NaiveClassifier(BaseModel):
def predict(self, x):
return {"predict": torch.sigmoid(self.mlp(x)) > 0.5}


class NaiveClassifier2(BaseModel):
    r"""
    A minimal classifier for tests; ``forward`` returns raw logits (no sigmoid),
    so it pairs with losses such as ``BCEWithLogits``.
    """

    def __init__(self, in_feature_dim, out_feature_dim):
        super().__init__()
        self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])

    def forward(self, x):
        return {"predict": self.mlp(x)}

    def predict(self, x):
        logits = self.mlp(x)
        return {"predict": torch.sigmoid(logits) > 0.5}


class NaiveClassifier3(BaseModel):
    r"""
    A minimal classifier for tests; both methods carry the
    ``@torch.cuda.amp.autocast()`` decorator — the form that fp16 parallel
    training is expected to reject.
    """

    def __init__(self, in_feature_dim, out_feature_dim):
        super().__init__()
        self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])

    @torch.cuda.amp.autocast()
    def forward(self, x):
        return {"predict": self.mlp(x)}

    @torch.cuda.amp.autocast()
    def predict(self, x):
        logits = self.mlp(x)
        return {"predict": torch.sigmoid(logits) > 0.5}


class NaiveClassifier4(BaseModel):
    r"""
    A minimal classifier for tests; both methods use ``with
    torch.cuda.amp.autocast():`` inside the body — the recommended way to
    enable fp16 under DataParallel.
    """

    def __init__(self, in_feature_dim, out_feature_dim):
        super().__init__()
        self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])

    def forward(self, x):
        with torch.cuda.amp.autocast():
            return {"predict": self.mlp(x)}

    def predict(self, x):
        with torch.cuda.amp.autocast():
            logits = self.mlp(x)
            return {"predict": torch.sigmoid(logits) > 0.5}

+ 8
- 0
tests/core/test_sampler.py View File

@@ -44,3 +44,11 @@ class TestSampler(unittest.TestCase):
indices = sampler(data_set)
self.assertEqual(len(indices), 10)
# 跑通即可,不验证效果

def test_ConstantTokenNumSampler(self):
    # TODO: should verify that per-batch token counts stay close to max_token
    pass

def test_ConstTokenNumSampler(self):
    # TODO: should verify that the sampler runs end-to-end without errors
    pass

+ 82
- 2
tests/core/test_trainer.py View File

@@ -9,12 +9,12 @@ import torch

from fastNLP import DataSet
from fastNLP import Instance
from fastNLP import BCELoss
from fastNLP import BCELoss, BCEWithLogits
from fastNLP import CrossEntropyLoss
from fastNLP import AccuracyMetric
from fastNLP import SGD
from fastNLP import Trainer
from fastNLP.models.base_model import NaiveClassifier
from fastNLP.models.base_model import NaiveClassifier, NaiveClassifier2, NaiveClassifier3, NaiveClassifier4
from fastNLP import TorchLoaderIter


@@ -575,3 +575,83 @@ class TrainerTestGround(unittest.TestCase):
)
trainer.train()
"""


class Fp16TrainerTest(unittest.TestCase):
    """Tests for fp16 (mixed precision) training support in Trainer."""

    def test_raise_error(self):
        # fp16 without a cuda device must raise: inferred device, device='cpu',
        # and device=torch.device('cpu') are all invalid for fp16.
        data_set = prepare_fake_dataset()
        data_set.set_input("x", flag=True)
        data_set.set_target("y", flag=True)

        train_set, dev_set = data_set.split(0.3)

        model = NaiveClassifier2(2, 1)

        with self.assertRaises(RuntimeError):
            trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1), loss=BCEWithLogits(pred="predict", target="y"),
                              batch_size=32, n_epochs=10, print_every=50, dev_data=dev_set,
                              metrics=AccuracyMetric(pred="predict", target="y"), validate_every=-1, save_path=None,
                              use_tqdm=True, check_code_level=2, fp16=True)

        with self.assertRaises(RuntimeError):
            trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1), loss=BCEWithLogits(pred="predict", target="y"),
                              batch_size=32, n_epochs=10, print_every=50, dev_data=dev_set,
                              metrics=AccuracyMetric(pred="predict", target="y"), validate_every=-1, save_path=None,
                              use_tqdm=True, check_code_level=2, fp16=True, device='cpu')

        with self.assertRaises(RuntimeError):
            trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1), loss=BCEWithLogits(pred="predict", target="y"),
                              batch_size=32, n_epochs=10, print_every=50, dev_data=dev_set,
                              metrics=AccuracyMetric(pred="predict", target="y"), validate_every=-1, save_path=None,
                              use_tqdm=True, check_code_level=2, fp16=True, device=torch.device('cpu'))

    # fixed typo in the skip reason ("detch") and use idiomatic `not ...` instead of `==False`
    @unittest.skipIf(not torch.cuda.is_available(), "Skip when no cuda device detected")
    def test_run_fp16(self):
        # fp16 training should run both with fp16 evaluation and with test_use_fp16=False
        data_set = prepare_fake_dataset()
        data_set.set_input("x", flag=True)
        data_set.set_target("y", flag=True)

        train_set, dev_set = data_set.split(0.3)

        model = NaiveClassifier2(2, 1)
        trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1), loss=BCEWithLogits(pred="predict", target="y"),
                          batch_size=32, n_epochs=10, print_every=50, dev_data=dev_set,
                          metrics=AccuracyMetric(pred="predict", target="y"), validate_every=-1, save_path=None,
                          use_tqdm=True, check_code_level=2, fp16=True, device=0)
        trainer.train(load_best_model=False)

        model = NaiveClassifier2(2, 1)
        trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1), loss=BCEWithLogits(pred="predict", target="y"),
                          batch_size=32, n_epochs=10, print_every=50, dev_data=dev_set,
                          metrics=AccuracyMetric(pred="predict", target="y"), validate_every=-1, save_path=None,
                          use_tqdm=True, check_code_level=2, fp16=True, device=0, test_use_fp16=False)
        trainer.train(load_best_model=False)

    # fixed skip reason: the condition skips when there are fewer than 2 gpus,
    # but the old message said "lower than 1 gpus"
    @unittest.skipIf(torch.cuda.device_count() < 2, "Skip when fewer than 2 gpus.")
    def test_run_data_parallel(self):
        # DataParallel + fp16: a plain forward and a decorator-autocast forward must
        # raise; a `with autocast():` forward (NaiveClassifier4) must train fine.
        data_set = prepare_fake_dataset()
        data_set.set_input("x", flag=True)
        data_set.set_target("y", flag=True)

        train_set, dev_set = data_set.split(0.3)

        model = NaiveClassifier2(2, 1)
        with self.assertRaises(RuntimeError):
            trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1), loss=BCEWithLogits(pred="predict", target="y"),
                              batch_size=32, n_epochs=10, print_every=50, dev_data=dev_set,
                              metrics=AccuracyMetric(pred="predict", target="y"), validate_every=-1, save_path=None,
                              use_tqdm=True, check_code_level=2, fp16=True, device=[0, 1])

        with self.assertRaises(RuntimeError):
            model = NaiveClassifier3(2, 1)
            trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1), loss=BCEWithLogits(pred="predict", target="y"),
                              batch_size=32, n_epochs=10, print_every=50, dev_data=dev_set,
                              metrics=AccuracyMetric(pred="predict", target="y"), validate_every=-1, save_path=None,
                              use_tqdm=True, check_code_level=2, fp16=True, device=[0, 1], test_use_fp16=True)

        model = NaiveClassifier4(2, 1)
        trainer = Trainer(train_set, model, optimizer=SGD(lr=0.1), loss=BCEWithLogits(pred="predict", target="y"),
                          batch_size=32, n_epochs=10, print_every=50, dev_data=dev_set,
                          metrics=AccuracyMetric(pred="predict", target="y"), validate_every=-1, save_path=None,
                          use_tqdm=True, check_code_level=2, fp16=True, device=[0, 1], test_use_fp16=True)
        trainer.train(load_best_model=False)

Loading…
Cancel
Save