@@ -93,9 +93,13 @@ class DataSetGetter: | |||||
class SamplerAdapter(torch.utils.data.Sampler): | class SamplerAdapter(torch.utils.data.Sampler): | ||||
def __init__(self, sampler, dataset): | def __init__(self, sampler, dataset): | ||||
super().__init__(dataset) | |||||
self.sampler = sampler | self.sampler = sampler | ||||
self.dataset = dataset | self.dataset = dataset | ||||
def __len__(self): | |||||
return len(self.dataset) | |||||
def __iter__(self): | def __iter__(self): | ||||
return iter(self.sampler(self.dataset)) | return iter(self.sampler(self.dataset)) | ||||
@@ -165,15 +169,19 @@ class DataSetIter(BatchIter): | |||||
timeout=0, worker_init_fn=None): | timeout=0, worker_init_fn=None): | ||||
super().__init__() | super().__init__() | ||||
assert isinstance(dataset, DataSet) | assert isinstance(dataset, DataSet) | ||||
sampler = SamplerAdapter(sampler=sampler or SequentialSampler(), dataset=dataset) | |||||
if not isinstance(sampler, torch.utils.data.Sampler): | |||||
self.sampler = SamplerAdapter(sampler=sampler or SequentialSampler(), dataset=dataset) | |||||
else: | |||||
self.sampler = sampler | |||||
dataset = DataSetGetter(dataset, as_numpy) | dataset = DataSetGetter(dataset, as_numpy) | ||||
collate_fn = dataset.collate_fn if hasattr(dataset, 'collate_fn') else None | collate_fn = dataset.collate_fn if hasattr(dataset, 'collate_fn') else None | ||||
self.dataiter = torch.utils.data.DataLoader( | self.dataiter = torch.utils.data.DataLoader( | ||||
dataset=dataset, batch_size=batch_size, sampler=sampler, | |||||
dataset=dataset, batch_size=batch_size, sampler=self.sampler, | |||||
collate_fn=collate_fn, num_workers=num_workers, | collate_fn=collate_fn, num_workers=num_workers, | ||||
pin_memory=pin_memory, drop_last=drop_last, | pin_memory=pin_memory, drop_last=drop_last, | ||||
timeout=timeout, worker_init_fn=worker_init_fn) | timeout=timeout, worker_init_fn=worker_init_fn) | ||||
self.num_batches = self.get_num_batches(len(dataset), batch_size, drop_last) | |||||
# 以sampler的数量为准,因为DistributedSampler的时候每个进程上并不是所有的数据都用上了 | |||||
self.num_batches = self.get_num_batches(len(self.dataiter.sampler), batch_size, drop_last) | |||||
self.batch_size = batch_size | self.batch_size = batch_size | ||||
@@ -182,7 +190,7 @@ class TorchLoaderIter(BatchIter): | |||||
super().__init__() | super().__init__() | ||||
assert isinstance(dataset, torch.utils.data.DataLoader) | assert isinstance(dataset, torch.utils.data.DataLoader) | ||||
self.dataiter = dataset | self.dataiter = dataset | ||||
self.num_batches = self.get_num_batches(len(dataset), dataset.batch_size, dataset.drop_last) | |||||
self.num_batches = self.get_num_batches(len(dataset.sampler), dataset.batch_size, dataset.drop_last) | |||||
self.batch_size = dataset.batch_size | self.batch_size = dataset.batch_size | ||||
@@ -479,7 +479,7 @@ class FitlogCallback(Callback): | |||||
self.datasets[key] = value | self.datasets[key] = value | ||||
elif isinstance(data, DataSet): | elif isinstance(data, DataSet): | ||||
self.datasets['test'] = data | self.datasets['test'] = data | ||||
else: | |||||
elif data is not None: | |||||
raise TypeError("data receives dict[DataSet] or DataSet object.") | raise TypeError("data receives dict[DataSet] or DataSet object.") | ||||
self.verbose = verbose | self.verbose = verbose | ||||
@@ -487,7 +487,7 @@ class DataSet(object): | |||||
""" | """ | ||||
删除第index个instance | 删除第index个instance | ||||
:param int index: 需要删除的instance的index,从0开始 | |||||
:param int index: 需要删除的instance的index,序号从0开始。 | |||||
""" | """ | ||||
assert isinstance(index, int), "Only integer supported." | assert isinstance(index, int), "Only integer supported." | ||||
if len(self) <= index: | if len(self) <= index: | ||||
@@ -566,7 +566,7 @@ class DataSet(object): | |||||
raise KeyError("DataSet has no field named {}.".format(old_name)) | raise KeyError("DataSet has no field named {}.".format(old_name)) | ||||
return self | return self | ||||
def set_target(self, *field_names, flag=True): | |||||
def set_target(self, *field_names, flag=True, use_1st_ins_infer_dim_type=True): | |||||
""" | """ | ||||
将field_names的field设置为target | 将field_names的field设置为target | ||||
@@ -577,11 +577,14 @@ class DataSet(object): | |||||
:param str field_names: field的名称 | :param str field_names: field的名称 | ||||
:param bool flag: 将field_name的target状态设置为flag | :param bool flag: 将field_name的target状态设置为flag | ||||
:param bool use_1st_ins_infer_dim_type: 如果为True,将不会check该列是否所有数据都是同样的维度,同样的类型。将直接使用第一 | |||||
行的数据进行类型和维度推断本列的数据的类型和维度。 | |||||
""" | """ | ||||
assert isinstance(flag, bool), "Only bool type supported." | assert isinstance(flag, bool), "Only bool type supported." | ||||
for name in field_names: | for name in field_names: | ||||
if name in self.field_arrays: | if name in self.field_arrays: | ||||
try: | try: | ||||
self.field_arrays[name]._use_1st_ins_infer_dim_type = bool(use_1st_ins_infer_dim_type) | |||||
self.field_arrays[name].is_target = flag | self.field_arrays[name].is_target = flag | ||||
except SetInputOrTargetException as e: | except SetInputOrTargetException as e: | ||||
print(f"Cannot set field:{name} as target.") | print(f"Cannot set field:{name} as target.") | ||||
@@ -589,7 +592,7 @@ class DataSet(object): | |||||
else: | else: | ||||
raise KeyError("{} is not a valid field name.".format(name)) | raise KeyError("{} is not a valid field name.".format(name)) | ||||
def set_input(self, *field_names, flag=True): | |||||
def set_input(self, *field_names, flag=True, use_1st_ins_infer_dim_type=True): | |||||
""" | """ | ||||
将field_names的field设置为input:: | 将field_names的field设置为input:: | ||||
@@ -598,10 +601,13 @@ class DataSet(object): | |||||
:param str field_names: field的名称 | :param str field_names: field的名称 | ||||
:param bool flag: 将field_name的input状态设置为flag | :param bool flag: 将field_name的input状态设置为flag | ||||
:param bool use_1st_ins_infer_dim_type: 如果为True,将不会check该列是否所有数据都是同样的维度,同样的类型。将直接使用第一 | |||||
行的数据进行类型和维度推断本列的数据的类型和维度。 | |||||
""" | """ | ||||
for name in field_names: | for name in field_names: | ||||
if name in self.field_arrays: | if name in self.field_arrays: | ||||
try: | try: | ||||
self.field_arrays[name]._use_1st_ins_infer_dim_type = bool(use_1st_ins_infer_dim_type) | |||||
self.field_arrays[name].is_input = flag | self.field_arrays[name].is_input = flag | ||||
except SetInputOrTargetException as e: | except SetInputOrTargetException as e: | ||||
print(f"Cannot set field:{name} as input, exception happens at the {e.index} value.") | print(f"Cannot set field:{name} as input, exception happens at the {e.index} value.") | ||||
@@ -225,7 +225,7 @@ class CrossEntropyLoss(LossBase): | |||||
def get_loss(self, pred, target, seq_len=None): | def get_loss(self, pred, target, seq_len=None): | ||||
if pred.dim() > 2: | if pred.dim() > 2: | ||||
if pred.size(1) != target.size(1): | |||||
if pred.size(1) != target.size(1): # 有可能顺序替换了 | |||||
pred = pred.transpose(1, 2) | pred = pred.transpose(1, 2) | ||||
pred = pred.reshape(-1, pred.size(-1)) | pred = pred.reshape(-1, pred.size(-1)) | ||||
target = target.reshape(-1) | target = target.reshape(-1) | ||||
@@ -49,7 +49,7 @@ class NullOptimizer(Optimizer): | |||||
super().__init__(None) | super().__init__(None) | ||||
def construct_from_pytorch(self, model_params): | def construct_from_pytorch(self, model_params): | ||||
pass | |||||
return self | |||||
def __getattr__(self, item): | def __getattr__(self, item): | ||||
def pass_func(*args, **kwargs): | def pass_func(*args, **kwargs): | ||||
@@ -25,9 +25,9 @@ class Sampler(object): | |||||
def __call__(self, data_set): | def __call__(self, data_set): | ||||
""" | """ | ||||
:param DataSet data_set: `DataSet` 对象, 需要Sample的数据 | |||||
:return result: list(int) 其中元素的下标序列, ``data_set`` 中元素会按 ``result`` 中顺序取出 | |||||
""" | |||||
:param DataSet data_set: `DataSet` 对象, 需要Sample的数据 | |||||
:return result: list(int) 其中元素的下标序列, ``data_set`` 中元素会按 ``result`` 中顺序取出 | |||||
""" | |||||
raise NotImplementedError | raise NotImplementedError | ||||
@@ -47,6 +47,7 @@ from .utils import _get_func_signature | |||||
from .utils import _get_model_device | from .utils import _get_model_device | ||||
from .utils import _move_model_to_device | from .utils import _move_model_to_device | ||||
from ._parallel_utils import _data_parallel_wrapper | from ._parallel_utils import _data_parallel_wrapper | ||||
from .utils import _model_contains_inner_module | |||||
from functools import partial | from functools import partial | ||||
__all__ = [ | __all__ = [ | ||||
@@ -83,9 +84,7 @@ class Tester(object): | |||||
def __init__(self, data, model, metrics, batch_size=16, num_workers=0, device=None, verbose=1): | def __init__(self, data, model, metrics, batch_size=16, num_workers=0, device=None, verbose=1): | ||||
super(Tester, self).__init__() | super(Tester, self).__init__() | ||||
if not isinstance(data, DataSet): | |||||
raise TypeError(f"The type of data must be `fastNLP.DataSet`, got `{type(data)}`.") | |||||
if not isinstance(model, nn.Module): | if not isinstance(model, nn.Module): | ||||
raise TypeError(f"The type of model must be `torch.nn.Module`, got `{type(model)}`.") | raise TypeError(f"The type of model must be `torch.nn.Module`, got `{type(model)}`.") | ||||
@@ -106,19 +105,22 @@ class Tester(object): | |||||
# check predict | # check predict | ||||
if (hasattr(self._model, 'predict') and callable(self._model.predict)) or \ | if (hasattr(self._model, 'predict') and callable(self._model.predict)) or \ | ||||
(isinstance(self._model, nn.DataParallel) and hasattr(self._model.module, 'predict') and | |||||
callable(self._model.module.predict)): | |||||
(_model_contains_inner_module(self._model) and hasattr(self._model.module, 'predict') and | |||||
callable(self._model.module.predict)): | |||||
if isinstance(self._model, nn.DataParallel): | if isinstance(self._model, nn.DataParallel): | ||||
self._predict_func_wrapper = partial(_data_parallel_wrapper('predict', | self._predict_func_wrapper = partial(_data_parallel_wrapper('predict', | ||||
self._model.device_ids, | self._model.device_ids, | ||||
self._model.output_device), | self._model.output_device), | ||||
network=self._model.module) | network=self._model.module) | ||||
self._predict_func = self._model.module.predict # 用于匹配参数 | |||||
elif isinstance(self._model, nn.parallel.DistributedDataParallel): | |||||
self._predict_func = self._model.module.predict | self._predict_func = self._model.module.predict | ||||
self._predict_func_wrapper = self._model.module.predict # 用于调用 | |||||
else: | else: | ||||
self._predict_func = self._model.predict | self._predict_func = self._model.predict | ||||
self._predict_func_wrapper = self._model.predict | self._predict_func_wrapper = self._model.predict | ||||
else: | else: | ||||
if isinstance(self._model, nn.DataParallel): | |||||
if _model_contains_inner_module(model): | |||||
self._predict_func_wrapper = self._model.forward | self._predict_func_wrapper = self._model.forward | ||||
self._predict_func = self._model.module.forward | self._predict_func = self._model.module.forward | ||||
else: | else: | ||||
@@ -352,7 +352,7 @@ from .utils import _move_dict_value_to_device | |||||
from .utils import _get_func_signature | from .utils import _get_func_signature | ||||
from .utils import _get_model_device | from .utils import _get_model_device | ||||
from .utils import _move_model_to_device | from .utils import _move_model_to_device | ||||
from .utils import _model_contains_inner_module | |||||
class Trainer(object): | class Trainer(object): | ||||
""" | """ | ||||
@@ -389,8 +389,8 @@ class Trainer(object): | |||||
要指定以哪个指标为准。另外有些指标是越小效果越好,比如语言模型的困惑度,这种情况下,在key前面增加一个'-'来表 | 要指定以哪个指标为准。另外有些指标是越小效果越好,比如语言模型的困惑度,这种情况下,在key前面增加一个'-'来表 | ||||
明验证时,值越小越好(比如: "-ppl")。仅在传入dev_data时有效。 | 明验证时,值越小越好(比如: "-ppl")。仅在传入dev_data时有效。 | ||||
:param int validate_every: 多少个step在验证集上验证一次; 如果为-1,则每个epoch结束验证一次。仅在传入dev_data时有效。 | :param int validate_every: 多少个step在验证集上验证一次; 如果为-1,则每个epoch结束验证一次。仅在传入dev_data时有效。 | ||||
:param str,None save_path: 将模型保存路径。如果为None,则不保存模型。如果dev_data为None,则保存最后一次迭代的模型。 | |||||
保存的时候不仅保存了参数,还保存了模型结构。即便使用DataParallel,这里也只保存模型。 | |||||
:param str,None save_path: 将模型保存路径,如果路径不存在,将自动创建文件夹。如果为None,则不保存模型。如果dev_data为None,则保存 | |||||
最后一次迭代的模型。保存的时候不仅保存了参数,还保存了模型结构。即便使用DataParallel,这里也只保存模型。 | |||||
:param bool use_tqdm: 是否使用tqdm来显示训练进度; 如果为False,则将loss打印在终端中。 | :param bool use_tqdm: 是否使用tqdm来显示训练进度; 如果为False,则将loss打印在终端中。 | ||||
:param str,int,torch.device,list(int) device: 将模型load到哪个设备。默认为None,即Trainer不对模型 | :param str,int,torch.device,list(int) device: 将模型load到哪个设备。默认为None,即Trainer不对模型 | ||||
的计算位置进行管理。支持以下的输入: | 的计算位置进行管理。支持以下的输入: | ||||
@@ -440,7 +440,7 @@ class Trainer(object): | |||||
# check update every | # check update every | ||||
assert update_every >= 1, "update_every must be no less than 1." | assert update_every >= 1, "update_every must be no less than 1." | ||||
self.update_every = int(update_every) | self.update_every = int(update_every) | ||||
# check save_path | # check save_path | ||||
if not (save_path is None or isinstance(save_path, str)): | if not (save_path is None or isinstance(save_path, str)): | ||||
raise ValueError("save_path can only be None or `str`.") | raise ValueError("save_path can only be None or `str`.") | ||||
@@ -458,30 +458,69 @@ class Trainer(object): | |||||
self.metric_key = None | self.metric_key = None | ||||
# prepare loss | # prepare loss | ||||
losser = _prepare_losser(loss) | losser = _prepare_losser(loss) | ||||
# sampler check | |||||
if sampler is not None and not isinstance(sampler, Sampler): | |||||
raise ValueError("The type of sampler should be fastNLP.BaseSampler, got {}.".format(type(sampler))) | |||||
if sampler is None: | |||||
sampler = RandomSampler() | |||||
elif hasattr(sampler, 'set_batch_size'): | |||||
sampler.set_batch_size(batch_size) | |||||
if isinstance(train_data, BatchIter): | |||||
if sampler is not None: | |||||
warnings.warn("sampler is ignored when train_data is a BatchIter.") | |||||
if num_workers>0: | |||||
warnings.warn("num_workers is ignored when train_data is BatchIter.") | |||||
if drop_last: | |||||
warnings.warn("drop_last is ignored when train_data is BatchIter.") | |||||
if isinstance(model, nn.parallel.DistributedDataParallel): # 如果是分布式的 | |||||
# device为None | |||||
if device is not None: | |||||
warnings.warn("device is ignored when model is nn.parallel.DistributedDataParallel.") | |||||
device = None | |||||
# Sampler要是分布式的 | |||||
if sampler is None: | |||||
sampler = torch.utils.data.DistributedSampler(train_data) | |||||
elif not isinstance(sampler, torch.utils.data.DistributedSampler): | |||||
raise TypeError("When using nn.parallel.DistributedDataParallel, " | |||||
"sampler must be None or torch.utils.data.DistributedSampler.") | |||||
# 不能保存模型 | |||||
if save_path: | |||||
raise RuntimeError("Saving model in Distributed situation is not allowed right now.") | |||||
else: | |||||
# sampler check | |||||
if sampler is not None and not isinstance(sampler, (Sampler, torch.utils.data.Sampler)): | |||||
raise ValueError(f"The type of sampler should be fastNLP.BaseSampler or pytorch's Sampler, got {type(sampler)}") | |||||
if sampler is None: | |||||
sampler = RandomSampler() | |||||
elif hasattr(sampler, 'set_batch_size'): | |||||
sampler.set_batch_size(batch_size) | |||||
if isinstance(train_data, DataSet): | if isinstance(train_data, DataSet): | ||||
self.data_iterator = DataSetIter( | self.data_iterator = DataSetIter( | ||||
dataset=train_data, batch_size=batch_size, num_workers=num_workers, sampler=sampler, drop_last=drop_last) | dataset=train_data, batch_size=batch_size, num_workers=num_workers, sampler=sampler, drop_last=drop_last) | ||||
elif isinstance(train_data, BatchIter): | elif isinstance(train_data, BatchIter): | ||||
self.data_iterator = train_data | self.data_iterator = train_data | ||||
train_data = train_data.dataset | |||||
else: | else: | ||||
raise TypeError("train_data type {} not support".format(type(train_data))) | raise TypeError("train_data type {} not support".format(type(train_data))) | ||||
if check_code_level > -1 and isinstance(self.data_iterator, DataSetIter): | |||||
_check_code(dataset=train_data, model=model, losser=losser, metrics=metrics, dev_data=dev_data, | |||||
metric_key=self.metric_key, check_level=check_code_level, | |||||
batch_size=min(batch_size, DEFAULT_CHECK_BATCH_SIZE)) | |||||
# _check_code 是 fastNLP 帮助你检查代码是否正确的方法 。如果你在错误栈中看到这行注释,请认真检查你的代码 | |||||
self.model = _move_model_to_device(model, device=device) | self.model = _move_model_to_device(model, device=device) | ||||
if _model_contains_inner_module(self.model): | |||||
self._forward_func = self.model.module.forward | |||||
else: | |||||
self._forward_func = self.model.forward | |||||
if check_code_level > -1: | |||||
# _check_code 是 fastNLP 帮助你检查代码是否正确的方法 。如果你在错误栈中看到这行注释,请认真检查你的field名与模型的输入 | |||||
# 名是否匹配 | |||||
dev_dataset = dev_data | |||||
if isinstance(dev_data, BatchIter): | |||||
dev_dataset = None | |||||
warnings.warn("dev_data is of BatchIter type, ignore validation checking.") | |||||
check_batch_size = min(batch_size, DEFAULT_CHECK_BATCH_SIZE) | |||||
if isinstance(self.model, nn.DataParallel): | |||||
_num_devices = len(self.model.device_ids) | |||||
if batch_size//_num_devices>1: # 如果多卡是每个卡可以分多个数据的,则用每个卡给两个sample | |||||
check_batch_size = max(len(self.model.device_ids)*2, check_batch_size) | |||||
else: | |||||
check_batch_size = max(len(self.model.device_ids), check_batch_size) | |||||
_check_code(dataset=train_data, model=self.model, losser=losser, forward_func=self._forward_func, metrics=metrics, | |||||
dev_data=dev_dataset, metric_key=self.metric_key, check_level=check_code_level, | |||||
batch_size=check_batch_size) | |||||
self.train_data = train_data | self.train_data = train_data | ||||
self.dev_data = dev_data # If None, No validation. | self.dev_data = dev_data # If None, No validation. | ||||
@@ -496,8 +535,7 @@ class Trainer(object): | |||||
self.best_dev_epoch = None | self.best_dev_epoch = None | ||||
self.best_dev_step = None | self.best_dev_step = None | ||||
self.best_dev_perf = None | self.best_dev_perf = None | ||||
self.n_steps = (len(self.train_data) // self.batch_size + int( | |||||
len(self.train_data) % self.batch_size != 0)) * int(drop_last==0) * self.n_epochs | |||||
self.n_steps = len(self.data_iterator) * self.n_epochs | |||||
if isinstance(optimizer, torch.optim.Optimizer): | if isinstance(optimizer, torch.optim.Optimizer): | ||||
self.optimizer = optimizer | self.optimizer = optimizer | ||||
@@ -600,10 +638,6 @@ class Trainer(object): | |||||
self.step = 0 | self.step = 0 | ||||
self.epoch = 0 | self.epoch = 0 | ||||
start = time.time() | start = time.time() | ||||
if isinstance(self.model, nn.DataParallel): | |||||
self._forward_func = self.model.module.forward | |||||
else: | |||||
self._forward_func = self.model.forward | |||||
with inner_tqdm(total=self.n_steps, postfix='loss:{0:<6.5f}', leave=False, dynamic_ncols=True) as pbar: | with inner_tqdm(total=self.n_steps, postfix='loss:{0:<6.5f}', leave=False, dynamic_ncols=True) as pbar: | ||||
self.pbar = pbar | self.pbar = pbar | ||||
avg_loss = 0 | avg_loss = 0 | ||||
@@ -745,7 +779,7 @@ class Trainer(object): | |||||
model_path = os.path.join(self.save_path, model_name) | model_path = os.path.join(self.save_path, model_name) | ||||
if not os.path.exists(self.save_path): | if not os.path.exists(self.save_path): | ||||
os.makedirs(self.save_path, exist_ok=True) | os.makedirs(self.save_path, exist_ok=True) | ||||
if isinstance(model, nn.DataParallel): | |||||
if _model_contains_inner_module(model): | |||||
model = model.module | model = model.module | ||||
if only_param: | if only_param: | ||||
state_dict = model.state_dict() | state_dict = model.state_dict() | ||||
@@ -765,7 +799,7 @@ class Trainer(object): | |||||
states = torch.load(model_path) | states = torch.load(model_path) | ||||
else: | else: | ||||
states = torch.load(model_path).state_dict() | states = torch.load(model_path).state_dict() | ||||
if isinstance(model, nn.DataParallel): | |||||
if _model_contains_inner_module(model): | |||||
model.module.load_state_dict(states) | model.module.load_state_dict(states) | ||||
else: | else: | ||||
model.load_state_dict(states) | model.load_state_dict(states) | ||||
@@ -823,12 +857,10 @@ def _get_value_info(_dict): | |||||
from numbers import Number | from numbers import Number | ||||
from .batch import _to_tensor | from .batch import _to_tensor | ||||
def _check_code(dataset, model, losser, metrics, batch_size=DEFAULT_CHECK_BATCH_SIZE, | |||||
dev_data=None, metric_key=None, | |||||
check_level=0): | |||||
def _check_code(dataset, model, losser, metrics, forward_func, batch_size=DEFAULT_CHECK_BATCH_SIZE, | |||||
dev_data=None, metric_key=None, check_level=0): | |||||
# check get_loss 方法 | # check get_loss 方法 | ||||
model_devcie = _get_model_device(model=model) | |||||
model_device = _get_model_device(model=model) | |||||
def _iter(): | def _iter(): | ||||
start_idx = 0 | start_idx = 0 | ||||
while start_idx<len(dataset): | while start_idx<len(dataset): | ||||
@@ -849,7 +881,7 @@ def _check_code(dataset, model, losser, metrics, batch_size=DEFAULT_CHECK_BATCH_ | |||||
start_idx += batch_size | start_idx += batch_size | ||||
for batch_count, (batch_x, batch_y) in enumerate(_iter()): | for batch_count, (batch_x, batch_y) in enumerate(_iter()): | ||||
_move_dict_value_to_device(batch_x, batch_y, device=model_devcie) | |||||
_move_dict_value_to_device(batch_x, batch_y, device=model_device) | |||||
# forward check | # forward check | ||||
if batch_count == 0: | if batch_count == 0: | ||||
info_str = "" | info_str = "" | ||||
@@ -868,15 +900,11 @@ def _check_code(dataset, model, losser, metrics, batch_size=DEFAULT_CHECK_BATCH_ | |||||
else: | else: | ||||
info_str += 'There is no target field.' | info_str += 'There is no target field.' | ||||
print(info_str) | print(info_str) | ||||
_check_forward_error(forward_func=model.forward, dataset=dataset, | |||||
_check_forward_error(forward_func=forward_func, dataset=dataset, | |||||
batch_x=batch_x, check_level=check_level) | batch_x=batch_x, check_level=check_level) | ||||
if isinstance(model, nn.DataParallel): | |||||
forward_func = model.module.forward | |||||
else: | |||||
forward_func = model.forward | |||||
refined_batch_x = _build_args(forward_func, **batch_x) | refined_batch_x = _build_args(forward_func, **batch_x) | ||||
pred_dict = model(**refined_batch_x) | pred_dict = model(**refined_batch_x) | ||||
func_signature = _get_func_signature(model.forward) | |||||
func_signature = _get_func_signature(forward_func) | |||||
if not isinstance(pred_dict, dict): | if not isinstance(pred_dict, dict): | ||||
raise TypeError(f"The return value of {func_signature} should be `dict`, not `{type(pred_dict)}`.") | raise TypeError(f"The return value of {func_signature} should be `dict`, not `{type(pred_dict)}`.") | ||||
@@ -896,7 +924,7 @@ def _check_code(dataset, model, losser, metrics, batch_size=DEFAULT_CHECK_BATCH_ | |||||
loss.backward() | loss.backward() | ||||
except _CheckError as e: | except _CheckError as e: | ||||
# TODO: another error raised if _CheckError caught | # TODO: another error raised if _CheckError caught | ||||
pre_func_signature = _get_func_signature(model.forward) | |||||
pre_func_signature = _get_func_signature(forward_func) | |||||
_check_loss_evaluate(prev_func_signature=pre_func_signature, func_signature=e.func_signature, | _check_loss_evaluate(prev_func_signature=pre_func_signature, func_signature=e.func_signature, | ||||
check_res=e.check_res, pred_dict=pred_dict, target_dict=batch_y, | check_res=e.check_res, pred_dict=pred_dict, target_dict=batch_y, | ||||
dataset=dataset, check_level=check_level) | dataset=dataset, check_level=check_level) | ||||
@@ -62,7 +62,6 @@ def _prepare_cache_filepath(filepath): | |||||
os.makedirs(cache_dir) | os.makedirs(cache_dir) | ||||
# TODO 可以保存下缓存时的参数,如果load的时候发现参数不一致,发出警告。 | |||||
def cache_results(_cache_fp, _refresh=False, _verbose=1): | def cache_results(_cache_fp, _refresh=False, _verbose=1): | ||||
""" | """ | ||||
别名::class:`fastNLP.cache_results` :class:`fastNLP.core.uitls.cache_results` | 别名::class:`fastNLP.cache_results` :class:`fastNLP.core.uitls.cache_results` | ||||
@@ -188,49 +187,17 @@ def _save_model(model, model_name, save_dir, only_param=False): | |||||
torch.save(model, model_path) | torch.save(model, model_path) | ||||
model.to(_model_device) | model.to(_model_device) | ||||
def _model_contains_inner_module(model): | |||||
""" | |||||
# def save_pickle(obj, pickle_path, file_name): | |||||
# """Save an object into a pickle file. | |||||
# | |||||
# :param obj: an object | |||||
# :param pickle_path: str, the directory where the pickle file is to be saved | |||||
# :param file_name: str, the name of the pickle file. In general, it should be ended by "pkl". | |||||
# """ | |||||
# if not os.path.exists(pickle_path): | |||||
# os.mkdir(pickle_path) | |||||
# print("make dir {} before saving pickle file".format(pickle_path)) | |||||
# with open(os.path.join(pickle_path, file_name), "wb") as f: | |||||
# _pickle.dump(obj, f) | |||||
# print("{} saved in {}".format(file_name, pickle_path)) | |||||
# | |||||
# | |||||
# def load_pickle(pickle_path, file_name): | |||||
# """Load an object from a given pickle file. | |||||
# | |||||
# :param pickle_path: str, the directory where the pickle file is. | |||||
# :param file_name: str, the name of the pickle file. | |||||
# :return obj: an object stored in the pickle | |||||
# """ | |||||
# with open(os.path.join(pickle_path, file_name), "rb") as f: | |||||
# obj = _pickle.load(f) | |||||
# print("{} loaded from {}".format(file_name, pickle_path)) | |||||
# return obj | |||||
# | |||||
# | |||||
# def pickle_exist(pickle_path, pickle_name): | |||||
# """Check if a given pickle file exists in the directory. | |||||
# | |||||
# :param pickle_path: the directory of target pickle file | |||||
# :param pickle_name: the filename of target pickle file | |||||
# :return: True if file exists else False | |||||
# """ | |||||
# if not os.path.exists(pickle_path): | |||||
# os.makedirs(pickle_path) | |||||
# file_name = os.path.join(pickle_path, pickle_name) | |||||
# if os.path.exists(file_name): | |||||
# return True | |||||
# else: | |||||
# return False | |||||
:param nn.Module model: 模型文件,判断是否内部包含model.module, 多用于check模型是否是nn.DataParallel, | |||||
nn.parallel.DistributedDataParallel。主要是在做形参匹配的时候需要使用最内部的model的function。 | |||||
:return: bool | |||||
""" | |||||
if isinstance(model, nn.Module): | |||||
if isinstance(model, (nn.DataParallel, nn.parallel.DistributedDataParallel)): | |||||
return True | |||||
return False | |||||
def _move_model_to_device(model, device): | def _move_model_to_device(model, device): | ||||
""" | """ | ||||
@@ -254,8 +221,8 @@ def _move_model_to_device(model, device): | |||||
:return: torch.nn.DataParallel or torch.nn.Module | :return: torch.nn.DataParallel or torch.nn.Module | ||||
""" | """ | ||||
if isinstance(model, torch.nn.parallel.DistributedDataParallel): | |||||
raise RuntimeError("model of `torch.nn.parallel.DistributedDataParallel` is not supported right now.") | |||||
# if isinstance(model, torch.nn.parallel.DistributedDataParallel): | |||||
# raise RuntimeError("model of `torch.nn.parallel.DistributedDataParallel` is not supported right now.") | |||||
if device is None: | if device is None: | ||||
if isinstance(model, torch.nn.DataParallel): | if isinstance(model, torch.nn.DataParallel): | ||||
@@ -352,7 +319,6 @@ def _map_args(maps: dict, **kwargs): | |||||
output.update({name: val}) | output.update({name: val}) | ||||
for keys in maps.keys(): | for keys in maps.keys(): | ||||
if keys not in output.keys(): | if keys not in output.keys(): | ||||
# TODO: add UNUSED warning. | |||||
pass | pass | ||||
return output | return output | ||||
@@ -570,18 +536,6 @@ def _check_loss_evaluate(prev_func_signature: str, func_signature: str, check_re | |||||
else: | else: | ||||
_tmp = f'Provide `{_miss}` in DataSet or output of {prev_func_signature}.' | _tmp = f'Provide `{_miss}` in DataSet or output of {prev_func_signature}.' | ||||
suggestions.append(_tmp) | suggestions.append(_tmp) | ||||
# for _miss in unmapped_missing: | |||||
# if _miss in dataset: | |||||
# suggestions.append(f"Set `{_miss}` as target.") | |||||
# else: | |||||
# _tmp = '' | |||||
# if check_res.unused: | |||||
# _tmp = f"Specify your assignment for `{input_func_map.get(_miss, _miss)}` when initialize {module_name}." | |||||
# if _tmp: | |||||
# _tmp += f' Or provide `{_miss}` in DataSet or output of {prev_func_signature}.' | |||||
# else: | |||||
# _tmp = f'Provide `{_miss}` in output of {prev_func_signature} or DataSet.' | |||||
# suggestions.append(_tmp) | |||||
if check_res.duplicated: | if check_res.duplicated: | ||||
errs.append(f"\tduplicated param: {check_res.duplicated}.") | errs.append(f"\tduplicated param: {check_res.duplicated}.") | ||||
@@ -37,8 +37,8 @@ class BertEmbedding(ContextualEmbedding): | |||||
:param ~fastNLP.Vocabulary vocab: 词表 | :param ~fastNLP.Vocabulary vocab: 词表 | ||||
:param str model_dir_or_name: 模型所在目录或者模型的名称。当传入模型所在目录时,目录中应该包含一个词表文件(以.txt作为后缀名), | :param str model_dir_or_name: 模型所在目录或者模型的名称。当传入模型所在目录时,目录中应该包含一个词表文件(以.txt作为后缀名), | ||||
权重文件(以.bin作为文件后缀名), 配置文件(以.json作为后缀名)。 | 权重文件(以.bin作为文件后缀名), 配置文件(以.json作为后缀名)。 | ||||
:param str layers: 输出embedding表示来自于哪些层,不同层的结果按照layers中的顺序在最后一维concat起来。以','隔开层数,可以以负数 | |||||
去索引倒数几层。 | |||||
:param str layers: 输出embedding表示来自于哪些层,不同层的结果按照layers中的顺序在最后一维concat起来。以','隔开层数,层的序号是 | |||||
从0开始,可以以负数去索引倒数几层。 | |||||
:param str pool_method: 因为在bert中,每个word会被表示为多个word pieces, 当获取一个word的表示的时候,怎样从它的word pieces | :param str pool_method: 因为在bert中,每个word会被表示为多个word pieces, 当获取一个word的表示的时候,怎样从它的word pieces | ||||
中计算得到它对应的表示。支持 ``last`` , ``first`` , ``avg`` , ``max``。 | 中计算得到它对应的表示。支持 ``last`` , ``first`` , ``avg`` , ``max``。 | ||||
:param float word_dropout: 以多大的概率将一个词替换为unk。这样既可以训练unk也是一定的regularize。 | :param float word_dropout: 以多大的概率将一个词替换为unk。这样既可以训练unk也是一定的regularize。 | ||||
@@ -334,7 +334,7 @@ class _WordBertModel(nn.Module): | |||||
start, end = batch_word_pieces_cum_length[i, j], batch_word_pieces_cum_length[i, j+1] | start, end = batch_word_pieces_cum_length[i, j], batch_word_pieces_cum_length[i, j+1] | ||||
outputs[l_index, i, j+s_shift] = torch.mean(truncate_output_layer[i, start:end], dim=-2) | outputs[l_index, i, j+s_shift] = torch.mean(truncate_output_layer[i, start:end], dim=-2) | ||||
if self.include_cls_sep: | if self.include_cls_sep: | ||||
if l==len(bert_outputs) and self.pooled_cls: | |||||
if l in (len(bert_outputs)-1, -1) and self.pooled_cls: | |||||
outputs[l_index, :, 0] = pooled_cls | outputs[l_index, :, 0] = pooled_cls | ||||
else: | else: | ||||
outputs[l_index, :, 0] = output_layer[:, 0] | outputs[l_index, :, 0] = output_layer[:, 0] | ||||
@@ -37,7 +37,7 @@ class ElmoEmbedding(ContextualEmbedding): | |||||
:param model_dir_or_name: 可以有两种方式调用预训练好的ELMo embedding:第一种是传入ELMo所在文件夹,该文件夹下面应该有两个文件, | :param model_dir_or_name: 可以有两种方式调用预训练好的ELMo embedding:第一种是传入ELMo所在文件夹,该文件夹下面应该有两个文件, | ||||
其中一个是以json为后缀的配置文件,另一个是以pkl为后缀的权重文件;第二种是传入ELMo版本的名称,将自动查看缓存中是否存在该模型, | 其中一个是以json为后缀的配置文件,另一个是以pkl为后缀的权重文件;第二种是传入ELMo版本的名称,将自动查看缓存中是否存在该模型, | ||||
没有的话将自动下载并缓存。 | 没有的话将自动下载并缓存。 | ||||
:param layers: str, 指定返回的层数, 以,隔开不同的层。如果要返回第二层的结果'2', 返回后两层的结果'1,2'。不同的层的结果 | |||||
:param layers: str, 指定返回的层数(从0开始), 以,隔开不同的层。如果要返回第二层的结果'2', 返回后两层的结果'1,2'。不同的层的结果 | |||||
按照这个顺序concat起来,默认为'2'。'mix'会使用可学习的权重结合不同层的表示(权重是否可训练与requires_grad保持一致, | 按照这个顺序concat起来,默认为'2'。'mix'会使用可学习的权重结合不同层的表示(权重是否可训练与requires_grad保持一致, | ||||
初始化权重对三层结果进行mean-pooling, 可以通过ElmoEmbedding.set_mix_weights_requires_grad()方法只将mix weights设置为可学习。) | 初始化权重对三层结果进行mean-pooling, 可以通过ElmoEmbedding.set_mix_weights_requires_grad()方法只将mix weights设置为可学习。) | ||||
:param requires_grad: bool, 该层是否需要gradient, 默认为False. | :param requires_grad: bool, 该层是否需要gradient, 默认为False. | ||||
@@ -43,7 +43,7 @@ class StaticEmbedding(TokenEmbedding): | |||||
如果输入为None则使用embedding_dim的维度随机初始化一个embedding。 | 如果输入为None则使用embedding_dim的维度随机初始化一个embedding。 | ||||
:param int embedding_dim: 随机初始化的embedding的维度,仅在model_dir_or_name为None时有效。 | :param int embedding_dim: 随机初始化的embedding的维度,仅在model_dir_or_name为None时有效。 | ||||
:param bool requires_grad: 是否需要gradient. 默认为True | :param bool requires_grad: 是否需要gradient. 默认为True | ||||
:param callable init_method: 如何初始化没有找到的值。可以使用torch.nn.init.*中各种方法。调用该方法时传入一个tensor对象。 | |||||
:param callable init_method: 如何初始化没有找到的值。可以使用torch.nn.init.*中各种方法。调用该方法时传入一个tensor对 | |||||
:param bool lower: 是否将vocab中的词语小写后再和预训练的词表进行匹配。如果你的词表中包含大写的词语,或者就是需要单独 | :param bool lower: 是否将vocab中的词语小写后再和预训练的词表进行匹配。如果你的词表中包含大写的词语,或者就是需要单独 | ||||
为大写的词语开辟一个vector表示,则将lower设置为False。 | 为大写的词语开辟一个vector表示,则将lower设置为False。 | ||||
:param float word_dropout: 以多大的概率将一个词替换为unk。这样既可以训练unk也是一定的regularize。 | :param float word_dropout: 以多大的概率将一个词替换为unk。这样既可以训练unk也是一定的regularize。 | ||||
@@ -84,7 +84,7 @@ class StaticEmbedding(TokenEmbedding): | |||||
if lowered_word not in lowered_vocab.word_count: | if lowered_word not in lowered_vocab.word_count: | ||||
lowered_vocab.add_word(lowered_word) | lowered_vocab.add_word(lowered_word) | ||||
lowered_vocab._no_create_word[lowered_word] += 1 | lowered_vocab._no_create_word[lowered_word] += 1 | ||||
print(f"All word in vocab have been lowered. There are {len(vocab)} words, {len(lowered_vocab)} unique lowered " | |||||
print(f"All word in the vocab have been lowered. There are {len(vocab)} words, {len(lowered_vocab)} unique lowered " | |||||
f"words.") | f"words.") | ||||
if model_path: | if model_path: | ||||
embedding = self._load_with_vocab(model_path, vocab=lowered_vocab, init_method=init_method) | embedding = self._load_with_vocab(model_path, vocab=lowered_vocab, init_method=init_method) | ||||
@@ -563,6 +563,8 @@ class WordpieceTokenizer(object): | |||||
output_tokens.append(self.unk_token) | output_tokens.append(self.unk_token) | ||||
else: | else: | ||||
output_tokens.extend(sub_tokens) | output_tokens.extend(sub_tokens) | ||||
if len(output_tokens)==0: | |||||
return [self.unk_token] | |||||
return output_tokens | return output_tokens | ||||
@@ -3,7 +3,8 @@ from functools import reduce | |||||
import torch | import torch | ||||
import torch.nn as nn | import torch.nn as nn | ||||
import torch.nn.init as init | import torch.nn.init as init | ||||
import glob | |||||
import os | |||||
def initial_parameter(net, initial_method=None): | def initial_parameter(net, initial_method=None): | ||||
"""A method used to initialize the weights of PyTorch models. | """A method used to initialize the weights of PyTorch models. | ||||
@@ -119,7 +120,6 @@ def get_dropout_mask(drop_p: float, tensor: torch.Tensor): | |||||
training=False, inplace=True) | training=False, inplace=True) | ||||
return mask_x | return mask_x | ||||
import glob | |||||
def _get_file_name_base_on_postfix(dir_path, postfix): | def _get_file_name_base_on_postfix(dir_path, postfix): | ||||
""" | """ | ||||
@@ -1,4 +1,5 @@ | |||||
import os | import os | ||||
import sys | |||||
import unittest | import unittest | ||||
from fastNLP import DataSet | from fastNLP import DataSet | ||||
@@ -79,6 +80,16 @@ class TestDataSetMethods(unittest.TestCase): | |||||
self.assertFalse("x" in dd.field_arrays) | self.assertFalse("x" in dd.field_arrays) | ||||
self.assertTrue("y" in dd.field_arrays) | self.assertTrue("y" in dd.field_arrays) | ||||
def test_delete_instance(self): | |||||
dd = DataSet() | |||||
old_length = 2 | |||||
dd.add_field("x", [[1, 2, 3]] * old_length) | |||||
dd.add_field("y", [[1, 2, 3, 4]] * old_length) | |||||
dd.delete_instance(0) | |||||
self.assertEqual(len(dd), old_length-1) | |||||
dd.delete_instance(0) | |||||
self.assertEqual(len(dd), old_length-2) | |||||
def test_getitem(self): | def test_getitem(self): | ||||
ds = DataSet({"x": [[1, 2, 3, 4]] * 40, "y": [[5, 6]] * 40}) | ds = DataSet({"x": [[1, 2, 3, 4]] * 40, "y": [[5, 6]] * 40}) | ||||
ins_1, ins_0 = ds[0], ds[1] | ins_1, ins_0 = ds[0], ds[1] | ||||
@@ -170,22 +170,22 @@ class TestFieldArray(unittest.TestCase): | |||||
def test_append(self): | def test_append(self): | ||||
with self.assertRaises(Exception): | with self.assertRaises(Exception): | ||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True) | |||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True, use_1st_ins_infer_dim_type=False) | |||||
fa.append(0) | fa.append(0) | ||||
with self.assertRaises(Exception): | with self.assertRaises(Exception): | ||||
fa = FieldArray("y", [1.1, 2.2, 3.3, 4.4, 5.5], is_input=True) | |||||
fa = FieldArray("y", [1.1, 2.2, 3.3, 4.4, 5.5], is_input=True, use_1st_ins_infer_dim_type=False) | |||||
fa.append([1, 2, 3, 4, 5]) | fa.append([1, 2, 3, 4, 5]) | ||||
with self.assertRaises(Exception): | with self.assertRaises(Exception): | ||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True) | |||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True, use_1st_ins_infer_dim_type=False) | |||||
fa.append([]) | fa.append([]) | ||||
with self.assertRaises(Exception): | with self.assertRaises(Exception): | ||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True) | |||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1, 2, 3, 4, 5]], is_input=True, use_1st_ins_infer_dim_type=False) | |||||
fa.append(["str", 0, 0, 0, 1.89]) | fa.append(["str", 0, 0, 0, 1.89]) | ||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1.0, 2.0, 3.0, 4.0, 5.0]], is_input=True) | |||||
fa = FieldArray("y", [[1.1, 2.2, 3.3, 4.4, 5.5], [1.0, 2.0, 3.0, 4.0, 5.0]], is_input=True, use_1st_ins_infer_dim_type=False) | |||||
fa.append([1.2, 2.3, 3.4, 4.5, 5.6]) | fa.append([1.2, 2.3, 3.4, 4.5, 5.6]) | ||||
self.assertEqual(len(fa), 3) | self.assertEqual(len(fa), 3) | ||||
self.assertEqual(fa[2], [1.2, 2.3, 3.4, 4.5, 5.6]) | self.assertEqual(fa[2], [1.2, 2.3, 3.4, 4.5, 5.6]) | ||||