@@ -9,12 +9,13 @@ import sys
 from fastNLP.core.log import logger
 from .topk_saver import TopkSaver
 from .callback import Callback
+from ..utils.exceptions import EarlyStopException


 class CheckpointCallback(Callback):
     def __init__(self, folder: Optional[Union[str, Path]] = None, every_n_epochs: Optional[int] = None,
-                 every_n_batches: Optional[int] = None, last: bool = False,
-                 on_exceptions: Optional[Union[BaseException, Sequence[BaseException]]] = None, topk: int = 0,
+                 every_n_batches: Optional[int] = None, last: bool = False, topk: int = 0,
+                 on_exceptions: Optional[Union[BaseException, Sequence[BaseException]]] = [EarlyStopException],
                  monitor: Optional[Union[str, Callable]] = None, larger_better: bool = True,
                  only_state_dict: bool = True, model_save_fn: Optional[Callable] = None, save_object: str = 'model',
                  save_evaluate_results=True, **kwargs):
@@ -49,7 +50,7 @@ class CheckpointCallback(Callback):
         :param every_n_batches: Save a checkpoint every this many batches.
         :param last: If True, save once at the end of every epoch, overwriting the previous save.
         :param topk: Keep the top-k checkpoints ranked by the monitor result.
-        :param on_exceptions: Whether to save when an exception is raised. Pass the exception classes that should be caught.
+        :param on_exceptions: Whether to save when an exception is raised. Pass the exception classes that should be caught. By default EarlyStopException is caught.
         :param larger_better: Whether a larger monitor value is better.
         :param only_state_dict: Whether to save only the state_dict when saving the model. Ignored when model_save_fn is not None.
         :param model_save_fn: A custom save function. When a save is triggered this function is called; it should accept a folder as its argument and return nothing.
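Review note: with the two hunks above, CheckpointCallback catches EarlyStopException by default, so an early-stopped run still leaves a checkpoint behind without extra configuration. A minimal usage sketch, assuming the fastNLP 1.0 import paths; the folder and monitor name are illustrative, not taken from this diff:

from fastNLP.core.callbacks import CheckpointCallback

# Keeps the two best checkpoints by the monitored value, and also saves a
# checkpoint whenever one of the exceptions in `on_exceptions` is raised;
# after this change that list defaults to [EarlyStopException].
ckpt_cb = CheckpointCallback(
    folder='checkpoints',   # illustrative output folder
    every_n_epochs=1,
    topk=2,
    monitor='acc',          # illustrative monitor name
    larger_better=True,
)
# Pass `callbacks=[ckpt_cb]` to the Trainer as usual.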
@@ -23,7 +23,7 @@ class Evaluator:
     driver: Driver
     _evaluate_batch_loop: Loop

-    def __init__(self, model, dataloaders, metrics: Optional[Union[Dict, Metric]] = None,
+    def __init__(self, model, dataloaders, metrics: Optional[Dict] = None,
                  driver: Union[str, Driver] = 'torch', device: Optional[Union[int, List[int], str]] = None,
                  evaluate_batch_step_fn: Optional[callable] = None, evaluate_fn: Optional[str] = None,
                  input_mapping: Optional[Union[Callable, Dict]] = None,
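Review note: the narrowed annotation means metrics should be passed as a dict mapping a result name to a metric object rather than as a bare Metric. A sketch of the expected call shape, assuming the fastNLP 1.0 top-level exports; model and val_dataloader are placeholders:

from fastNLP import Evaluator, Accuracy

evaluator = Evaluator(
    model=model,                  # placeholder: a trained torch.nn.Module
    dataloaders=val_dataloader,   # placeholder: an evaluation DataLoader
    metrics={'acc': Accuracy()},  # a name -> Metric dict, per the new annotation
    driver='torch',
    device=0,
)
results = evaluator.run()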
@@ -388,5 +388,8 @@ class _MetricsWrapper:
                 _results = metric.accumulate()
             else:
                 raise RuntimeError(f"Not support `{type(metric)}` for now.")
-            results[metric_name] = _results
+            if _results is not None:
+                results[metric_name] = _results
+            else:
+                logger.warning_once(f"Metric:{metric_name} returns None when getting metric results.")
         return results
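Review note: instead of writing None into the results dict, the wrapper now skips such metrics and warns once. A standalone sketch of the new guard, with the standard logging module standing in for fastNLP's logger:

import logging

logger = logging.getLogger("fastNLP")

def collect_results(metric_outputs: dict) -> dict:
    # metric_outputs maps metric_name -> the value returned by get_metric()/accumulate()
    results = {}
    for metric_name, _results in metric_outputs.items():
        if _results is not None:
            results[metric_name] = _results
        else:
            # fastNLP uses logger.warning_once here so the message appears only once
            logger.warning(f"Metric:{metric_name} returns None when getting metric results.")
    return results

print(collect_results({'acc': {'acc': 0.9}, 'broken': None}))  # {'acc': {'acc': 0.9}}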
@@ -40,8 +40,8 @@ def initialize_paddle_driver(driver: str, device: Optional[Union[str, int, List[
         if user_visible_devices is None:
             raise RuntimeError("To run paddle distributed training, please set `FASTNLP_BACKEND` to 'paddle' before using FastNLP.")
         if device is not None:
-            logger.warning_once("Parameter `device` would be ignored when you are using `paddle.distributed.launch` to pull "
-                                "up your script. And we will directly get the local device via environment variables.")
+            logger.rank_zero_warning("Parameter `device` would be ignored when you are using `paddle.distributed.launch` to pull "
+                                     "up your script. And we will directly get the local device via environment variables.", once=True)
         _visible_list = user_visible_devices.split(",")
         device = [ f"gpu:{_visible_list.index(g) }" for g in os.environ["CUDA_VISIBLE_DEVICES"].split(",")]
         # TODO Currently one process corresponds to exactly one card, so we pass a single device for now.
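Review note: a worked example of the device remapping in the context lines above; the environment values are made up:

# Suppose the user exposed three cards to fastNLP and the launcher assigned
# two of them to this process group:
user_visible_devices = "2,3,4"   # illustrative value of the user-visible-devices env var
cuda_visible_devices = "3,4"     # illustrative CUDA_VISIBLE_DEVICES set by paddle.distributed.launch

_visible_list = user_visible_devices.split(",")
device = [f"gpu:{_visible_list.index(g)}" for g in cuda_visible_devices.split(",")]
print(device)  # ['gpu:1', 'gpu:2'] -- indices are relative to the user-visible list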
@@ -26,9 +26,9 @@ def initialize_torch_driver(driver: str, device: Optional[Union[str, "torch.devi
     # world_size and rank
     if FASTNLP_BACKEND_LAUNCH in os.environ:
         if device is not None:
-            logger.warning_once("Parameter `device` would be ignored when you are using `torch.distributed.run` to pull "
+            logger.rank_zero_warning("Parameter `device` would be ignored when you are using `torch.distributed.run` to pull "
                                 "up your script. And we will directly get the local device via "
-                                "`os.environ['LOCAL_RANK']`.")
+                                "`os.environ['LOCAL_RANK']`.", once=True)
         return TorchDDPDriver(model, torch.device(f"cuda:{os.environ['LOCAL_RANK']}"), True, **kwargs)

     if driver not in {"torch", "fairscale"}:
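Review note: both driver initializers now route this message through logger.rank_zero_warning(..., once=True), so it is printed by the rank-0 process only, and only once per run. A standalone sketch of that behaviour, not fastNLP's actual implementation:

import logging
import os

logger = logging.getLogger("fastNLP")
_seen = set()

def rank_zero_warning(msg: str, once: bool = False) -> None:
    # Emit `msg` only on the rank-0 process; with once=True, suppress repeats.
    if int(os.environ.get("RANK", "0")) != 0:
        return
    if once:
        if msg in _seen:
            return
        _seen.add(msg)
    logger.warning(msg)

rank_zero_warning("Parameter `device` would be ignored ...", once=True)
rank_zero_warning("Parameter `device` would be ignored ...", once=True)  # suppressed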
@@ -564,9 +564,9 @@ class PretrainedConfig:
             raise EnvironmentError(msg)

         if resolved_config_file == config_file:
-            logger.info(f"loading configuration file {config_file}")
+            logger.debug(f"loading configuration file {config_file}")
         else:
-            logger.info(f"loading configuration file {config_file} from cache at {resolved_config_file}")
+            logger.debug(f"loading configuration file {config_file} from cache at {resolved_config_file}")

         return config_dict, kwargs
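Review note: this and the following hunks demote routine file-loading messages from info to debug so they no longer clutter default output. To see them again, lower the logger level; a one-liner, assuming the fastNLP logger follows the standard logging.Logger interface:

import logging
from fastNLP.core.log import logger

logger.setLevel(logging.DEBUG)  # re-enable the "loading configuration/weights file ..." messages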
@@ -603,7 +603,7 @@ class PretrainedConfig:
         for key in to_remove:
             kwargs.pop(key, None)

-        logger.info(f"Model config {config}")
+        logger.debug(f"Model config {config}")
         if return_unused_kwargs:
             return config, kwargs
         else:
@@ -1260,9 +1260,9 @@ class PreTrainedModel(Module, ModuleUtilsMixin, GenerationMixin):
                 raise EnvironmentError(msg)

             if resolved_archive_file == archive_file:
-                logger.info(f"loading weights file {archive_file}")
+                logger.debug(f"loading weights file {archive_file}")
             else:
-                logger.info(f"loading weights file {archive_file} from cache at {resolved_archive_file}")
+                logger.debug(f"loading weights file {archive_file} from cache at {resolved_archive_file}")
         else:
             resolved_archive_file = None
@@ -1700,9 +1700,9 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
                 continue

             if file_path == resolved_vocab_files[file_id]:
-                logger.info(f"loading file {file_path}")
+                logger.debug(f"loading file {file_path}")
             else:
-                logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
+                logger.debug(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")

         return cls._from_pretrained(
             resolved_vocab_files,