diff --git a/fastNLP/core/callbacks/checkpoint_callback.py b/fastNLP/core/callbacks/checkpoint_callback.py
index a18e61fa..625aea09 100644
--- a/fastNLP/core/callbacks/checkpoint_callback.py
+++ b/fastNLP/core/callbacks/checkpoint_callback.py
@@ -9,12 +9,13 @@ import sys
 from fastNLP.core.log import logger
 from .topk_saver import TopkSaver
 from .callback import Callback
+from ..utils.exceptions import EarlyStopException
 
 
 class CheckpointCallback(Callback):
     def __init__(self, folder: Optional[Union[str, Path]] = None, every_n_epochs: Optional[int] = None,
-                 every_n_batches: Optional[int] = None, last: bool = False,
-                 on_exceptions: Optional[Union[BaseException, Sequence[BaseException]]] = None, topk: int = 0,
+                 every_n_batches: Optional[int] = None, last: bool = False, topk: int = 0,
+                 on_exceptions: Optional[Union[BaseException, Sequence[BaseException]]] = [EarlyStopException],
                  monitor: Optional[Union[str, Callable]] = None, larger_better: bool = True,
                  only_state_dict: bool = True, model_save_fn: Optional[Callable] = None, save_object: str = 'model',
                  save_evaluate_results=True, **kwargs):
@@ -49,7 +50,7 @@ class CheckpointCallback(Callback):
         :param every_n_batches: 多少个 batch 保存一次。
         :param last: 如果为 True ,将在每次 epoch 运行结束都保存一次,会覆盖之前的保存。
         :param topk: 保存 monitor 结果 topK 个。
-        :param on_exceptions: 在出异常信息时,是否保存。传入需要捕获的异常的类。
+        :param on_exceptions: 在出异常信息时,是否保存。传入需要捕获的异常的类。默认将捕获 EarlyStopException 。
         :param larger_better: monitor 的值是否时越大越好。
         :param only_state_dict: 保存模型时是否只保存 state_dict 。当 model_save_fn 不为 None 时,该参数无效。
         :param model_save_fn: 个性化的保存函数,当触发保存操作时,就调用这个函数,这个函数应当接受一个文件夹作为参数,不返回任何东西。
diff --git a/fastNLP/core/controllers/evaluator.py b/fastNLP/core/controllers/evaluator.py
index fcee2460..8ac35ad2 100644
--- a/fastNLP/core/controllers/evaluator.py
+++ b/fastNLP/core/controllers/evaluator.py
@@ -23,7 +23,7 @@ class Evaluator:
     driver: Driver
     _evaluate_batch_loop: Loop
 
-    def __init__(self, model, dataloaders, metrics: Optional[Union[Dict, Metric]] = None,
+    def __init__(self, model, dataloaders, metrics: Optional[Dict] = None,
                  driver: Union[str, Driver] = 'torch', device: Optional[Union[int, List[int], str]] = None,
                  evaluate_batch_step_fn: Optional[callable] = None, evaluate_fn: Optional[str] = None,
                  input_mapping: Optional[Union[Callable, Dict]] = None,
@@ -388,5 +388,8 @@ class _MetricsWrapper:
                 _results = metric.accumulate()
             else:
                 raise RuntimeError(f"Not support `{type(metric)}` for now.")
-            results[metric_name] = _results
+            if _results is not None:
+                results[metric_name] = _results
+            else:
+                logger.warning_once(f"Metric:{metric_name} returns None when getting metric results.")
         return results
diff --git a/fastNLP/core/drivers/paddle_driver/initialize_paddle_driver.py b/fastNLP/core/drivers/paddle_driver/initialize_paddle_driver.py
index 22098ff2..552fc622 100644
--- a/fastNLP/core/drivers/paddle_driver/initialize_paddle_driver.py
+++ b/fastNLP/core/drivers/paddle_driver/initialize_paddle_driver.py
@@ -40,8 +40,8 @@ def initialize_paddle_driver(driver: str, device: Optional[Union[str, int, List[
     if user_visible_devices is None:
         raise RuntimeError("To run paddle distributed training, please set `FASTNLP_BACKEND` to 'paddle' before using FastNLP.")
     if device is not None:
-        logger.warning_once("Parameter `device` would be ignored when you are using `paddle.distributed.launch` to pull "
-                            "up your script. And we will directly get the local device via environment variables.")
+        logger.rank_zero_warning("Parameter `device` would be ignored when you are using `paddle.distributed.launch` to pull "
+                                 "up your script. And we will directly get the local device via environment variables.", once=True)
     _visible_list = user_visible_devices.split(",")
     device = [ f"gpu:{_visible_list.index(g) }" for g in os.environ["CUDA_VISIBLE_DEVICES"].split(",")]
     # TODO 目前一个进程仅对应一个卡,所以暂时传入单个
diff --git a/fastNLP/core/drivers/torch_driver/initialize_torch_driver.py b/fastNLP/core/drivers/torch_driver/initialize_torch_driver.py
index f9fac83f..723765d2 100644
--- a/fastNLP/core/drivers/torch_driver/initialize_torch_driver.py
+++ b/fastNLP/core/drivers/torch_driver/initialize_torch_driver.py
@@ -26,9 +26,9 @@ def initialize_torch_driver(driver: str, device: Optional[Union[str, "torch.devi
     # world_size 和 rank
     if FASTNLP_BACKEND_LAUNCH in os.environ:
         if device is not None:
-            logger.warning_once("Parameter `device` would be ignored when you are using `torch.distributed.run` to pull "
+            logger.rank_zero_warning("Parameter `device` would be ignored when you are using `torch.distributed.run` to pull "
                                 "up your script. And we will directly get the local device via "
-                                "`os.environ['LOCAL_RANK']`.")
+                                "`os.environ['LOCAL_RANK']`.", once=True)
         return TorchDDPDriver(model, torch.device(f"cuda:{os.environ['LOCAL_RANK']}"), True, **kwargs)
 
     if driver not in {"torch", "fairscale"}:
diff --git a/fastNLP/transformers/torch/configuration_utils.py b/fastNLP/transformers/torch/configuration_utils.py
index fb494d9f..948d9873 100644
--- a/fastNLP/transformers/torch/configuration_utils.py
+++ b/fastNLP/transformers/torch/configuration_utils.py
@@ -564,9 +564,9 @@ class PretrainedConfig:
             raise EnvironmentError(msg)
 
         if resolved_config_file == config_file:
-            logger.info(f"loading configuration file {config_file}")
+            logger.debug(f"loading configuration file {config_file}")
         else:
-            logger.info(f"loading configuration file {config_file} from cache at {resolved_config_file}")
+            logger.debug(f"loading configuration file {config_file} from cache at {resolved_config_file}")
 
         return config_dict, kwargs
 
@@ -603,7 +603,7 @@ class PretrainedConfig:
         for key in to_remove:
             kwargs.pop(key, None)
 
-        logger.info(f"Model config {config}")
+        logger.debug(f"Model config {config}")
         if return_unused_kwargs:
             return config, kwargs
         else:
diff --git a/fastNLP/transformers/torch/modeling_utils.py b/fastNLP/transformers/torch/modeling_utils.py
index d19816a3..74f370b6 100644
--- a/fastNLP/transformers/torch/modeling_utils.py
+++ b/fastNLP/transformers/torch/modeling_utils.py
@@ -1260,9 +1260,9 @@ class PreTrainedModel(Module, ModuleUtilsMixin, GenerationMixin):
                 raise EnvironmentError(msg)
 
             if resolved_archive_file == archive_file:
-                logger.info(f"loading weights file {archive_file}")
+                logger.debug(f"loading weights file {archive_file}")
             else:
-                logger.info(f"loading weights file {archive_file} from cache at {resolved_archive_file}")
+                logger.debug(f"loading weights file {archive_file} from cache at {resolved_archive_file}")
         else:
             resolved_archive_file = None
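Note: the two driver hunks above swap `logger.warning_once(...)` for `logger.rank_zero_warning(..., once=True)`, so the warning is both deduplicated and emitted only by the rank-0 process of a distributed run. A minimal sketch of that combined behavior, not fastNLP's actual implementation, assuming (hypothetically) that the global rank is exposed via a RANK environment variable:

import logging
import os

_logger = logging.getLogger(__name__)
_emitted = set()

def rank_zero_warning(msg: str, once: bool = False):
    # Only the rank-0 process logs; all other ranks stay silent.
    if int(os.environ.get("RANK", "0")) != 0:
        return
    # With once=True, an identical message is logged a single time.
    if once:
        if msg in _emitted:
            return
        _emitted.add(msg)
    _logger.warning(msg)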
logger.info(f"loading file {file_path}") + logger.debug(f"loading file {file_path}") else: - logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}") + logger.debug(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}") return cls._from_pretrained( resolved_vocab_files,