@@ -4,8 +4,6 @@ from types import DynamicClassAttribute | |||||
from functools import wraps | from functools import wraps | ||||
import fastNLP | |||||
__all__ = [ | __all__ = [ | ||||
'Events', | 'Events', | ||||
'EventsList', | 'EventsList', | ||||
@@ -11,6 +11,7 @@ from .callback import Callback | |||||
from fastNLP.core.log import logger | from fastNLP.core.log import logger | ||||
from .progress_callback import ProgressCallback, choose_progress_callback | from .progress_callback import ProgressCallback, choose_progress_callback | ||||
from fastNLP.envs import rank_zero_call | from fastNLP.envs import rank_zero_call | ||||
from fastNLP.core.utils.utils import _get_fun_msg | |||||
def _transfer(func): | def _transfer(func): | ||||
@@ -21,10 +22,12 @@ def _transfer(func): | |||||
def wrapper(manager, *arg, **kwargs): | def wrapper(manager, *arg, **kwargs): | ||||
manager.callback_counter[func.__name__] += 1 # 给实际被调用的 callback_fn 的计数加 1; | manager.callback_counter[func.__name__] += 1 # 给实际被调用的 callback_fn 的计数加 1; | ||||
returns = [] | |||||
for callback_fn in manager.callback_fns[func.__name__]: | for callback_fn in manager.callback_fns[func.__name__]: | ||||
returns.append(callback_fn(*arg, **kwargs)) | |||||
return returns | |||||
try: | |||||
callback_fn(*arg, **kwargs) | |||||
except BaseException as e: | |||||
logger.error(f"The following callback_fn raise exception:{_get_fun_msg(callback_fn)}.") | |||||
raise e | |||||
return wrapper | return wrapper | ||||
@@ -16,7 +16,7 @@ SUPPORTED_BACKENDS = ['torch', 'jittor', 'paddle', 'numpy', 'raw', 'auto', None] | |||||
CHECK_BACKEND = ['torch', 'jittor', 'paddle'] # backend 为 auto 时 检查是否是这些 backend | CHECK_BACKEND = ['torch', 'jittor', 'paddle'] # backend 为 auto 时 检查是否是这些 backend | ||||
def _get_backend(): | |||||
def _get_backend() -> str: | |||||
""" | """ | ||||
当 Collator 的 backend 为 None 的时候如何,通过这个函数自动判定其 backend 。判断方法主要为以下两个: | 当 Collator 的 backend 为 None 的时候如何,通过这个函数自动判定其 backend 。判断方法主要为以下两个: | ||||
(1)尝试通过向上寻找当前 collator 的 callee 对象,根据 callee 对象寻找。然后使用 '/site-packages/{backend}' 来寻找是否是 | (1)尝试通过向上寻找当前 collator 的 callee 对象,根据 callee 对象寻找。然后使用 '/site-packages/{backend}' 来寻找是否是 | ||||
@@ -57,7 +57,7 @@ def _get_backend(): | |||||
else: | else: | ||||
break | break | ||||
if len(catch_backend): | if len(catch_backend): | ||||
logger.debug(f"Find a file named:{catch_backend[1]} from stack contain backend:{catch_backend[0]}.") | |||||
logger.debug(f"Find a file named:{catch_backend[1]} from stack contains backend:{catch_backend[0]}.") | |||||
return catch_backend[0] | return catch_backend[0] | ||||
# 方式 (2) | # 方式 (2) | ||||
@@ -66,7 +66,7 @@ def _get_backend(): | |||||
if catch_backend: | if catch_backend: | ||||
break | break | ||||
if len(catch_backend): | if len(catch_backend): | ||||
logger.debug(f"Find a file named:{catch_backend[1]} from sys.modules contain backend:{catch_backend[0]}.") | |||||
logger.debug(f"Find a file named:{catch_backend[1]} from sys.modules contains backend:{catch_backend[0]}.") | |||||
return catch_backend[0] | return catch_backend[0] | ||||
return 'numpy' | return 'numpy' | ||||
@@ -80,7 +80,7 @@ class Collator: | |||||
时候自动根据设置以及数据情况,为每个 field 获取一个 padder ,在之后的每次调用中,都将使用对应的 Padder 给对应的 field 。 | 时候自动根据设置以及数据情况,为每个 field 获取一个 padder ,在之后的每次调用中,都将使用对应的 Padder 给对应的 field 。 | ||||
:param backend: 对于可以 pad 的 field,使用哪种 tensor,支持 ['torch','jittor','paddle','numpy','raw', auto, None]。 | :param backend: 对于可以 pad 的 field,使用哪种 tensor,支持 ['torch','jittor','paddle','numpy','raw', auto, None]。 | ||||
若为 'auto' ,则在进行 pad 的时候会根据调用的环境决定其 backend 。该参数对本身就不能进行 pad 的数据没用影响,不能 pad | |||||
若为 'auto' ,则在进行 pad 的时候会根据调用的环境决定其 backend 。该参数对不能进行 pad 的数据没用影响,不能 pad | |||||
的数据返回一定是 list 。 | 的数据返回一定是 list 。 | ||||
""" | """ | ||||
self.unpack_batch_func = None | self.unpack_batch_func = None | ||||
@@ -144,15 +144,18 @@ class Collator: | |||||
for key in unpack_batch.keys(): | for key in unpack_batch.keys(): | ||||
if key not in self.input_fields and key not in self.ignore_fields: | if key not in self.input_fields and key not in self.ignore_fields: | ||||
self.input_fields[key] = {'pad_val': 0, 'dtype': None, 'backend': self.backend} | self.input_fields[key] = {'pad_val': 0, 'dtype': None, 'backend': self.backend} | ||||
elif key in self.input_fields and self.input_fields[key]['backend'] == 'auto': | |||||
self.input_fields[key]['backend'] = self.backend | |||||
for field_name, setting in self.input_fields.items(): | for field_name, setting in self.input_fields.items(): | ||||
pad_fn = setting.get('pad_fn', None) | pad_fn = setting.get('pad_fn', None) | ||||
if callable(pad_fn): | if callable(pad_fn): | ||||
padder = pad_fn | padder = pad_fn | ||||
else: | else: | ||||
backend = self.backend if setting['backend'] == 'auto' else setting['backend'] | |||||
batch_field = unpack_batch.get(field_name) | batch_field = unpack_batch.get(field_name) | ||||
padder = get_padder(batch_field=batch_field, pad_val=setting['pad_val'], | padder = get_padder(batch_field=batch_field, pad_val=setting['pad_val'], | ||||
dtype=setting['dtype'], backend=setting['backend'], | |||||
dtype=setting['dtype'], backend=backend, | |||||
field_name=field_name) | field_name=field_name) | ||||
self.padders[field_name] = padder | self.padders[field_name] = padder | ||||
if self.batch_data_type == 'l': | if self.batch_data_type == 'l': | ||||
@@ -4,14 +4,15 @@ from dataclasses import dataclass | |||||
from fastNLP.core.controllers.trainer import Trainer | from fastNLP.core.controllers.trainer import Trainer | ||||
from fastNLP.core.metrics.accuracy import Accuracy | from fastNLP.core.metrics.accuracy import Accuracy | ||||
from fastNLP.core.callbacks.progress_callback import RichCallback | from fastNLP.core.callbacks.progress_callback import RichCallback | ||||
from fastNLP.envs.imports import _NEED_IMPORT_PADDLE | |||||
from paddle.optimizer import Adam | |||||
from paddle.io import DataLoader | |||||
if _NEED_IMPORT_PADDLE: | |||||
from paddle.optimizer import Adam | |||||
from paddle.io import DataLoader | |||||
from tests.helpers.models.paddle_model import PaddleNormalModel_Classification_1 | from tests.helpers.models.paddle_model import PaddleNormalModel_Classification_1 | ||||
from tests.helpers.datasets.paddle_data import PaddleRandomMaxDataset | from tests.helpers.datasets.paddle_data import PaddleRandomMaxDataset | ||||
from tests.helpers.callbacks.helper_callbacks import RecordLossCallback, RecordMetricCallback | |||||
from tests.helpers.utils import magic_argv_env_context | from tests.helpers.utils import magic_argv_env_context | ||||
@dataclass | @dataclass | ||||
@@ -100,17 +100,16 @@ def model_and_optimizers(request): | |||||
# 测试一下普通的情况; | # 测试一下普通的情况; | ||||
@pytest.mark.torch | @pytest.mark.torch | ||||
@pytest.mark.parametrize("driver,device", [("torch", "cpu"), ("torch", 1), ("torch", [0, 1])]) # ("torch", "cpu"), ("torch", 1), ("torch", [0, 1]) | @pytest.mark.parametrize("driver,device", [("torch", "cpu"), ("torch", 1), ("torch", [0, 1])]) # ("torch", "cpu"), ("torch", 1), ("torch", [0, 1]) | ||||
@pytest.mark.parametrize("callbacks", [[RecordMetricCallback(monitor="acc", metric_threshold=0.2, larger_better=True)]]) | |||||
@pytest.mark.parametrize("evaluate_every", [-3, -1, 100]) | @pytest.mark.parametrize("evaluate_every", [-3, -1, 100]) | ||||
@magic_argv_env_context | @magic_argv_env_context | ||||
def test_trainer_torch_with_evaluator( | def test_trainer_torch_with_evaluator( | ||||
model_and_optimizers: TrainerParameters, | model_and_optimizers: TrainerParameters, | ||||
driver, | driver, | ||||
device, | device, | ||||
callbacks, | |||||
evaluate_every, | evaluate_every, | ||||
n_epochs=10, | n_epochs=10, | ||||
): | ): | ||||
callbacks = [RecordMetricCallback(monitor="acc", metric_threshold=0.2, larger_better=True)] | |||||
trainer = Trainer( | trainer = Trainer( | ||||
model=model_and_optimizers.model, | model=model_and_optimizers.model, | ||||
driver=driver, | driver=driver, | ||||
@@ -172,7 +171,7 @@ def test_trainer_torch_with_evaluator_fp16_accumulation_steps( | |||||
if dist.is_initialized(): | if dist.is_initialized(): | ||||
dist.destroy_process_group() | dist.destroy_process_group() | ||||
@pytest.mark.torch | |||||
@pytest.mark.parametrize("driver,device", [("torch", 1)]) # ("torch", [0, 1]),("torch", 1) | @pytest.mark.parametrize("driver,device", [("torch", 1)]) # ("torch", [0, 1]),("torch", 1) | ||||
@magic_argv_env_context | @magic_argv_env_context | ||||
def test_trainer_validate_every( | def test_trainer_validate_every( | ||||
@@ -184,9 +183,7 @@ def test_trainer_validate_every( | |||||
def validate_every(trainer): | def validate_every(trainer): | ||||
if trainer.global_forward_batches % 10 == 0: | if trainer.global_forward_batches % 10 == 0: | ||||
print(trainer) | |||||
print("\nfastNLP test validate every.\n") | print("\nfastNLP test validate every.\n") | ||||
print(trainer.global_forward_batches) | |||||
return True | return True | ||||
trainer = Trainer( | trainer = Trainer( | ||||
@@ -1,11 +1,17 @@ | |||||
import pytest | import pytest | ||||
import numpy as np | |||||
from fastNLP.core.dataloaders.paddle_dataloader.fdl import PaddleDataLoader | from fastNLP.core.dataloaders.paddle_dataloader.fdl import PaddleDataLoader | ||||
from fastNLP.core.dataset import DataSet | from fastNLP.core.dataset import DataSet | ||||
from fastNLP.core.log import logger | from fastNLP.core.log import logger | ||||
from paddle.io import Dataset, DataLoader | |||||
import numpy as np | |||||
import paddle | |||||
from fastNLP.envs.imports import _NEED_IMPORT_PADDLE | |||||
if _NEED_IMPORT_PADDLE: | |||||
from paddle.io import Dataset, DataLoader | |||||
import paddle | |||||
else: | |||||
from fastNLP.core.utils.dummy_class import DummyClass as Dataset | |||||
class RandomDataset(Dataset): | class RandomDataset(Dataset): | ||||
@@ -14,9 +14,10 @@ from fastNLP.core.drivers.paddle_driver.dist_utils import ( | |||||
) | ) | ||||
from fastNLP.core.drivers.paddle_driver.fleet_launcher import FleetLauncher | from fastNLP.core.drivers.paddle_driver.fleet_launcher import FleetLauncher | ||||
from tests.helpers.utils import magic_argv_env_context | from tests.helpers.utils import magic_argv_env_context | ||||
import paddle | |||||
import paddle.distributed as dist | |||||
from fastNLP.envs.imports import _NEED_IMPORT_PADDLE | |||||
if _NEED_IMPORT_PADDLE: | |||||
import paddle | |||||
import paddle.distributed as dist | |||||
@pytest.mark.paddle | @pytest.mark.paddle | ||||
class TestDistUtilsTools: | class TestDistUtilsTools: | ||||
@@ -13,10 +13,11 @@ from tests.helpers.models.paddle_model import PaddleNormalModel_Classification_1 | |||||
from tests.helpers.datasets.paddle_data import PaddleNormalDataset, PaddleRandomMaxDataset | from tests.helpers.datasets.paddle_data import PaddleNormalDataset, PaddleRandomMaxDataset | ||||
from tests.helpers.utils import magic_argv_env_context | from tests.helpers.utils import magic_argv_env_context | ||||
from fastNLP.core import rank_zero_rm | from fastNLP.core import rank_zero_rm | ||||
import paddle | |||||
import paddle.distributed as dist | |||||
from paddle.io import DataLoader, BatchSampler | |||||
from fastNLP.envs.imports import _NEED_IMPORT_PADDLE | |||||
if _NEED_IMPORT_PADDLE: | |||||
import paddle | |||||
import paddle.distributed as dist | |||||
from paddle.io import DataLoader, BatchSampler | |||||
def generate_driver(num_labels, feature_dimension, device=[0,1], fp16=False, output_from_new_proc="only_error"): | def generate_driver(num_labels, feature_dimension, device=[0,1], fp16=False, output_from_new_proc="only_error"): | ||||
paddle_model = PaddleNormalModel_Classification_1(num_labels, feature_dimension) | paddle_model = PaddleNormalModel_Classification_1(num_labels, feature_dimension) | ||||
@@ -5,8 +5,9 @@ from fastNLP.core.drivers.paddle_driver.initialize_paddle_driver import initiali | |||||
from fastNLP.envs import get_gpu_count | from fastNLP.envs import get_gpu_count | ||||
from tests.helpers.models.paddle_model import PaddleNormalModel_Classification_1 | from tests.helpers.models.paddle_model import PaddleNormalModel_Classification_1 | ||||
from tests.helpers.utils import magic_argv_env_context | from tests.helpers.utils import magic_argv_env_context | ||||
import paddle | |||||
from fastNLP.envs.imports import _NEED_IMPORT_PADDLE | |||||
if _NEED_IMPORT_PADDLE: | |||||
import paddle | |||||
@pytest.mark.paddle | @pytest.mark.paddle | ||||
def test_incorrect_driver(): | def test_incorrect_driver(): | ||||
@@ -8,10 +8,12 @@ from tests.helpers.datasets.paddle_data import PaddleNormalDataset, PaddleRandom | |||||
from tests.helpers.datasets.torch_data import TorchNormalDataset | from tests.helpers.datasets.torch_data import TorchNormalDataset | ||||
from tests.helpers.models.torch_model import TorchNormalModel_Classification_1 | from tests.helpers.models.torch_model import TorchNormalModel_Classification_1 | ||||
from fastNLP.core import rank_zero_rm | from fastNLP.core import rank_zero_rm | ||||
import paddle | |||||
from paddle.io import DataLoader, BatchSampler | |||||
import torch | |||||
from fastNLP.envs.imports import _NEED_IMPORT_PADDLE, _NEED_IMPORT_TORCH | |||||
if _NEED_IMPORT_PADDLE: | |||||
import paddle | |||||
from paddle.io import DataLoader, BatchSampler | |||||
if _NEED_IMPORT_TORCH: | |||||
import torch | |||||
############################################################################ | ############################################################################ | ||||
# | # | ||||
@@ -7,9 +7,10 @@ from fastNLP.core.drivers.paddle_driver.utils import ( | |||||
replace_sampler, | replace_sampler, | ||||
) | ) | ||||
from fastNLP.core.samplers import RandomBatchSampler, RandomSampler | from fastNLP.core.samplers import RandomBatchSampler, RandomSampler | ||||
import paddle | |||||
from paddle.io import DataLoader, BatchSampler | |||||
from fastNLP.envs.imports import _NEED_IMPORT_PADDLE | |||||
if _NEED_IMPORT_PADDLE: | |||||
import paddle | |||||
from paddle.io import DataLoader, BatchSampler | |||||
from tests.helpers.datasets.paddle_data import PaddleNormalDataset | from tests.helpers.datasets.paddle_data import PaddleNormalDataset | ||||
@@ -8,10 +8,12 @@ from tests.helpers.datasets.torch_data import TorchNormalDataset, TorchArgMaxDat | |||||
from tests.helpers.datasets.paddle_data import PaddleNormalDataset | from tests.helpers.datasets.paddle_data import PaddleNormalDataset | ||||
from tests.helpers.models.paddle_model import PaddleNormalModel_Classification_1 | from tests.helpers.models.paddle_model import PaddleNormalModel_Classification_1 | ||||
from fastNLP.core import rank_zero_rm | from fastNLP.core import rank_zero_rm | ||||
import torch | |||||
from torch.utils.data import DataLoader, BatchSampler | |||||
import paddle | |||||
from fastNLP.envs.imports import _NEED_IMPORT_PADDLE, _NEED_IMPORT_TORCH | |||||
if _NEED_IMPORT_TORCH: | |||||
import torch | |||||
from torch.utils.data import DataLoader, BatchSampler | |||||
if _NEED_IMPORT_PADDLE: | |||||
import paddle | |||||
def dataloader_with_randombatchsampler(dataset, batch_size, shuffle, drop_last): | def dataloader_with_randombatchsampler(dataset, batch_size, shuffle, drop_last): | ||||
""" | """ | ||||
@@ -1,12 +1,14 @@ | |||||
import os | import os | ||||
import pytest | import pytest | ||||
import paddle | |||||
import paddle.distributed | |||||
import paddle.distributed.fleet.base.role_maker as role_maker | |||||
import paddle.distributed.fleet as fleet | |||||
from fastNLP.core.metrics import Accuracy | from fastNLP.core.metrics import Accuracy | ||||
from fastNLP.core.drivers.paddle_driver.fleet_launcher import FleetLauncher | from fastNLP.core.drivers.paddle_driver.fleet_launcher import FleetLauncher | ||||
from fastNLP.envs.imports import _NEED_IMPORT_PADDLE | |||||
if _NEED_IMPORT_PADDLE: | |||||
import paddle | |||||
import paddle.distributed | |||||
import paddle.distributed.fleet.base.role_maker as role_maker | |||||
import paddle.distributed.fleet as fleet | |||||
############################################################################ | ############################################################################ |
@@ -1,7 +1,9 @@ | |||||
import pytest | import pytest | ||||
import paddle | |||||
from fastNLP.core.utils.paddle_utils import paddle_to, paddle_move_data_to_device | from fastNLP.core.utils.paddle_utils import paddle_to, paddle_move_data_to_device | ||||
from fastNLP.envs.imports import _NEED_IMPORT_PADDLE | |||||
if _NEED_IMPORT_PADDLE: | |||||
import paddle | |||||
############################################################################ | ############################################################################ | ||||
@@ -36,12 +36,10 @@ class RecordMetricCallback(Callback): | |||||
self.larger_better = larger_better | self.larger_better = larger_better | ||||
self.metric = None | self.metric = None | ||||
self.metric_threshold = metric_threshold | self.metric_threshold = metric_threshold | ||||
self.metric_begin_value = None | |||||
self.metric_begin_value = float('-inf') if larger_better else float('inf') | |||||
def on_evaluate_end(self, trainer, results): | def on_evaluate_end(self, trainer, results): | ||||
self.metric = results[self.monitor] | self.metric = results[self.monitor] | ||||
if self.metric_begin_value is None: | |||||
self.metric_begin_value = self.metric | |||||
def on_train_end(self, trainer): | def on_train_end(self, trainer): | ||||
if self.larger_better: | if self.larger_better: | ||||
@@ -1,7 +1,12 @@ | |||||
import paddle | |||||
from paddle.io import Dataset | |||||
import numpy as np | import numpy as np | ||||
from fastNLP.envs.imports import _NEED_IMPORT_PADDLE | |||||
if _NEED_IMPORT_PADDLE: | |||||
import paddle | |||||
from paddle.io import Dataset | |||||
else: | |||||
from fastNLP.core.utils.dummy_class import DummyClass as Dataset | |||||
class PaddleNormalDataset(Dataset): | class PaddleNormalDataset(Dataset): | ||||
def __init__(self, num_of_data=1000): | def __init__(self, num_of_data=1000): | ||||
@@ -1,7 +1,12 @@ | |||||
import paddle | |||||
import paddle.nn as nn | |||||
class PaddleNormalModel_Classification_1(paddle.nn.Layer): | |||||
from fastNLP.envs.imports import _NEED_IMPORT_PADDLE | |||||
if _NEED_IMPORT_PADDLE: | |||||
import paddle | |||||
import paddle.nn as nn | |||||
from paddle.nn import Layer | |||||
else: | |||||
from fastNLP.core.utils.dummy_class import DummyClass as Layer | |||||
class PaddleNormalModel_Classification_1(Layer): | |||||
""" | """ | ||||
基础的paddle分类模型 | 基础的paddle分类模型 | ||||
""" | """ | ||||
@@ -32,7 +37,7 @@ class PaddleNormalModel_Classification_1(paddle.nn.Layer): | |||||
return {"pred": x, "target": y.reshape((-1,))} | return {"pred": x, "target": y.reshape((-1,))} | ||||
class PaddleNormalModel_Classification_2(paddle.nn.Layer): | |||||
class PaddleNormalModel_Classification_2(Layer): | |||||
""" | """ | ||||
基础的paddle分类模型,只实现 forward 函数测试用户自己初始化了分布式的场景 | 基础的paddle分类模型,只实现 forward 函数测试用户自己初始化了分布式的场景 | ||||
""" | """ | ||||
@@ -30,12 +30,12 @@ def recover_logger(fn): | |||||
return wrapper | return wrapper | ||||
def magic_argv_env_context(fn=None, timeout=600): | |||||
def magic_argv_env_context(fn=None, timeout=300): | |||||
""" | """ | ||||
用来在测试时包裹每一个单独的测试函数,使得 ddp 测试正确; | 用来在测试时包裹每一个单独的测试函数,使得 ddp 测试正确; | ||||
会丢掉 pytest 中的 arg 参数。 | 会丢掉 pytest 中的 arg 参数。 | ||||
:param timeout: 表示一个测试如果经过多久还没有通过的话就主动将其 kill 掉,默认为 10 分钟,单位为秒; | |||||
:param timeout: 表示一个测试如果经过多久还没有通过的话就主动将其 kill 掉,默认为 5 分钟,单位为秒; | |||||
:return: | :return: | ||||
""" | """ | ||||
# 说明是通过 @magic_argv_env_context(timeout=600) 调用; | # 说明是通过 @magic_argv_env_context(timeout=600) 调用; | ||||