Browse Source

Merge branch 'dev0.8.0' of github.com:fastnlp/fastNLP into dev0.8.0

tags/v1.0.0alpha
yh_cc 2 years ago
parent
commit
048e409233
9 changed files with 521 additions and 430 deletions
  1. +2
    -0
      fastNLP/core/dataloaders/torch_dataloader/fdl.py
  2. +8
    -8
      fastNLP/core/drivers/paddle_driver/fleet.py
  3. +1
    -1
      fastNLP/core/drivers/paddle_driver/initialize_paddle_driver.py
  4. +8
    -5
      fastNLP/core/drivers/torch_driver/torch_driver.py
  5. +2
    -1
      fastNLP/core/metrics/backend/paddle_backend/backend.py
  6. +1
    -1
      fastNLP/core/metrics/backend/torch_backend/backend.py
  7. +5
    -3
      tests/core/dataloaders/torch_dataloader/test_fdl.py
  8. +435
    -411
      tests/core/drivers/paddle_driver/test_single_device.py
  9. +59
    -0
      tests/core/metrics/test_accutacy_paddle.py

+ 2
- 0
fastNLP/core/dataloaders/torch_dataloader/fdl.py View File

@@ -98,6 +98,7 @@ class TorchDataLoader(DataLoader):
def __getattr__(self, item):
"""
为FDataLoader提供dataset的方法和属性,实现该方法后,用户可以在FDataLoader实例化后使用apply等dataset的方法

:param item:
:return:
"""
@@ -119,6 +120,7 @@ class TorchDataLoader(DataLoader):
"""
设置每个field_name的padding值,默认为0,只有当autocollate存在时该方法有效, 若没有则会添加auto_collator函数
当val=None时,意味着给定的field_names都不需要尝试padding

:param field_names:
:param val: padding值,默认为0
:return:


+ 8
- 8
fastNLP/core/drivers/paddle_driver/fleet.py View File

@@ -37,7 +37,7 @@ if _NEED_IMPORT_PADDLE:
import paddle
from paddle import DataParallel
import paddle.distributed.fleet as fleet
import paddle.distributed as dist
import paddle.distributed as paddledist
from paddle.io import BatchSampler
from paddle.optimizer import Optimizer
from paddle.fluid.reader import _DatasetKind
@@ -185,8 +185,8 @@ class PaddleFleetDriver(PaddleDriver):
if sorted(pre_gpus) != sorted(self.parallel_device):
raise RuntimeError("Notice you are using `PaddleFleetDriver` after one instantiated `PaddleFleetDriver`, it is not"
"allowed that your second `PaddleFleetDriver` has a new setting of parameters `parallel_device`.")
self.world_size = dist.get_world_size()
self.global_rank = dist.get_rank()
self.world_size = paddledist.get_world_size()
self.global_rank = paddledist.get_rank()

if not self.outside_fleet:
# self.model.to(self.model_device)
@@ -197,12 +197,12 @@ class PaddleFleetDriver(PaddleDriver):
# 初始化 self._pids,从而使得每一个进程都能接受到 rank0 的 send 操作;
# TODO 不用.to会怎么样?
self._pids = []
dist.all_gather(self._pids, paddle.to_tensor(os.getpid(), dtype="int32"))
paddledist.all_gather(self._pids, paddle.to_tensor(os.getpid(), dtype="int32"))
# TODO LOCAL_WORLD_SIZE
local_world_size = int(os.environ.get("LOCAL_WORLD_SIZE")) if "LOCAL_WORLD_SIZE" in os.environ else None
if local_world_size is None:
local_world_size = paddle.to_tensor(self.local_rank, dtype="int32")
dist.all_reduce(local_world_size, op=dist.ReduceOp.MAX)
paddledist.all_reduce(local_world_size, op=paddledist.ReduceOp.MAX)
local_world_size = local_world_size.item() + 1

node_rank = self.global_rank // local_world_size
@@ -232,11 +232,11 @@ class PaddleFleetDriver(PaddleDriver):
当用户使用了 `python -m paddle.distributed.launch xxx.py` 启动时,我们需要
根据 paddle 设置的环境变量来获得各种属性
"""
self.world_size = dist.get_world_size()
self.global_rank = dist.get_rank()
self.world_size = paddledist.get_world_size()
self.global_rank = paddledist.get_rank()

def barrier(self):
dist.barrier()
paddledist.barrier()

def configure_fleet(self):
if not self._has_fleetwrapped and not isinstance(self.model, DataParallel):


+ 1
- 1
fastNLP/core/drivers/paddle_driver/initialize_paddle_driver.py View File

@@ -28,7 +28,7 @@ def initialize_paddle_driver(driver: str, device: Optional[Union[str, int, List[
"""
if is_in_paddle_launch_dist():
if device is not None:
logger.warning("Parameter `device` would be ignored when you are using `paddle.distributed.launch` to pull "
logger.warning_once("Parameter `device` would be ignored when you are using `paddle.distributed.launch` to pull "
"up your script. And we will directly get the local device via "
"and `os.environ['CUDA_VISIBLE_DEVICES']``.")
device = [int(g) for g in os.environ["CUDA_VISIBLE_DEVICES"].split(",")]


+ 8
- 5
fastNLP/core/drivers/torch_driver/torch_driver.py View File

@@ -255,11 +255,14 @@ class TorchDriver(Driver):
logger.debug("Load model...")

# 3. 加载fp16的状态
if 'grad_scaler_state_dict' in states:
grad_scaler_state_dict = states.pop('grad_scaler_state_dict')
if not isinstance(self.grad_scaler, DummyGradScaler):
self.grad_scaler.load_state_dict(grad_scaler_state_dict)
logger.debug("Load grad_scaler state dict...")
if "grad_scaler_state_dict" in states:
grad_scaler_state_dict = states.pop("grad_scaler_state_dict")
if isinstance(self.grad_scaler, DummyGradScaler):
self.auto_cast, _grad_scaler = _build_fp16_env(dummy=False)
self.grad_scaler = _grad_scaler()
self.fp16 = True
self.grad_scaler.load_state_dict(grad_scaler_state_dict)
logger.debug("Load grad_scaler state dict...")
elif not isinstance(self.grad_scaler, DummyGradScaler):
logger.warning(f"Checkpoint {folder} is not trained with fp16=True, while resume to a fp16=True training, "
f"the training process may be unstable.")


+ 2
- 1
fastNLP/core/metrics/backend/paddle_backend/backend.py View File

@@ -14,11 +14,13 @@ if _NEED_IMPORT_PADDLE:
import paddle.distributed as dist
from paddle.fluid.dygraph import parallel_helper


def _simple_gather_all_tensors(result, group: Any, world_size: int) -> List:
gathered_result = [paddle.zeros_like(result) for _ in range(world_size)]
dist.all_gather(gathered_result, result, group)
return gathered_result


class PaddleBackend(Backend):
def __init__(self):
super().__init__()
@@ -124,4 +126,3 @@ class PaddleBackend(Backend):
# TODO 如果在这里处理的话,会不会在别的地方引起bug?
device = get_device_from_visible(device)
return paddle_to(tensor, device)


+ 1
- 1
fastNLP/core/metrics/backend/torch_backend/backend.py View File

@@ -32,7 +32,7 @@ class TorchBackend(Backend):
if dist.is_initialized():
if method is None:
raise AggregateMethodError(should_have_aggregate_method=True)
tensor = fastnlp_torch_all_gather(tensor)
tensor = self.all_gather_object(tensor)
if isinstance(tensor[0], torch.Tensor):
tensor = torch.stack(tensor)
# 第一步, aggregate结果


+ 5
- 3
tests/core/dataloaders/torch_dataloader/test_fdl.py View File

@@ -21,11 +21,12 @@ class TestFdl:
ds.set_pad_val("x", val=-1)
fdl = TorchDataLoader(ds, batch_size=3)
fdl.set_input("x", "y")
fdl.set_pad_val("x", val=None)
for batch in fdl:
print(batch)
fdl.set_pad_val("x", val=-2)
for batch in fdl:
print(batch)
# fdl.set_pad_val("x", val=-2)
# for batch in fdl:
# print(batch)

def test_add_collator(self):
ds = DataSet({"x": [[1, 2], [2, 3, 4], [4, 5, 6, 7]] * 10, "y": [1, 0, 1] * 10})
@@ -38,6 +39,7 @@ class TestFdl:

fdl = TorchDataLoader(ds, batch_size=3, as_numpy=True)
fdl.set_input("x", "y")
# fdl.set_pad_val("x", val=None)
fdl.add_collator(collate_fn)
for batch in fdl:
print(batch)


+ 435
- 411
tests/core/drivers/paddle_driver/test_single_device.py View File

@@ -1,3 +1,4 @@
from dataclasses import replace
import os
from re import S
os.environ["FASTNLP_BACKEND"] = "paddle"
@@ -16,203 +17,303 @@ import paddle
from paddle.io import DataLoader, BatchSampler
import torch


############################################################################
#
# 测试save和load相关的功能
# 测试基类 PaddleDrvier 中的一些简单函数
#
############################################################################

def generate_random_driver(features, labels):
"""
生成driver
"""
model = PaddleNormalModel_Classification_1(labels, features)
opt = paddle.optimizer.Adam(parameters=model.parameters(), learning_rate=0.01)
driver = PaddleSingleDriver(model, device="cpu")
driver.set_optimizers(opt)
driver.setup()

return driver

@pytest.fixture
def prepare_test_save_load():
dataset = PaddleRandomMaxDataset(320, 10)
dataloader = DataLoader(dataset, batch_size=32)
driver1, driver2 = generate_random_driver(10, 10), generate_random_driver(10, 10)
return driver1, driver2, dataloader

@pytest.mark.parametrize("only_state_dict", ([True, False]))
def test_save_and_load_with_randombatchsampler(only_state_dict):
class TestPaddleDriverFunctions:
"""
测试save和load函数,主要测试 dataloader 被替换了 sampler 之后的情况
使用 PaddleSingleDriver 测试基类的函数
"""

try:
path = "model.ckp"
@classmethod
def setup_class(self):
model = PaddleNormalModel_Classification_1(10, 32)
self.driver = PaddleSingleDriver(model, device="cpu")

driver1, driver2 = generate_random_driver(10, 10), generate_random_driver(10, 10)
dataset = PaddleRandomMaxDataset(80, 10)
dataloader = DataLoader(
dataset=dataset,
batch_sampler=RandomBatchSampler(BatchSampler(dataset, batch_size=4), 4, False)
def test_check_single_optimizer_legality(self):
"""
测试传入单个optimizer时的表现
"""
optimizer = paddle.optimizer.Adam(
parameters=self.driver.model.parameters(),
learning_rate=0.01
)
num_consumed_batches = 2

# TODO 断点重训完善后在这里迭代几次
already_seen_set = set()
for idx, batch in enumerate(dataloader):
if idx >= num_consumed_batches:
break
already_seen_set.update(batch)
self.driver.set_optimizers(optimizer)

sampler_states = dataloader.batch_sampler.state_dict()
save_states = {"num_consumed_batches": num_consumed_batches}
if only_state_dict:
driver1.save(Path(path), save_states, dataloader, only_state_dict, should_save_model=True)
else:
driver1.save(Path(path), save_states, dataloader, only_state_dict, should_save_model=True, input_spec=[paddle.ones((16, 10))])
# 加载
# 更改 batch_size
dataloader = DataLoader(
dataset=dataset,
batch_sampler=RandomBatchSampler(BatchSampler(dataset, batch_size=2), 2, False)
)
load_states = driver2.load(Path(path), dataloader, only_state_dict, should_load_model=True)
replaced_loader = load_states.pop("dataloader")
optimizer = torch.optim.Adam(TorchNormalModel_Classification_1(10, 32).parameters(), 0.01)
# 传入torch的optimizer时,应该报错ValueError
with pytest.raises(ValueError):
self.driver.set_optimizers(optimizer)

# 1. 检查 optimizer 的状态
# TODO optimizer 的 state_dict 总是为空
def test_check_optimizers_legality(self):
"""
测试传入optimizer list的表现
"""
optimizers = [
paddle.optimizer.Adam(
parameters=self.driver.model.parameters(),
learning_rate=0.01
) for i in range(10)
]

# 2. 检查 batch_sampler 是否被正确地加载和替换
assert isinstance(replaced_loader.batch_sampler, RandomBatchSampler)
assert replaced_loader.batch_sampler.index_list == sampler_states["index_list"]
assert replaced_loader.batch_sampler.data_idx == sampler_states["data_idx"]
self.driver.set_optimizers(optimizers)

# 3. 检查 model 的参数是否被正确加载
for batch in dataloader:
res1 = driver1.model.evaluate_step(**batch)
res2 = driver2.model.evaluate_step(**batch)
optimizers += [
torch.optim.Adam(TorchNormalModel_Classification_1(10, 32).parameters(), 0.01)
]

assert paddle.equal_all(res1["pred"], res2["pred"])
with pytest.raises(ValueError):
self.driver.set_optimizers(optimizers)

# 4. 检查 batch_idx
start_batch = load_states.pop('batch_idx_in_epoch')
assert start_batch == 2 * num_consumed_batches
left_batches = set()
for idx, batch in enumerate(replaced_loader):
left_batches.update(batch)
def test_check_dataloader_legality_in_train(self):
"""
测试is_train参数为True时,_check_dataloader_legality函数的表现
"""
dataloader = paddle.io.DataLoader(PaddleNormalDataset())
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", True)

assert len(left_batches) + len(already_seen_set) == len(dataset)
assert len(left_batches | already_seen_set) == len(dataset)
# batch_size 和 batch_sampler 均为 None 的情形
dataloader = paddle.io.DataLoader(PaddleNormalDataset(), batch_size=None)
with pytest.raises(ValueError):
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", True)

# 创建torch的dataloader
dataloader = torch.utils.data.DataLoader(
TorchNormalDataset(),
batch_size=32, shuffle=True
)
with pytest.raises(ValueError):
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", True)

finally:
synchronize_safe_rm(path)
def test_check_dataloader_legality_in_test(self):
"""
测试is_train参数为False时,_check_dataloader_legality函数的表现
"""
# 此时传入的应该是dict
dataloader = {
"train": paddle.io.DataLoader(PaddleNormalDataset()),
"test":paddle.io.DataLoader(PaddleNormalDataset())
}
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", False)

@pytest.mark.parametrize("only_state_dict", ([True, False]))
def test_save_and_load_with_randomsampler(only_state_dict):
"""
测试save和load函数,主要测试 dataloader 被替换了 batch_sampler 的情况
"""
# batch_size 和 batch_sampler 均为 None 的情形
dataloader = {
"train": paddle.io.DataLoader(PaddleNormalDataset()),
"test":paddle.io.DataLoader(PaddleNormalDataset(), batch_size=None)
}
with pytest.raises(ValueError):
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", False)

try:
path = "model.ckp"
# 传入的不是dict,应该报错
dataloader = paddle.io.DataLoader(PaddleNormalDataset())
with pytest.raises(ValueError):
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", False)

driver1, driver2 = generate_random_driver(10, 10), generate_random_driver(10, 10)
dataset = PaddleRandomMaxDataset(80, 10)
batch_sampler = BatchSampler(dataset=dataset, batch_size=2)
batch_sampler.sampler = RandomSampler(dataset, True)
dataloader = DataLoader(
dataset,
batch_sampler=batch_sampler
# 创建torch的dataloader
train_loader = torch.utils.data.DataLoader(
TorchNormalDataset(),
batch_size=32, shuffle=True
)
num_consumed_batches = 2

# TODO 断点重训完善后在这里迭代几次
already_seen_set = set()
for idx, batch in enumerate(dataloader):
if idx >= num_consumed_batches:
break
already_seen_set.update(batch)

sampler_states = dataloader.batch_sampler.sampler.state_dict()
save_states = {"num_consumed_batches": num_consumed_batches}
if only_state_dict:
driver1.save(Path(path), save_states, dataloader, only_state_dict, should_save_model=True)
else:
driver1.save(Path(path), save_states, dataloader, only_state_dict, should_save_model=True, input_spec=[paddle.ones((16, 10))])

# 加载
# 更改 batch_size
dataloader = DataLoader(
dataset=dataset,
batch_sampler=RandomBatchSampler(BatchSampler(dataset, batch_size=2), 2, False)
test_loader = torch.utils.data.DataLoader(
TorchNormalDataset(),
batch_size=32, shuffle=True
)
load_states = driver2.load(Path(path), dataloader, only_state_dict, should_load_model=True)
replaced_loader = load_states.pop("dataloader")
dataloader = {"train": train_loader, "test": test_loader}
with pytest.raises(ValueError):
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", False)

# 1. 检查 optimizer 的状态
# TODO optimizer 的 state_dict 总是为空
def test_tensor_to_numeric(self):
"""
测试tensor_to_numeric函数
"""
# 单个张量
tensor = paddle.to_tensor(3)
res = PaddleSingleDriver.tensor_to_numeric(tensor)
assert res == 3

# 2. 检查 sampler 是否被正确地加载和替换
replaced_loader = load_states["dataloader"]
tensor = paddle.rand((3, 4))
res = PaddleSingleDriver.tensor_to_numeric(tensor)
assert res == tensor.tolist()

assert isinstance(replaced_loader.batch_sampler.sampler, RandomSampler)
assert replaced_loader.batch_sampler.sampler.seed == sampler_states["seed"]
assert replaced_loader.batch_sampler.sampler.epoch == sampler_states["epoch"]
assert replaced_loader.batch_sampler.sampler.num_consumed_samples == sampler_states["num_consumed_samples"]
assert len(replaced_loader.batch_sampler.sampler.dataset) == sampler_states["length"]
assert replaced_loader.batch_sampler.sampler.shuffle == sampler_states["shuffle"]
# 张量list
tensor_list = [paddle.rand((6, 4, 2)) for i in range(10)]
res = PaddleSingleDriver.tensor_to_numeric(tensor_list)
assert isinstance(res, list)
tensor_list = [t.tolist() for t in tensor_list]
assert res == tensor_list

# 3. 检查 model 的参数是否被正确加载
for batch in dataloader:
res1 = driver1.model.evaluate_step(**batch)
res2 = driver2.model.evaluate_step(**batch)
# 张量tuple
tensor_tuple = tuple([paddle.rand((6, 4, 2)) for i in range(10)])
res = PaddleSingleDriver.tensor_to_numeric(tensor_tuple)
assert isinstance(res, tuple)
tensor_tuple = tuple([t.tolist() for t in tensor_tuple])
assert res == tensor_tuple

assert paddle.equal_all(res1["pred"], res2["pred"])
# 张量dict
tensor_dict = {
"tensor": paddle.rand((3, 4)),
"list": [paddle.rand((6, 4, 2)) for i in range(10)],
"dict":{
"list": [paddle.rand((6, 4, 2)) for i in range(10)],
"tensor": paddle.rand((3, 4))
},
"int": 2,
"string": "test string"
}

# 4. 检查 batch_idx
start_batch = load_states.pop('batch_idx_in_epoch')
assert start_batch == 2 * num_consumed_batches
left_batches = set()
for idx, batch in enumerate(replaced_loader):
left_batches.update(batch)
res = PaddleSingleDriver.tensor_to_numeric(tensor_dict)
assert isinstance(res, dict)
assert res["tensor"] == tensor_dict["tensor"].tolist()
assert isinstance(res["list"], list)
for r, d in zip(res["list"], tensor_dict["list"]):
assert r == d.tolist()
assert isinstance(res["int"], int)
assert isinstance(res["string"], str)
assert isinstance(res["dict"], dict)
assert isinstance(res["dict"]["list"], list)
for r, d in zip(res["dict"]["list"], tensor_dict["dict"]["list"]):
assert r == d.tolist()
assert res["dict"]["tensor"] == tensor_dict["dict"]["tensor"].tolist()

assert len(left_batches) + len(already_seen_set) == len(dataset)
assert len(left_batches | already_seen_set) == len(dataset)
finally:
synchronize_safe_rm(path)
def test_set_model_mode(self):
"""
测试set_model_mode函数
"""
self.driver.set_model_mode("train")
assert self.driver.model.training
self.driver.set_model_mode("eval")
assert not self.driver.model.training
# 应该报错
with pytest.raises(AssertionError):
self.driver.set_model_mode("test")

@pytest.mark.parametrize("only_state_dict", ([True, False]))
def test_save_and_load_model(prepare_test_save_load, only_state_dict):
"""
测试 save_model 和 load_model 函数
"""
try:
path = "model"
driver1, driver2, dataloader = prepare_test_save_load
def test_move_model_to_device_cpu(self):
"""
测试move_model_to_device函数
"""
PaddleSingleDriver.move_model_to_device(self.driver.model, "cpu")
assert self.driver.model.linear1.weight.place.is_cpu_place()

if only_state_dict:
driver1.save_model(path, only_state_dict)
def test_move_model_to_device_gpu(self):
"""
测试move_model_to_device函数
"""
PaddleSingleDriver.move_model_to_device(self.driver.model, "gpu")
assert self.driver.model.linear1.weight.place.is_gpu_place()
assert self.driver.model.linear1.weight.place.gpu_device_id() == 0

def test_worker_init_function(self):
"""
测试worker_init_function
"""
# 先确保不影响运行
# TODO:正确性
PaddleSingleDriver.worker_init_function(0)

def test_set_deterministic_dataloader(self):
"""
测试set_deterministic_dataloader
"""
# 先确保不影响运行
# TODO:正确性
dataloader = DataLoader(PaddleNormalDataset())
self.driver.set_deterministic_dataloader(dataloader)

def test_set_sampler_epoch(self):
"""
测试set_sampler_epoch
"""
# 先确保不影响运行
# TODO:正确性
dataloader = DataLoader(PaddleNormalDataset())
self.driver.set_sampler_epoch(dataloader, 0)

@pytest.mark.parametrize("batch_size", [16])
@pytest.mark.parametrize("shuffle", [True, False])
@pytest.mark.parametrize("drop_last", [True, False])
def test_get_dataloader_args(self, batch_size, shuffle, drop_last):
"""
测试正常情况下 get_dataloader_args 的表现
"""
dataloader = DataLoader(
PaddleNormalDataset(),
batch_size=batch_size,
shuffle=shuffle,
drop_last=drop_last,
)
res = PaddleSingleDriver.get_dataloader_args(dataloader)

assert isinstance(res.dataset, PaddleNormalDataset)
assert isinstance(res.batch_sampler, BatchSampler)
if shuffle:
assert isinstance(res.sampler, paddle.io.RandomSampler)
else:
driver1.save_model(path, only_state_dict, input_spec=[paddle.ones((32, 10))])
driver2.load_model(path, only_state_dict)
assert isinstance(res.sampler, paddle.io.SequenceSampler)
assert res.shuffle == shuffle
assert res.batch_size == batch_size
assert res.drop_last == drop_last

for batch in dataloader:
batch = driver1.move_data_to_device(batch)
res1 = driver1.model.evaluate_step(**batch)
res2 = driver2.model.evaluate_step(**batch)
@pytest.mark.parametrize("batch_size", [16])
@pytest.mark.parametrize("shuffle", [True, False])
@pytest.mark.parametrize("drop_last", [True, False])
def test_get_dataloader_args_with_randombatchsampler(self, batch_size, shuffle, drop_last):
"""
测试替换了 batch_sampler 后 get_dataloader_args 的表现
"""
dataset = PaddleNormalDataset()
dataloader = DataLoader(
dataset,
batch_sampler=RandomBatchSampler(
BatchSampler(dataset, batch_size=batch_size, shuffle=shuffle),
batch_size,
drop_last,
)
)
res = PaddleSingleDriver.get_dataloader_args(dataloader)

assert paddle.equal_all(res1["pred"], res2["pred"])
finally:
if only_state_dict:
synchronize_safe_rm(path)
assert isinstance(res.dataset, PaddleNormalDataset)
assert isinstance(res.batch_sampler, RandomBatchSampler)
if shuffle:
assert isinstance(res.sampler, paddle.io.RandomSampler)
else:
synchronize_safe_rm(path + ".pdiparams")
synchronize_safe_rm(path + ".pdiparams.info")
synchronize_safe_rm(path + ".pdmodel")
assert isinstance(res.sampler, paddle.io.SequenceSampler)
assert res.shuffle == shuffle
assert res.batch_size == batch_size
assert res.drop_last == drop_last

@pytest.mark.parametrize("batch_size", [16])
@pytest.mark.parametrize("shuffle", [True, False])
@pytest.mark.parametrize("drop_last", [True, False])
def test_get_dataloader_args_with_randomsampler(self, batch_size, shuffle, drop_last):
"""
测试替换了 sampler 后 get_dataloader_args 的表现
"""
dataset = PaddleNormalDataset()
batch_sampler = BatchSampler(dataset, batch_size=batch_size, drop_last=drop_last)
batch_sampler.sampler = RandomSampler(dataset, shuffle)
dataloader = DataLoader(
dataset,
batch_sampler=batch_sampler,
)
res = PaddleSingleDriver.get_dataloader_args(dataloader)

assert isinstance(res.dataset, PaddleNormalDataset)
assert isinstance(res.batch_sampler, BatchSampler)
assert isinstance(res.sampler, RandomSampler)
assert res.shuffle == shuffle
assert res.batch_size == batch_size
assert res.drop_last == drop_last


############################################################################
#
# 测试 PaddleSingleDrvier 中的一些简单函数
#
############################################################################

class TestSingleDeviceFunction:
"""
@@ -242,6 +343,12 @@ class TestSingleDeviceFunction:
self.driver.move_data_to_device(paddle.rand((32, 64)))


############################################################################
#
# 测试 set_dist_repro_dataloader 函数
#
############################################################################

class TestSetDistReproDataloder:
"""
专门测试 set_dist_repro_dataloader 函数的类
@@ -423,287 +530,204 @@ class TestSetDistReproDataloder:
assert len(left_idxes) + len(already_seen_idx) == len(self.dataset)
assert len(left_idxes | already_seen_idx) == len(self.dataset)

class TestPaddleDriverFunctions:
############################################################################
#
# 测试 save 和 load 相关的功能
#
############################################################################

def generate_random_driver(features, labels):
"""
使用 PaddleSingleDriver 测试基类的函数
生成driver
"""
model = PaddleNormalModel_Classification_1(labels, features)
opt = paddle.optimizer.Adam(parameters=model.parameters(), learning_rate=0.01)
driver = PaddleSingleDriver(model, device="cpu")
driver.set_optimizers(opt)
driver.setup()

@classmethod
def setup_class(self):
model = PaddleNormalModel_Classification_1(10, 32)
self.driver = PaddleSingleDriver(model, device="cpu")

def test_check_single_optimizer_legality(self):
"""
测试传入单个optimizer时的表现
"""
optimizer = paddle.optimizer.Adam(
parameters=self.driver.model.parameters(),
learning_rate=0.01
)

self.driver.set_optimizers(optimizer)
return driver

optimizer = torch.optim.Adam(TorchNormalModel_Classification_1(10, 32).parameters(), 0.01)
# 传入torch的optimizer时,应该报错ValueError
with pytest.raises(ValueError):
self.driver.set_optimizers(optimizer)
@pytest.fixture
def prepare_test_save_load():
dataset = PaddleRandomMaxDataset(320, 10)
dataloader = DataLoader(dataset, batch_size=32)
driver1, driver2 = generate_random_driver(10, 10), generate_random_driver(10, 10)
return driver1, driver2, dataloader

def test_check_optimizers_legality(self):
"""
测试传入optimizer list的表现
"""
optimizers = [
paddle.optimizer.Adam(
parameters=self.driver.model.parameters(),
learning_rate=0.01
) for i in range(10)
]
@pytest.mark.parametrize("only_state_dict", ([True, False]))
def test_save_and_load_model(prepare_test_save_load, only_state_dict):
"""
测试 save_model 和 load_model 函数
"""
try:
path = "model"
driver1, driver2, dataloader = prepare_test_save_load

self.driver.set_optimizers(optimizers)
if only_state_dict:
driver1.save_model(path, only_state_dict)
else:
driver1.save_model(path, only_state_dict, input_spec=[paddle.ones((32, 10))])
driver2.load_model(path, only_state_dict)

optimizers += [
torch.optim.Adam(TorchNormalModel_Classification_1(10, 32).parameters(), 0.01)
]
for batch in dataloader:
batch = driver1.move_data_to_device(batch)
res1 = driver1.model.evaluate_step(**batch)
res2 = driver2.model.evaluate_step(**batch)

with pytest.raises(ValueError):
self.driver.set_optimizers(optimizers)
assert paddle.equal_all(res1["pred"], res2["pred"])
finally:
if only_state_dict:
synchronize_safe_rm(path)
else:
synchronize_safe_rm(path + ".pdiparams")
synchronize_safe_rm(path + ".pdiparams.info")
synchronize_safe_rm(path + ".pdmodel")

def test_check_dataloader_legality_in_train(self):
"""
测试is_train参数为True时,_check_dataloader_legality函数的表现
"""
dataloader = paddle.io.DataLoader(PaddleNormalDataset())
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", True)
@pytest.mark.parametrize("only_state_dict", ([True, False]))
def test_save_and_load_with_randombatchsampler(only_state_dict):
"""
测试save和load函数,主要测试 dataloader 被替换了 sampler 之后的情况
"""

# batch_size 和 batch_sampler 均为 None 的情形
dataloader = paddle.io.DataLoader(PaddleNormalDataset(), batch_size=None)
with pytest.raises(ValueError):
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", True)
try:
path = "model.ckp"

# 创建torch的dataloader
dataloader = torch.utils.data.DataLoader(
TorchNormalDataset(),
batch_size=32, shuffle=True
driver1, driver2 = generate_random_driver(10, 10), generate_random_driver(10, 10)
dataset = PaddleRandomMaxDataset(40, 10)
dataloader = DataLoader(
dataset=dataset,
batch_sampler=RandomBatchSampler(BatchSampler(dataset, batch_size=4), 4, False)
)
with pytest.raises(ValueError):
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", True)
num_consumed_batches = 2

def test_check_dataloader_legality_in_test(self):
"""
测试is_train参数为False时,_check_dataloader_legality函数的表现
"""
# 此时传入的应该是dict
dataloader = {
"train": paddle.io.DataLoader(PaddleNormalDataset()),
"test":paddle.io.DataLoader(PaddleNormalDataset())
}
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", False)

# batch_size 和 batch_sampler 均为 None 的情形
dataloader = {
"train": paddle.io.DataLoader(PaddleNormalDataset()),
"test":paddle.io.DataLoader(PaddleNormalDataset(), batch_size=None)
}
with pytest.raises(ValueError):
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", False)

# 传入的不是dict,应该报错
dataloader = paddle.io.DataLoader(PaddleNormalDataset())
with pytest.raises(ValueError):
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", False)
already_seen_x_set = set()
already_seen_y_set = set()
for idx, batch in enumerate(dataloader):
if idx >= num_consumed_batches:
break
already_seen_x_set.update(batch["x"])
already_seen_y_set.update(batch["y"])

# 创建torch的dataloader
train_loader = torch.utils.data.DataLoader(
TorchNormalDataset(),
batch_size=32, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
TorchNormalDataset(),
batch_size=32, shuffle=True
sampler_states = dataloader.batch_sampler.state_dict()
save_states = {"num_consumed_batches": num_consumed_batches}
if only_state_dict:
driver1.save(Path(path), save_states, dataloader, only_state_dict, should_save_model=True)
else:
driver1.save(Path(path), save_states, dataloader, only_state_dict, should_save_model=True, input_spec=[paddle.ones((16, 10))])
# 加载
# 更改 batch_size
dataloader = DataLoader(
dataset=dataset,
batch_sampler=RandomBatchSampler(BatchSampler(dataset, batch_size=2, shuffle=True), 2, False)
)
dataloader = {"train": train_loader, "test": test_loader}
with pytest.raises(ValueError):
PaddleSingleDriver.check_dataloader_legality(dataloader, "dataloader", False)

def test_tensor_to_numeric(self):
"""
测试tensor_to_numeric函数
"""
# 单个张量
tensor = paddle.to_tensor(3)
res = PaddleSingleDriver.tensor_to_numeric(tensor)
assert res == 3

tensor = paddle.rand((3, 4))
res = PaddleSingleDriver.tensor_to_numeric(tensor)
assert res == tensor.tolist()

# 张量list
tensor_list = [paddle.rand((6, 4, 2)) for i in range(10)]
res = PaddleSingleDriver.tensor_to_numeric(tensor_list)
assert isinstance(res, list)
tensor_list = [t.tolist() for t in tensor_list]
assert res == tensor_list

# 张量tuple
tensor_tuple = tuple([paddle.rand((6, 4, 2)) for i in range(10)])
res = PaddleSingleDriver.tensor_to_numeric(tensor_tuple)
assert isinstance(res, tuple)
tensor_tuple = tuple([t.tolist() for t in tensor_tuple])
assert res == tensor_tuple

# 张量dict
tensor_dict = {
"tensor": paddle.rand((3, 4)),
"list": [paddle.rand((6, 4, 2)) for i in range(10)],
"dict":{
"list": [paddle.rand((6, 4, 2)) for i in range(10)],
"tensor": paddle.rand((3, 4))
},
"int": 2,
"string": "test string"
}

res = PaddleSingleDriver.tensor_to_numeric(tensor_dict)
assert isinstance(res, dict)
assert res["tensor"] == tensor_dict["tensor"].tolist()
assert isinstance(res["list"], list)
for r, d in zip(res["list"], tensor_dict["list"]):
assert r == d.tolist()
assert isinstance(res["int"], int)
assert isinstance(res["string"], str)
assert isinstance(res["dict"], dict)
assert isinstance(res["dict"]["list"], list)
for r, d in zip(res["dict"]["list"], tensor_dict["dict"]["list"]):
assert r == d.tolist()
assert res["dict"]["tensor"] == tensor_dict["dict"]["tensor"].tolist()
load_states = driver2.load(Path(path), dataloader, only_state_dict, should_load_model=True)
replaced_loader = load_states.pop("dataloader")
# 1. 检查 optimizer 的状态
# TODO optimizer 的 state_dict 总是为空

def test_set_model_mode(self):
"""
测试set_model_mode函数
"""
self.driver.set_model_mode("train")
assert self.driver.model.training
self.driver.set_model_mode("eval")
assert not self.driver.model.training
# 应该报错
with pytest.raises(AssertionError):
self.driver.set_model_mode("test")
# 2. 检查 batch_sampler 是否被正确地加载和替换
assert not (replaced_loader is dataloader)
assert replaced_loader.batch_sampler is dataloader.batch_sampler
assert isinstance(replaced_loader.batch_sampler, RandomBatchSampler)
assert replaced_loader.batch_sampler.index_list == sampler_states["index_list"]
assert replaced_loader.batch_sampler.num_consumed_samples == num_consumed_batches * 4

def test_move_model_to_device_cpu(self):
"""
测试move_model_to_device函数
"""
PaddleSingleDriver.move_model_to_device(self.driver.model, "cpu")
assert self.driver.model.linear1.weight.place.is_cpu_place()
# 3. 检查 model 的参数是否正确
# 4. 检查 batch_idx
start_batch = load_states.pop('batch_idx_in_epoch')
assert start_batch == 2 * num_consumed_batches
left_x_batches = set()
left_y_batches = set()
for idx, batch in enumerate(replaced_loader):

def test_move_model_to_device_gpu(self):
"""
测试move_model_to_device函数
"""
PaddleSingleDriver.move_model_to_device(self.driver.model, "gpu")
assert self.driver.model.linear1.weight.place.is_gpu_place()
assert self.driver.model.linear1.weight.place.gpu_device_id() == 0
left_x_batches.update(batch["x"])
left_y_batches.update(batch["y"])
res1 = driver1.model.evaluate_step(**batch)
res2 = driver2.model.evaluate_step(**batch)
assert paddle.equal_all(res1["pred"], res2["pred"])

def test_worker_init_function(self):
"""
测试worker_init_function
"""
# 先确保不影响运行
# TODO:正确性
PaddleSingleDriver.worker_init_function(0)
assert len(left_x_batches) + len(already_seen_x_set) == len(dataset)
assert len(left_x_batches | already_seen_x_set) == len(dataset)
assert len(left_y_batches) + len(already_seen_y_set) == len(dataset)
assert len(left_y_batches | already_seen_y_set) == len(dataset)
finally:
synchronize_safe_rm(path)

def test_set_deterministic_dataloader(self):
"""
测试set_deterministic_dataloader
"""
# 先确保不影响运行
# TODO:正确性
dataloader = DataLoader(PaddleNormalDataset())
self.driver.set_deterministic_dataloader(dataloader)
@pytest.mark.parametrize("only_state_dict", ([True, False]))
def test_save_and_load_with_randomsampler(only_state_dict):
"""
测试save和load函数,主要测试 dataloader 被替换了 batch_sampler 的情况
"""

def test_set_sampler_epoch(self):
"""
测试set_sampler_epoch
"""
# 先确保不影响运行
# TODO:正确性
dataloader = DataLoader(PaddleNormalDataset())
self.driver.set_sampler_epoch(dataloader, 0)
try:
path = "model.ckp"

@pytest.mark.parametrize("batch_size", [16])
@pytest.mark.parametrize("shuffle", [True, False])
@pytest.mark.parametrize("drop_last", [True, False])
def test_get_dataloader_args(self, batch_size, shuffle, drop_last):
"""
测试正常情况下 get_dataloader_args 的表现
"""
driver1, driver2 = generate_random_driver(10, 10), generate_random_driver(10, 10)
dataset = PaddleRandomMaxDataset(40, 10)
batch_sampler = BatchSampler(dataset=dataset, batch_size=4)
batch_sampler.sampler = RandomSampler(dataset, True)
dataloader = DataLoader(
PaddleNormalDataset(),
batch_size=batch_size,
shuffle=shuffle,
drop_last=drop_last,
dataset,
batch_sampler=batch_sampler
)
res = PaddleSingleDriver.get_dataloader_args(dataloader)
num_consumed_batches = 2

assert isinstance(res.dataset, PaddleNormalDataset)
assert isinstance(res.batch_sampler, BatchSampler)
if shuffle:
assert isinstance(res.sampler, paddle.io.RandomSampler)
else:
assert isinstance(res.sampler, paddle.io.SequenceSampler)
assert res.shuffle == shuffle
assert res.batch_size == batch_size
assert res.drop_last == drop_last
already_seen_x_set = set()
already_seen_y_set = set()
for idx, batch in enumerate(dataloader):
if idx >= num_consumed_batches:
break
already_seen_x_set.update(batch["x"])
already_seen_y_set.update(batch["y"])

@pytest.mark.parametrize("batch_size", [16])
@pytest.mark.parametrize("shuffle", [True, False])
@pytest.mark.parametrize("drop_last", [True, False])
def test_get_dataloader_args_with_randombatchsampler(self, batch_size, shuffle, drop_last):
"""
测试替换了 batch_sampler 后 get_dataloader_args 的表现
"""
dataset = PaddleNormalDataset()
sampler_states = dataloader.batch_sampler.sampler.state_dict()
save_states = {"num_consumed_batches": num_consumed_batches}
if only_state_dict:
driver1.save(Path(path), save_states, dataloader, only_state_dict, should_save_model=True)
else:
driver1.save(Path(path), save_states, dataloader, only_state_dict, should_save_model=True, input_spec=[paddle.ones((16, 10))])
# 加载
# 更改 batch_size
batch_sampler = BatchSampler(dataset=dataset, batch_size=2)
batch_sampler.sampler = RandomSampler(dataset, True)
dataloader = DataLoader(
dataset,
batch_sampler=RandomBatchSampler(
BatchSampler(dataset, batch_size=batch_size, shuffle=shuffle),
batch_size,
drop_last,
)
batch_sampler=batch_sampler
)
res = PaddleSingleDriver.get_dataloader_args(dataloader)
load_states = driver2.load(Path(path), dataloader, only_state_dict, should_load_model=True)
replaced_loader = load_states.pop("dataloader")

assert isinstance(res.dataset, PaddleNormalDataset)
assert isinstance(res.batch_sampler, RandomBatchSampler)
if shuffle:
assert isinstance(res.sampler, paddle.io.RandomSampler)
else:
assert isinstance(res.sampler, paddle.io.SequenceSampler)
assert res.shuffle == shuffle
assert res.batch_size == batch_size
assert res.drop_last == drop_last
# 1. 检查 optimizer 的状态
# TODO optimizer 的 state_dict 总是为空

@pytest.mark.parametrize("batch_size", [16])
@pytest.mark.parametrize("shuffle", [True, False])
@pytest.mark.parametrize("drop_last", [True, False])
def test_get_dataloader_args_with_randomsampler(self, batch_size, shuffle, drop_last):
"""
测试替换了 sampler 后 get_dataloader_args 的表现
"""
dataset = PaddleNormalDataset()
batch_sampler = BatchSampler(dataset, batch_size=batch_size, drop_last=drop_last)
batch_sampler.sampler = RandomSampler(dataset, shuffle)
dataloader = DataLoader(
dataset,
batch_sampler=batch_sampler,
)
res = PaddleSingleDriver.get_dataloader_args(dataloader)
# 2. 检查 sampler 是否被正确地加载和替换
assert not (replaced_loader is dataloader)
assert isinstance(replaced_loader.batch_sampler.sampler, RandomSampler)
assert replaced_loader.batch_sampler.sampler.seed == sampler_states["seed"]
assert replaced_loader.batch_sampler.sampler.epoch == sampler_states["epoch"]
assert replaced_loader.batch_sampler.sampler.num_consumed_samples == 4 * num_consumed_batches
assert len(replaced_loader.batch_sampler.sampler.dataset) == sampler_states["length"]
assert replaced_loader.batch_sampler.sampler.shuffle == sampler_states["shuffle"]

assert isinstance(res.dataset, PaddleNormalDataset)
assert isinstance(res.batch_sampler, BatchSampler)
assert isinstance(res.sampler, RandomSampler)
assert res.shuffle == shuffle
assert res.batch_size == batch_size
assert res.drop_last == drop_last
# 3. 检查 model 的参数是否正确
# 4. 检查 batch_idx
start_batch = load_states.pop('batch_idx_in_epoch')
assert start_batch == 2 * num_consumed_batches
left_x_batches = set()
left_y_batches = set()
for idx, batch in enumerate(replaced_loader):

left_x_batches.update(batch["x"])
left_y_batches.update(batch["y"])
res1 = driver1.model.evaluate_step(**batch)
res2 = driver2.model.evaluate_step(**batch)
assert paddle.equal_all(res1["pred"], res2["pred"])

assert len(left_x_batches) + len(already_seen_x_set) == len(dataset)
assert len(left_x_batches | already_seen_x_set) == len(dataset)
assert len(left_y_batches) + len(already_seen_y_set) == len(dataset)
assert len(left_y_batches | already_seen_y_set) == len(dataset)
finally:
synchronize_safe_rm(path)

+ 59
- 0
tests/core/metrics/test_accutacy_paddle.py View File

@@ -0,0 +1,59 @@
import os

import pytest
import paddle
import paddle.distributed
import paddle.distributed.fleet.base.role_maker as role_maker
import paddle.distributed.fleet as fleet
from fastNLP.core.metrics import Accuracy
from fastNLP.core.drivers.paddle_driver.fleet_launcher import FleetLauncher

############################################################################
#
# 测试 单机单卡情况下的Accuracy
#
############################################################################
def test_accuracy_single():
pred = paddle.to_tensor([[1.19812393, -0.82041764, -0.53517765, -0.73061031, -1.45006669,
0.46514302],
[-0.85775983, -2.18273783, -1.07505429, -1.45561373, 0.40011844,
1.02202022],
[-0.39487389, 0.65682763, -0.62424040, 0.53692561, -0.28390560,
-0.02559055],
[-0.22586937, -0.07676325, -0.95977223, 0.36395910, -0.91758579,
-0.83857095],
[0.25136873, 2.49652624, 1.06251311, 1.60194016, 1.01451588,
0.08403367],
[0.10844281, 1.19017303, -0.11378096, 1.12686944, -0.08654942,
0.48605862],
[1.27320433, -1.13902378, 1.47072780, -0.98665696, -0.42589864,
0.64618838],
[0.83809763, -0.05356205, 0.03042423, -0.28371972, 0.81611472,
-0.45802942],
[0.38535264, 0.09721313, 2.27187467, 0.32045507, -0.20711982,
-0.13550705],
[-0.75228405, -1.34161997, 1.08697927, 0.33218071, -1.19470012,
2.58735061]])
tg = paddle.to_tensor([1, 2, 1, 3, 5, 4, 4, 2, 1, 5])
acc_metric = Accuracy()
acc_metric.update(pred, tg)
result = acc_metric.get_metric()
true_result = {'acc': 0.3}
assert true_result == result


############################################################################
#
# 测试 单机多卡情况下的Accuracy
#
############################################################################
def test_accuracy_ddp():
launcher = FleetLauncher(devices=[0, 1])
launcher.launch()
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)
if fleet.is_server():
pass
elif fleet.is_worker():
print(os.getenv("PADDLE_TRAINER_ID"))


Loading…
Cancel
Save