|
|
@@ -1,20 +1,20 @@ |
|
|
|
import os |
|
|
|
os.environ["FASTNLP_BACKEND"] = "paddle" |
|
|
|
import pytest |
|
|
|
|
|
|
|
from fastNLP.envs.set_backend import set_env |
|
|
|
from fastNLP.envs.set_env_on_import import set_env_on_import_paddle |
|
|
|
|
|
|
|
set_env_on_import_paddle() |
|
|
|
set_env("paddle") |
|
|
|
import paddle |
|
|
|
from paddle.io import DataLoader, BatchSampler |
|
|
|
|
|
|
|
from fastNLP.core.drivers.paddle_driver.single_device import PaddleSingleDriver |
|
|
|
from fastNLP.core.samplers.reproducible_sampler import RandomSampler |
|
|
|
from fastNLP.core.samplers import ReproducibleBatchSampler |
|
|
|
from tests.helpers.models.paddle_model import PaddleNormalModel_Classification |
|
|
|
from tests.helpers.datasets.paddle_data import PaddleDataset_MNIST, PaddleRandomDataset |
|
|
|
from tests.helpers.models.paddle_model import PaddleNormalModel_Classification_1 |
|
|
|
from tests.helpers.datasets.paddle_data import PaddleRandomMaxDataset |
|
|
|
from tests.helpers.datasets.torch_data import TorchNormalDataset |
|
|
|
from tests.helpers.models.torch_model import TorchNormalModel_Classification_1 |
|
|
|
from fastNLP.core import synchronize_safe_rm |
|
|
|
|
|
|
|
import paddle |
|
|
|
from paddle.io import DataLoader, BatchSampler |
|
|
|
import torch |
|
|
|
|
|
|
|
|
|
|
|
############################################################################ |
|
|
|
# |
|
|
@@ -26,32 +26,35 @@ def generate_random_driver(features, labels): |
|
|
|
""" |
|
|
|
生成driver |
|
|
|
""" |
|
|
|
model = PaddleNormalModel_Classification(labels, features) |
|
|
|
model = PaddleNormalModel_Classification_1(labels, features) |
|
|
|
opt = paddle.optimizer.Adam(parameters=model.parameters(), learning_rate=0.01) |
|
|
|
driver = PaddleSingleDriver(model) |
|
|
|
driver = PaddleSingleDriver(model, device="cpu") |
|
|
|
driver.set_optimizers(opt) |
|
|
|
|
|
|
|
return driver |
|
|
|
|
|
|
|
@pytest.fixture |
|
|
|
def prepare_test_save_load(): |
|
|
|
dataset = PaddleRandomDataset(num_of_data=320, features=64, labels=8) |
|
|
|
dataset = PaddleRandomMaxDataset(320, 10) |
|
|
|
dataloader = DataLoader(dataset, batch_size=32) |
|
|
|
driver1, driver2 = generate_random_driver(64, 8), generate_random_driver(64, 8) |
|
|
|
driver1, driver2 = generate_random_driver(10, 10), generate_random_driver(10, 10) |
|
|
|
return driver1, driver2, dataloader |
|
|
|
|
|
|
|
def test_save_and_load(prepare_test_save_load): |
|
|
|
@pytest.mark.parametrize("reproducible", [True, False]) |
|
|
|
@pytest.mark.parametrize("only_state_dict", [True, False]) |
|
|
|
def test_save_and_load(prepare_test_save_load, reproducible, only_state_dict): |
|
|
|
""" |
|
|
|
测试save和load函数 |
|
|
|
TODO optimizer的state_dict为空,暂时不测试 |
|
|
|
""" |
|
|
|
|
|
|
|
try: |
|
|
|
path = "model.pdparams" |
|
|
|
path = "model.ckp" |
|
|
|
driver1, driver2, dataloader = prepare_test_save_load |
|
|
|
dataloader = driver1.set_dist_repro_dataloader(dataloader, "dist", reproducible) |
|
|
|
|
|
|
|
driver1.save(path, {}) |
|
|
|
driver2.load(path) |
|
|
|
driver1.save(path, {}, dataloader, only_state_dict, should_save_model=True) |
|
|
|
driver2.load(path, dataloader, only_state_dict, should_load_model=True) |
|
|
|
|
|
|
|
for batch in dataloader: |
|
|
|
res1 = driver1.validate_step(batch) |
|
|
@@ -67,11 +70,11 @@ def test_save_and_load_state_dict(prepare_test_save_load): |
|
|
|
TODO optimizer的state_dict为空,暂时不测试 |
|
|
|
""" |
|
|
|
try: |
|
|
|
path = "model.pdparams" |
|
|
|
path = "dict" |
|
|
|
driver1, driver2, dataloader = prepare_test_save_load |
|
|
|
|
|
|
|
driver1.save_model(path) |
|
|
|
driver2.model.load_dict(driver2.load_model(path)) |
|
|
|
driver2.load_model(path) |
|
|
|
|
|
|
|
for batch in dataloader: |
|
|
|
res1 = driver1.validate_step(batch) |
|
|
@@ -87,11 +90,11 @@ def test_save_and_load_whole_model(prepare_test_save_load): |
|
|
|
TODO optimizer的state_dict为空,暂时不测试 |
|
|
|
""" |
|
|
|
try: |
|
|
|
path = "model.pdparams" |
|
|
|
path = "model" |
|
|
|
driver1, driver2, dataloader = prepare_test_save_load |
|
|
|
|
|
|
|
driver1.save_model(path, only_state_dict=False, input_spec=[next(iter(dataloader))["x"]]) |
|
|
|
driver2.model = driver2.load_model(path, load_dict=False) |
|
|
|
driver2.load_model(path, only_state_dict=False) |
|
|
|
|
|
|
|
for batch in dataloader: |
|
|
|
res1 = driver1.validate_step(batch) |
|
|
@@ -99,7 +102,9 @@ def test_save_and_load_whole_model(prepare_test_save_load): |
|
|
|
|
|
|
|
assert paddle.equal_all(res1["pred"], res2["pred"]) |
|
|
|
finally: |
|
|
|
synchronize_safe_rm(path) |
|
|
|
synchronize_safe_rm(path + ".pdiparams") |
|
|
|
synchronize_safe_rm(path + ".pdiparams.info") |
|
|
|
synchronize_safe_rm(path + ".pdmodel") |
|
|
|
|
|
|
|
|
|
|
|
class TestSingleDeviceFunction: |
|
|
@@ -109,8 +114,8 @@ class TestSingleDeviceFunction: |
|
|
|
|
|
|
|
@classmethod |
|
|
|
def setup_class(cls): |
|
|
|
model = PaddleNormalModel_Classification(10, 784) |
|
|
|
cls.driver = PaddleSingleDriver(model) |
|
|
|
model = PaddleNormalModel_Classification_1(10, 784) |
|
|
|
cls.driver = PaddleSingleDriver(model, device="gpu") |
|
|
|
|
|
|
|
def test_unwrap_model(self): |
|
|
|
""" |
|
|
@@ -129,7 +134,7 @@ class TestSingleDeviceFunction: |
|
|
|
""" |
|
|
|
测试get_model_device |
|
|
|
""" |
|
|
|
self.driver = PaddleSingleDriver(PaddleNormalModel_Classification(10, 784), "cpu") |
|
|
|
self.driver = PaddleSingleDriver(PaddleNormalModel_Classification_1(10, 784), "cpu") |
|
|
|
device = self.driver.get_model_device() |
|
|
|
assert device == "cpu", device |
|
|
|
|
|
|
@@ -137,7 +142,7 @@ class TestSingleDeviceFunction: |
|
|
|
""" |
|
|
|
测试get_model_device |
|
|
|
""" |
|
|
|
self.driver = PaddleSingleDriver(PaddleNormalModel_Classification(10, 784), "gpu:0") |
|
|
|
self.driver = PaddleSingleDriver(PaddleNormalModel_Classification_1(10, 784), "gpu:0") |
|
|
|
device = self.driver.get_model_device() |
|
|
|
assert device == "gpu:0", device |
|
|
|
|
|
|
@@ -152,8 +157,11 @@ class TestSingleDeviceFunction: |
|
|
|
self.driver.move_data_to_device(paddle.rand((32, 64))) |
|
|
|
|
|
|
|
@pytest.mark.parametrize( |
|
|
|
"dist_sampler", |
|
|
|
["dist", ReproducibleBatchSampler(BatchSampler(PaddleDataset_MNIST("train")), 32, False), RandomSampler(PaddleDataset_MNIST("train"))] |
|
|
|
"dist_sampler", [ |
|
|
|
"dist", |
|
|
|
ReproducibleBatchSampler(BatchSampler(PaddleRandomMaxDataset(320, 10)), 32, False), |
|
|
|
RandomSampler(PaddleRandomMaxDataset(320, 10)) |
|
|
|
] |
|
|
|
) |
|
|
|
@pytest.mark.parametrize( |
|
|
|
"reproducible", |
|
|
@@ -161,8 +169,225 @@ class TestSingleDeviceFunction: |
|
|
|
) |
|
|
|
def test_repalce_sampler(self, dist_sampler, reproducible): |
|
|
|
""" |
|
|
|
测试replace_sampler函数 |
|
|
|
测试set_dist_repro_dataloader函数 |
|
|
|
""" |
|
|
|
dataloader = DataLoader(PaddleRandomMaxDataset(320, 10), batch_size=100, shuffle=True) |
|
|
|
|
|
|
|
res = self.driver.set_dist_repro_dataloader(dataloader, dist_sampler, reproducible) |
|
|
|
|
|
|
|
class TestPaddleDriverFunctions: |
|
|
|
""" |
|
|
|
使用 PaddleSingleDriver 测试基类的函数 |
|
|
|
""" |
|
|
|
|
|
|
|
@classmethod |
|
|
|
def setup_class(self): |
|
|
|
model = PaddleNormalModel_Classification_1(10, 32) |
|
|
|
self.driver = PaddleSingleDriver(model, device="gpu") |
|
|
|
|
|
|
|
def test_check_single_optimizer_legality(self): |
|
|
|
""" |
|
|
|
测试传入单个optimizer时的表现 |
|
|
|
""" |
|
|
|
optimizer = paddle.optimizer.Adam( |
|
|
|
parameters=self.driver.model.parameters(), |
|
|
|
learning_rate=0.01 |
|
|
|
) |
|
|
|
|
|
|
|
self.driver.set_optimizers(optimizer) |
|
|
|
|
|
|
|
optimizer = torch.optim.Adam(TorchNormalModel_Classification_1(10, 32).parameters(), 0.01) |
|
|
|
# 传入torch的optimizer时,应该报错ValueError |
|
|
|
with self.assertRaises(ValueError) as cm: |
|
|
|
self.driver.set_optimizers(optimizer) |
|
|
|
|
|
|
|
def test_check_optimizers_legality(self): |
|
|
|
""" |
|
|
|
测试传入optimizer list的表现 |
|
|
|
""" |
|
|
|
optimizers = [ |
|
|
|
paddle.optimizer.Adam( |
|
|
|
parameters=self.driver.model.parameters(), |
|
|
|
learning_rate=0.01 |
|
|
|
) for i in range(10) |
|
|
|
] |
|
|
|
|
|
|
|
self.driver.set_optimizers(optimizers) |
|
|
|
|
|
|
|
optimizers += [ |
|
|
|
torch.optim.Adam(TorchNormalModel_Classification_1(10, 32).parameters(), 0.01) |
|
|
|
] |
|
|
|
|
|
|
|
with self.assertRaises(ValueError) as cm: |
|
|
|
self.driver.set_optimizers(optimizers) |
|
|
|
|
|
|
|
def test_check_dataloader_legality_in_train(self): |
|
|
|
""" |
|
|
|
测试is_train参数为True时,_check_dataloader_legality函数的表现 |
|
|
|
""" |
|
|
|
dataloader = paddle.io.DataLoader(PaddleNormalDataset()) |
|
|
|
PaddleSingleDriver._check_dataloader_legality(dataloader, "dataloader", True) |
|
|
|
|
|
|
|
# batch_size 和 batch_sampler 均为 None 的情形 |
|
|
|
dataloader = paddle.io.DataLoader(PaddleNormalDataset(), batch_size=None) |
|
|
|
with self.assertRaises(ValueError) as cm: |
|
|
|
PaddleSingleDriver._check_dataloader_legality(dataloader, "dataloader", True) |
|
|
|
|
|
|
|
# 创建torch的dataloader |
|
|
|
dataloader = torch.utils.data.DataLoader( |
|
|
|
TorchNormalDataset(), |
|
|
|
batch_size=32, shuffle=True |
|
|
|
) |
|
|
|
with self.assertRaises(ValueError) as cm: |
|
|
|
PaddleSingleDriver._check_dataloader_legality(dataloader, "dataloader", True) |
|
|
|
|
|
|
|
def test_check_dataloader_legacy_in_test(self): |
|
|
|
""" |
|
|
|
测试is_train参数为False时,_check_dataloader_legality函数的表现 |
|
|
|
""" |
|
|
|
# 此时传入的应该是dict |
|
|
|
dataloader = { |
|
|
|
"train": paddle.io.DataLoader(PaddleNormalDataset()), |
|
|
|
"test":paddle.io.DataLoader(PaddleNormalDataset()) |
|
|
|
} |
|
|
|
PaddleSingleDriver._check_dataloader_legality(dataloader, "dataloader", False) |
|
|
|
|
|
|
|
# batch_size 和 batch_sampler 均为 None 的情形 |
|
|
|
dataloader = { |
|
|
|
"train": paddle.io.DataLoader(PaddleNormalDataset()), |
|
|
|
"test":paddle.io.DataLoader(PaddleNormalDataset(), batch_size=None) |
|
|
|
} |
|
|
|
PaddleSingleDriver._check_dataloader_legality(dataloader, "dataloader", False) |
|
|
|
|
|
|
|
# 传入的不是dict,应该报错 |
|
|
|
dataloader = paddle.io.DataLoader(PaddleNormalDataset()) |
|
|
|
with self.assertRaises(ValueError) as cm: |
|
|
|
PaddleSingleDriver._check_dataloader_legality(dataloader, "dataloader", False) |
|
|
|
|
|
|
|
# 创建torch的dataloader |
|
|
|
train_loader = torch.utils.data.DataLoader( |
|
|
|
TorchNormalDataset(), |
|
|
|
batch_size=32, shuffle=True |
|
|
|
) |
|
|
|
test_loader = torch.utils.data.DataLoader( |
|
|
|
TorchNormalDataset(), |
|
|
|
batch_size=32, shuffle=True |
|
|
|
) |
|
|
|
dataloader = {"train": train_loader, "test": test_loader} |
|
|
|
with self.assertRaises(ValueError) as cm: |
|
|
|
PaddleSingleDriver._check_dataloader_legality(dataloader, "dataloader", False) |
|
|
|
|
|
|
|
def test_tensor_to_numeric(self): |
|
|
|
""" |
|
|
|
测试tensor_to_numeric函数 |
|
|
|
""" |
|
|
|
dataloader = DataLoader(PaddleDataset_MNIST("train"), batch_size=100, shuffle=True) |
|
|
|
# 单个张量 |
|
|
|
tensor = paddle.to_tensor(3) |
|
|
|
res = PaddleSingleDriver.tensor_to_numeric(tensor) |
|
|
|
self.assertEqual(res, 3) |
|
|
|
|
|
|
|
tensor = paddle.rand((3, 4)) |
|
|
|
res = PaddleSingleDriver.tensor_to_numeric(tensor) |
|
|
|
self.assertListEqual(res, tensor.tolist()) |
|
|
|
|
|
|
|
# 张量list |
|
|
|
tensor_list = [paddle.rand((6, 4, 2)) for i in range(10)] |
|
|
|
res = PaddleSingleDriver.tensor_to_numeric(tensor_list) |
|
|
|
self.assertTrue(res, list) |
|
|
|
tensor_list = [t.tolist() for t in tensor_list] |
|
|
|
self.assertListEqual(res, tensor_list) |
|
|
|
|
|
|
|
# 张量tuple |
|
|
|
tensor_tuple = tuple([paddle.rand((6, 4, 2)) for i in range(10)]) |
|
|
|
res = PaddleSingleDriver.tensor_to_numeric(tensor_tuple) |
|
|
|
self.assertTrue(res, tuple) |
|
|
|
tensor_tuple = tuple([t.tolist() for t in tensor_tuple]) |
|
|
|
self.assertTupleEqual(res, tensor_tuple) |
|
|
|
|
|
|
|
# 张量dict |
|
|
|
tensor_dict = { |
|
|
|
"tensor": paddle.rand((3, 4)), |
|
|
|
"list": [paddle.rand((6, 4, 2)) for i in range(10)], |
|
|
|
"dict":{ |
|
|
|
"list": [paddle.rand((6, 4, 2)) for i in range(10)], |
|
|
|
"tensor": paddle.rand((3, 4)) |
|
|
|
}, |
|
|
|
"int": 2, |
|
|
|
"string": "test string" |
|
|
|
} |
|
|
|
|
|
|
|
res = PaddleSingleDriver.tensor_to_numeric(tensor_dict) |
|
|
|
self.assertIsInstance(res, dict) |
|
|
|
self.assertListEqual(res["tensor"], tensor_dict["tensor"].tolist()) |
|
|
|
self.assertIsInstance(res["list"], list) |
|
|
|
for r, d in zip(res["list"], tensor_dict["list"]): |
|
|
|
self.assertListEqual(r, d.tolist()) |
|
|
|
self.assertIsInstance(res["int"], int) |
|
|
|
self.assertIsInstance(res["string"], str) |
|
|
|
self.assertIsInstance(res["dict"], dict) |
|
|
|
self.assertIsInstance(res["dict"]["list"], list) |
|
|
|
for r, d in zip(res["dict"]["list"], tensor_dict["dict"]["list"]): |
|
|
|
self.assertListEqual(r, d.tolist()) |
|
|
|
self.assertListEqual(res["dict"]["tensor"], tensor_dict["dict"]["tensor"].tolist()) |
|
|
|
|
|
|
|
def test_set_model_mode(self): |
|
|
|
""" |
|
|
|
测试set_model_mode函数 |
|
|
|
""" |
|
|
|
self.driver.set_model_mode("train") |
|
|
|
self.assertTrue(self.driver.model.training) |
|
|
|
self.driver.set_model_mode("eval") |
|
|
|
self.assertFalse(self.driver.model.training) |
|
|
|
# 应该报错 |
|
|
|
with self.assertRaises(AssertionError) as cm: |
|
|
|
self.driver.set_model_mode("test") |
|
|
|
|
|
|
|
def test_move_model_to_device_cpu(self): |
|
|
|
""" |
|
|
|
测试move_model_to_device函数 |
|
|
|
""" |
|
|
|
PaddleSingleDriver.move_model_to_device(self.driver.model, "cpu") |
|
|
|
self.assertTrue(self.driver.model.fc1.weight.place.is_cpu_place()) |
|
|
|
|
|
|
|
res = self.driver.set_dist_repro_dataloader(dataloader, dist_sampler, reproducible) |
|
|
|
def test_move_model_to_device_gpu(self): |
|
|
|
""" |
|
|
|
测试move_model_to_device函数 |
|
|
|
""" |
|
|
|
PaddleSingleDriver.move_model_to_device(self.driver.model, "gpu:0") |
|
|
|
self.assertTrue(self.driver.model.fc1.weight.place.is_gpu_place()) |
|
|
|
self.assertEqual(self.driver.model.fc1.weight.place.gpu_device_id(), 0) |
|
|
|
|
|
|
|
def test_worker_init_function(self): |
|
|
|
""" |
|
|
|
测试worker_init_function |
|
|
|
""" |
|
|
|
# 先确保不影响运行 |
|
|
|
# TODO:正确性 |
|
|
|
PaddleSingleDriver.worker_init_function(0) |
|
|
|
|
|
|
|
def test_set_deterministic_dataloader(self): |
|
|
|
""" |
|
|
|
测试set_deterministic_dataloader |
|
|
|
""" |
|
|
|
# 先确保不影响运行 |
|
|
|
# TODO:正确性 |
|
|
|
dataloader = DataLoader(PaddleNormalDataset()) |
|
|
|
self.driver.set_deterministic_dataloader(dataloader) |
|
|
|
|
|
|
|
def test_set_sampler_epoch(self): |
|
|
|
""" |
|
|
|
测试set_sampler_epoch |
|
|
|
""" |
|
|
|
# 先确保不影响运行 |
|
|
|
# TODO:正确性 |
|
|
|
dataloader = DataLoader(PaddleNormalDataset()) |
|
|
|
self.driver.set_sampler_epoch(dataloader, 0) |
|
|
|
|
|
|
|
def test_get_dataloader_args(self): |
|
|
|
""" |
|
|
|
测试get_dataloader_args |
|
|
|
""" |
|
|
|
# 先确保不影响运行 |
|
|
|
# TODO:正确性 |
|
|
|
dataloader = DataLoader(PaddleNormalDataset()) |
|
|
|
res = PaddleSingleDriver.get_dataloader_args(dataloader) |