From 791580797c6ba7d9a2aa5938776167e7226d7e17 Mon Sep 17 00:00:00 2001
From: x54-729 <17307130121@fudan.edu.cn>
Date: Sun, 10 Apr 2022 06:55:05 +0000
Subject: [PATCH] =?UTF-8?q?paddle=20=E5=88=86=E5=B8=83=E5=BC=8F=E7=9A=84?=
 =?UTF-8?q?=E6=B5=8B=E8=AF=95=E4=BE=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/core/controllers/test_trainer_fleet.py  | 101 +++++++++++++++++
 .../controllers/test_trainer_fleet_outside.py | 108 ++++++++++++++++++
 2 files changed, 209 insertions(+)
 create mode 100644 tests/core/controllers/test_trainer_fleet.py
 create mode 100644 tests/core/controllers/test_trainer_fleet_outside.py

diff --git a/tests/core/controllers/test_trainer_fleet.py b/tests/core/controllers/test_trainer_fleet.py
new file mode 100644
index 00000000..a294ad1f
--- /dev/null
+++ b/tests/core/controllers/test_trainer_fleet.py
@@ -0,0 +1,101 @@
+"""
+This file tests the case where the user launches the script with
+python -m paddle.distributed.launch, and checks whether there is a chance
+to run it through pytest.
+python -m paddle.distributed.launch --gpus=0,2,3 test_trainer_fleet.py
+"""
+import os
+# Set before the fastNLP imports below (presumably read at import time; confirm).
+os.environ["FASTNLP_BACKEND"] = "paddle"
+import sys
+sys.path.append("../../../")
+
+from dataclasses import dataclass
+
+from fastNLP.core.controllers.trainer import Trainer
+from fastNLP.core.metrics.accuracy import Accuracy
+from fastNLP.core.callbacks.progress_callback import RichCallback
+from fastNLP.core.callbacks import Callback
+
+import paddle
+from paddle.optimizer import Adam
+from paddle.io import DataLoader
+
+from tests.helpers.models.paddle_model import PaddleNormalModel_Classification_1
+from tests.helpers.datasets.paddle_data import PaddleRandomMaxDataset
+from tests.helpers.callbacks.helper_callbacks import RecordMetricCallback
+
+@dataclass
+class MNISTTrainFleetConfig:
+    """Settings read by ``test_trainer_fleet`` for the model, dataloaders and Trainer."""
+    num_labels: int = 10
+    feature_dimension: int = 10
+
+    batch_size: int = 32
+    shuffle: bool = True
+    # Annotated so it is a real dataclass field like the attributes above.
+    validate_every: int = -1
+
+def test_trainer_fleet(
+    driver,
+    device,
+    callbacks,
+    n_epochs,
+):
+    """
+    Train ``PaddleNormalModel_Classification_1`` on ``PaddleRandomMaxDataset`` with a ``Trainer``.
+
+    :param driver: forwarded to ``Trainer`` as ``driver``.
+    :param device: forwarded to ``Trainer`` as ``device``.
+    :param callbacks: forwarded to ``Trainer`` as ``callbacks``.
+    :param n_epochs: forwarded to ``Trainer`` as ``n_epochs``.
+    """
+    model = PaddleNormalModel_Classification_1(
+        num_labels=MNISTTrainFleetConfig.num_labels,
+        feature_dimension=MNISTTrainFleetConfig.feature_dimension
+    )
+    optimizers = Adam(parameters=model.parameters(), learning_rate=0.0001)
+
+    train_dataloader = DataLoader(
+        dataset=PaddleRandomMaxDataset(6400, MNISTTrainFleetConfig.feature_dimension),
+        batch_size=MNISTTrainFleetConfig.batch_size,
+        shuffle=MNISTTrainFleetConfig.shuffle
+    )
+    val_dataloader = DataLoader(
+        dataset=PaddleRandomMaxDataset(1280, MNISTTrainFleetConfig.feature_dimension),
+        batch_size=MNISTTrainFleetConfig.batch_size,
+        shuffle=MNISTTrainFleetConfig.shuffle
+    )
+    trainer = Trainer(
+        model=model,
+        driver=driver,
+        device=device,
+        optimizers=optimizers,
+        train_dataloader=train_dataloader,
+        validate_dataloaders=val_dataloader,
+        validate_every=MNISTTrainFleetConfig.validate_every,
+        input_mapping=None,
+        output_mapping=None,
+        metrics={"acc": Accuracy()},
+
+        n_epochs=n_epochs,
+        callbacks=callbacks,
+        output_from_new_proc="logs",
+    )
+    trainer.run()
+
+if __name__ == "__main__":
+    driver = "fleet"
+    device = [0,2,3]
+    # driver = "paddle"
+    # device = 2
+    callbacks = [
+        # RecordMetricCallback(monitor="acc#acc", metric_threshold=0.0, larger_better=True),
+        RichCallback(5),
+    ]
+    test_trainer_fleet(
+        driver=driver,
+        device=device,
+        callbacks=callbacks,
+        n_epochs=5,
+    )
diff --git a/tests/core/controllers/test_trainer_fleet_outside.py b/tests/core/controllers/test_trainer_fleet_outside.py
new file mode 100644
index 00000000..d461e211
--- /dev/null
+++ b/tests/core/controllers/test_trainer_fleet_outside.py
@@ -0,0 +1,108 @@
+"""
+This file tests the case where the user launches the script with
+python -m paddle.distributed.launch and has initialized fleet themselves.
+python -m paddle.distributed.launch --gpus=0,2,3 test_trainer_fleet_outside.py
+"""
+import os
+# Set before the fastNLP imports below (presumably read at import time; confirm).
+os.environ["FASTNLP_BACKEND"] = "paddle"
+import sys
+sys.path.append("../../../")
+
+from dataclasses import dataclass
+
+from fastNLP.core.controllers.trainer import Trainer
+from fastNLP.core.metrics.accuracy import Accuracy
+from fastNLP.core.callbacks.progress_callback import RichCallback
+from fastNLP.core.callbacks import Callback
+
+import paddle
+from paddle.optimizer import Adam
+from paddle.io import DataLoader
+import paddle.distributed.fleet as fleet
+
+from tests.helpers.models.paddle_model import PaddleNormalModel_Classification_2
+from tests.helpers.datasets.paddle_data import PaddleRandomMaxDataset
+from tests.helpers.callbacks.helper_callbacks import RecordMetricCallback
+
+@dataclass
+class MNISTTrainFleetConfig:
+    """Settings read by ``test_trainer_fleet`` for the model, dataloaders and Trainer."""
+    num_labels: int = 10
+    feature_dimension: int = 10
+
+    batch_size: int = 32
+    shuffle: bool = True
+    # Annotated so it is a real dataclass field like the attributes above.
+    validate_every: int = -1
+
+def test_trainer_fleet(
+    driver,
+    device,
+    callbacks,
+    n_epochs,
+):
+    """
+    Initialize fleet, wrap the model and optimizer with it, then run a ``Trainer``.
+
+    :param driver: forwarded to ``Trainer`` as ``driver``.
+    :param device: forwarded to ``Trainer`` as ``device``.
+    :param callbacks: forwarded to ``Trainer`` as ``callbacks``.
+    :param n_epochs: forwarded to ``Trainer`` as ``n_epochs``.
+    """
+    # fleet is initialized here, outside the Trainer, before the model is wrapped.
+    fleet.init(is_collective=True)
+
+    model = PaddleNormalModel_Classification_2(
+        num_labels=MNISTTrainFleetConfig.num_labels,
+        feature_dimension=MNISTTrainFleetConfig.feature_dimension,
+    )
+    optimizers = Adam(parameters=model.parameters(), learning_rate=0.0001)
+
+    model = fleet.distributed_model(model)
+    optimizers = fleet.distributed_optimizer(optimizers)
+
+    train_dataloader = DataLoader(
+        dataset=PaddleRandomMaxDataset(6400, MNISTTrainFleetConfig.feature_dimension),
+        batch_size=MNISTTrainFleetConfig.batch_size,
+        shuffle=MNISTTrainFleetConfig.shuffle
+    )
+    val_dataloader = DataLoader(
+        dataset=PaddleRandomMaxDataset(1280, MNISTTrainFleetConfig.feature_dimension),
+        batch_size=MNISTTrainFleetConfig.batch_size,
+        shuffle=MNISTTrainFleetConfig.shuffle
+    )
+    trainer = Trainer(
+        model=model,
+        driver=driver,
+        device=device,
+        optimizers=optimizers,
+        train_dataloader=train_dataloader,
+        validate_dataloaders=val_dataloader,
+        validate_every=MNISTTrainFleetConfig.validate_every,
+        input_mapping=None,
+        output_mapping=None,
+        metrics={"acc": Accuracy()},
+
+        n_epochs=n_epochs,
+        callbacks=callbacks,
+        output_from_new_proc="logs",
+        # NOTE(review): raises KeyError when CUDA_VISIBLE_DEVICES is unset; presumably the
+        # launcher sets it to a single device id per process -- confirm.
+        data_device=f"gpu:{os.environ['CUDA_VISIBLE_DEVICES']}"
+    )
+    trainer.run()
+
+if __name__ == "__main__":
+    driver = "fleet"
+    device = [0,2,3]
+    callbacks = [
+        # RecordMetricCallback(monitor="acc#acc", metric_threshold=0.0, larger_better=True),
+        RichCallback(5),
+    ]
+    test_trainer_fleet(
+        driver=driver,
+        device=device,
+        callbacks=callbacks,
+        n_epochs=30,
+    )