From e28ecb8b335971402c0658bb7bfe72f1a29d1818 Mon Sep 17 00:00:00 2001 From: ChenXin Date: Mon, 23 Sep 2019 13:44:19 +0800 Subject: [PATCH] hide the EchoCallback --- fastNLP/__init__.py | 1 - fastNLP/core/__init__.py | 5 +-- fastNLP/core/callback.py | 7 +++- test/core/test_dist_trainer.py | 67 +++++++++++++++++++--------------- 4 files changed, 45 insertions(+), 35 deletions(-) diff --git a/fastNLP/__init__.py b/fastNLP/__init__.py index ded83308..1629ab66 100644 --- a/fastNLP/__init__.py +++ b/fastNLP/__init__.py @@ -36,7 +36,6 @@ __all__ = [ "TensorboardCallback", "WarmupCallback", 'SaveModelCallback', - "EchoCallback", "CallbackException", "EarlyStopError", diff --git a/fastNLP/core/__init__.py b/fastNLP/core/__init__.py index bea80097..0588c9aa 100644 --- a/fastNLP/core/__init__.py +++ b/fastNLP/core/__init__.py @@ -48,7 +48,6 @@ __all__ = [ "TensorboardCallback", "WarmupCallback", 'SaveModelCallback', - "EchoCallback", "CallbackException", "EarlyStopError", @@ -77,8 +76,8 @@ __all__ = [ from ._logger import logger from .batch import DataSetIter, BatchIter, TorchLoaderIter from .callback import Callback, GradientClipCallback, EarlyStopCallback, FitlogCallback, EvaluateCallback, \ - LRScheduler, ControlC, LRFinder, TensorboardCallback, WarmupCallback, SaveModelCallback, EchoCallback, \ - CallbackException, EarlyStopError + LRScheduler, ControlC, LRFinder, TensorboardCallback, WarmupCallback, SaveModelCallback, CallbackException, \ + EarlyStopError from .const import Const from .dataset import DataSet from .field import FieldArray, Padder, AutoPadder, EngChar2DPadder diff --git a/fastNLP/core/callback.py b/fastNLP/core/callback.py index 36ce2aa5..dca34db5 100644 --- a/fastNLP/core/callback.py +++ b/fastNLP/core/callback.py @@ -62,7 +62,6 @@ __all__ = [ "TensorboardCallback", "WarmupCallback", "SaveModelCallback", - "EchoCallback", "CallbackException", "EarlyStopError" @@ -710,6 +709,8 @@ class ControlC(Callback): class SmoothValue(object): + """work for LRFinder""" + def __init__(self, beta: float): self.beta, self.n, self.mov_avg = beta, 0, 0 self.smooth = None @@ -1022,6 +1023,10 @@ class EarlyStopError(CallbackException): class EchoCallback(Callback): + """ + 用于测试分布式训练 + + """ def __init__(self, name, out=sys.stdout): super(EchoCallback, self).__init__() self.name = name diff --git a/test/core/test_dist_trainer.py b/test/core/test_dist_trainer.py index 03f613e1..3b53fe50 100644 --- a/test/core/test_dist_trainer.py +++ b/test/core/test_dist_trainer.py @@ -1,33 +1,36 @@ +import os +import shutil +import subprocess import unittest +from argparse import ArgumentParser import numpy as np import torch.cuda + +from fastNLP import AccuracyMetric +from fastNLP import CrossEntropyLoss, BCELoss from fastNLP import DataSet from fastNLP import Instance -from fastNLP import CrossEntropyLoss, BCELoss from fastNLP import SGD +from fastNLP.core.callback import EchoCallback from fastNLP.core.dist_trainer import DistTrainer, get_local_rank from fastNLP.models.base_model import NaiveClassifier -import shutil -import os -import subprocess -from argparse import ArgumentParser -from fastNLP.core.callback import EchoCallback -from fastNLP import AccuracyMetric + def prepare_fake_dataset(): mean = np.array([-3, -3]) cov = np.array([[1, 0], [0, 1]]) class_A = np.random.multivariate_normal(mean, cov, size=(1000,)) - + mean = np.array([3, 3]) cov = np.array([[1, 0], [0, 1]]) class_B = np.random.multivariate_normal(mean, cov, size=(1000,)) - + data_set = DataSet([Instance(x=[float(item[0]), float(item[1])], y=0) for item in class_A] + [Instance(x=[float(item[0]), float(item[1])], y=1) for item in class_B]) return data_set + def prepare_fake_dataset2(*args, size=100): ys = np.random.randint(4, size=100, dtype=np.int64) data = {'y': ys} @@ -35,32 +38,35 @@ def prepare_fake_dataset2(*args, size=100): data[arg] = np.random.randn(size, 5) return DataSet(data=data) + def set_rng_seed(seed): np.random.seed(seed) + def prepare_env(): def prepare_fake_dataset(): mean = np.array([-3, -3]) cov = np.array([[1, 0], [0, 1]]) class_A = np.random.multivariate_normal(mean, cov, size=(1000,)) - + mean = np.array([3, 3]) cov = np.array([[1, 0], [0, 1]]) class_B = np.random.multivariate_normal(mean, cov, size=(1000,)) - + data_set = DataSet([Instance(x=[float(item[0]), float(item[1])], y=[0.0]) for item in class_A] + [Instance(x=[float(item[0]), float(item[1])], y=[1.0]) for item in class_B]) return data_set - + data_set = prepare_fake_dataset() data_set.set_input("x") data_set.set_target("y") model = NaiveClassifier(2, 1) return data_set, model + class TestDistTrainer(unittest.TestCase): save_path = './save_cp' - + def run1(self): # test distributed training print('local rank', get_local_rank()) @@ -68,9 +74,9 @@ class TestDistTrainer(unittest.TestCase): data_set = prepare_fake_dataset() data_set.set_input("x", flag=True) data_set.set_target("y", flag=True) - + model = NaiveClassifier(2, 2) - + trainer = DistTrainer( model=model, train_data=data_set, optimizer=SGD(lr=0.1), loss=CrossEntropyLoss(pred="predict", target="y"), @@ -82,7 +88,7 @@ class TestDistTrainer(unittest.TestCase): """ if trainer.is_master and os.path.exists(self.save_path): shutil.rmtree(self.save_path) - + def run2(self): # test fp16 with distributed training print('local rank', get_local_rank()) @@ -90,9 +96,9 @@ class TestDistTrainer(unittest.TestCase): data_set = prepare_fake_dataset() data_set.set_input("x", flag=True) data_set.set_target("y", flag=True) - + model = NaiveClassifier(2, 2) - + trainer = DistTrainer( model=model, train_data=data_set, optimizer=SGD(lr=0.1), loss=CrossEntropyLoss(pred="predict", target="y"), @@ -105,7 +111,7 @@ class TestDistTrainer(unittest.TestCase): """ if trainer.is_master and os.path.exists(self.save_path): shutil.rmtree(self.save_path) - + def run3(self): set_rng_seed(100) data_set, model = prepare_env() @@ -117,15 +123,15 @@ class TestDistTrainer(unittest.TestCase): callbacks_master=[EchoCallback('callbacks_master')] ) trainer.train() - + def run4(self): set_rng_seed(100) data_set, model = prepare_env() - + train_set, dev_set = data_set.split(0.3) - + model = NaiveClassifier(2, 1) - + trainer = DistTrainer( train_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"), @@ -138,7 +144,7 @@ class TestDistTrainer(unittest.TestCase): """ if trainer.is_master and os.path.exists(self.save_path): shutil.rmtree(self.save_path) - + def run_dist(self, run_id): if torch.cuda.is_available(): ngpu = min(2, torch.cuda.device_count()) @@ -147,23 +153,24 @@ class TestDistTrainer(unittest.TestCase): '--nproc_per_node', str(ngpu), path, '--test', str(run_id)] print(' '.join(cmd)) subprocess.check_call(cmd) - + def test_normal_run(self): self.run_dist(1) - + def no_test_fp16(self): self.run_dist(2) - + def test_callback(self): self.run_dist(3) - + def test_dev_data(self): self.run_dist(4) + if __name__ == '__main__': runner = TestDistTrainer() parser = ArgumentParser() parser.add_argument('--test', type=int) args, _ = parser.parse_known_args() - if args.test and hasattr(runner, 'run%s'%args.test): - getattr(runner, 'run%s'%args.test)() + if args.test and hasattr(runner, 'run%s' % args.test): + getattr(runner, 'run%s' % args.test)()