|
import os
import shutil
import subprocess
import unittest
from argparse import ArgumentParser

import numpy as np
import torch.cuda

from fastNLP import AccuracyMetric
from fastNLP import CrossEntropyLoss, BCELoss
from fastNLP import DataSet
from fastNLP import Instance
from fastNLP import SGD
from fastNLP.core.callback import EchoCallback
from fastNLP.core.dist_trainer import DistTrainer, get_local_rank
from fastNLP.models.base_model import NaiveClassifier
|
def prepare_fake_dataset():
    """Build a toy binary-classification DataSet.

    Draws 1000 two-dimensional points from each of two unit-covariance
    Gaussians (means ``(-3, -3)`` and ``(3, 3)``). Points from the first
    cloud get integer label ``y=0``, from the second ``y=1`` — suitable
    for CrossEntropyLoss.

    Returns:
        DataSet: 2000 instances with fields ``x`` (list of two floats)
        and ``y`` (int class index).
    """
    mean = np.array([-3, -3])
    cov = np.array([[1, 0], [0, 1]])
    class_A = np.random.multivariate_normal(mean, cov, size=(1000,))

    mean = np.array([3, 3])
    cov = np.array([[1, 0], [0, 1]])
    class_B = np.random.multivariate_normal(mean, cov, size=(1000,))

    data_set = DataSet([Instance(x=[float(item[0]), float(item[1])], y=0) for item in class_A] +
                       [Instance(x=[float(item[0]), float(item[1])], y=1) for item in class_B])
    return data_set
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def prepare_fake_dataset2(*args, size=100):
    """Build a DataSet with one target column and arbitrary feature columns.

    Args:
        *args: names of feature fields; each gets a ``(size, 5)`` array of
            standard-normal floats.
        size: number of instances (default 100).

    Returns:
        DataSet: ``size`` instances with int64 targets ``y`` in ``[0, 4)``.

    Fix: ``ys`` previously used a hard-coded ``size=100``, so any caller
    passing ``size != 100`` got a ``'y'`` field whose length disagreed with
    the feature columns. Default behavior (size=100) is unchanged.
    """
    ys = np.random.randint(4, size=size, dtype=np.int64)
    data = {'y': ys}
    for arg in args:
        data[arg] = np.random.randn(size, 5)
    return DataSet(data=data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def set_rng_seed(seed):
    """Seed the RNGs used by these tests for reproducible runs.

    The original seeded only numpy; since the trainers under test build and
    shuffle data with torch, torch's generator is now seeded as well
    (``torch.manual_seed`` also seeds CUDA generators on recent torch).

    Args:
        seed: integer seed applied to both numpy and torch.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def prepare_env():
    """Create the (DataSet, model) pair used by the BCE-loss test runs.

    The nested ``prepare_fake_dataset`` deliberately shadows the
    module-level function of the same name: here the targets are
    single-element float lists (``[0.0]`` / ``[1.0]``) as BCELoss expects,
    rather than int class indices.

    Returns:
        tuple: ``(data_set, model)`` where ``data_set`` has input field
        ``x`` and target field ``y``, and ``model`` is a 2-in/1-out
        NaiveClassifier.
    """
    def prepare_fake_dataset():
        # Two 2-D Gaussian clouds, 1000 points each; float targets for BCE.
        mean = np.array([-3, -3])
        cov = np.array([[1, 0], [0, 1]])
        class_A = np.random.multivariate_normal(mean, cov, size=(1000,))

        mean = np.array([3, 3])
        cov = np.array([[1, 0], [0, 1]])
        class_B = np.random.multivariate_normal(mean, cov, size=(1000,))

        data_set = DataSet([Instance(x=[float(item[0]), float(item[1])], y=[0.0]) for item in class_A] +
                           [Instance(x=[float(item[0]), float(item[1])], y=[1.0]) for item in class_B])
        return data_set

    data_set = prepare_fake_dataset()
    data_set.set_input("x")
    data_set.set_target("y")
    model = NaiveClassifier(2, 1)
    return data_set, model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestDistTrainer(unittest.TestCase): |
|
|
class TestDistTrainer(unittest.TestCase): |
|
|
save_path = './save_cp' |
|
|
save_path = './save_cp' |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run1(self): |
|
|
def run1(self): |
|
|
# test distributed training |
|
|
# test distributed training |
|
|
print('local rank', get_local_rank()) |
|
|
print('local rank', get_local_rank()) |
|
@@ -68,9 +74,9 @@ class TestDistTrainer(unittest.TestCase): |
|
|
data_set = prepare_fake_dataset() |
|
|
data_set = prepare_fake_dataset() |
|
|
data_set.set_input("x", flag=True) |
|
|
data_set.set_input("x", flag=True) |
|
|
data_set.set_target("y", flag=True) |
|
|
data_set.set_target("y", flag=True) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model = NaiveClassifier(2, 2) |
|
|
model = NaiveClassifier(2, 2) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
trainer = DistTrainer( |
|
|
trainer = DistTrainer( |
|
|
model=model, train_data=data_set, optimizer=SGD(lr=0.1), |
|
|
model=model, train_data=data_set, optimizer=SGD(lr=0.1), |
|
|
loss=CrossEntropyLoss(pred="predict", target="y"), |
|
|
loss=CrossEntropyLoss(pred="predict", target="y"), |
|
@@ -82,7 +88,7 @@ class TestDistTrainer(unittest.TestCase): |
|
|
""" |
|
|
""" |
|
|
if trainer.is_master and os.path.exists(self.save_path): |
|
|
if trainer.is_master and os.path.exists(self.save_path): |
|
|
shutil.rmtree(self.save_path) |
|
|
shutil.rmtree(self.save_path) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run2(self): |
|
|
def run2(self): |
|
|
# test fp16 with distributed training |
|
|
# test fp16 with distributed training |
|
|
print('local rank', get_local_rank()) |
|
|
print('local rank', get_local_rank()) |
|
@@ -90,9 +96,9 @@ class TestDistTrainer(unittest.TestCase): |
|
|
data_set = prepare_fake_dataset() |
|
|
data_set = prepare_fake_dataset() |
|
|
data_set.set_input("x", flag=True) |
|
|
data_set.set_input("x", flag=True) |
|
|
data_set.set_target("y", flag=True) |
|
|
data_set.set_target("y", flag=True) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model = NaiveClassifier(2, 2) |
|
|
model = NaiveClassifier(2, 2) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
trainer = DistTrainer( |
|
|
trainer = DistTrainer( |
|
|
model=model, train_data=data_set, optimizer=SGD(lr=0.1), |
|
|
model=model, train_data=data_set, optimizer=SGD(lr=0.1), |
|
|
loss=CrossEntropyLoss(pred="predict", target="y"), |
|
|
loss=CrossEntropyLoss(pred="predict", target="y"), |
|
@@ -105,7 +111,7 @@ class TestDistTrainer(unittest.TestCase): |
|
|
""" |
|
|
""" |
|
|
if trainer.is_master and os.path.exists(self.save_path): |
|
|
if trainer.is_master and os.path.exists(self.save_path): |
|
|
shutil.rmtree(self.save_path) |
|
|
shutil.rmtree(self.save_path) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run3(self): |
|
|
def run3(self): |
|
|
set_rng_seed(100) |
|
|
set_rng_seed(100) |
|
|
data_set, model = prepare_env() |
|
|
data_set, model = prepare_env() |
|
@@ -117,15 +123,15 @@ class TestDistTrainer(unittest.TestCase): |
|
|
callbacks_master=[EchoCallback('callbacks_master')] |
|
|
callbacks_master=[EchoCallback('callbacks_master')] |
|
|
) |
|
|
) |
|
|
trainer.train() |
|
|
trainer.train() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run4(self): |
|
|
def run4(self): |
|
|
set_rng_seed(100) |
|
|
set_rng_seed(100) |
|
|
data_set, model = prepare_env() |
|
|
data_set, model = prepare_env() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
train_set, dev_set = data_set.split(0.3) |
|
|
train_set, dev_set = data_set.split(0.3) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model = NaiveClassifier(2, 1) |
|
|
model = NaiveClassifier(2, 1) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
trainer = DistTrainer( |
|
|
trainer = DistTrainer( |
|
|
train_set, model, optimizer=SGD(lr=0.1), |
|
|
train_set, model, optimizer=SGD(lr=0.1), |
|
|
loss=BCELoss(pred="predict", target="y"), |
|
|
loss=BCELoss(pred="predict", target="y"), |
|
@@ -138,7 +144,7 @@ class TestDistTrainer(unittest.TestCase): |
|
|
""" |
|
|
""" |
|
|
if trainer.is_master and os.path.exists(self.save_path): |
|
|
if trainer.is_master and os.path.exists(self.save_path): |
|
|
shutil.rmtree(self.save_path) |
|
|
shutil.rmtree(self.save_path) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_dist(self, run_id): |
|
|
def run_dist(self, run_id): |
|
|
if torch.cuda.is_available(): |
|
|
if torch.cuda.is_available(): |
|
|
ngpu = min(2, torch.cuda.device_count()) |
|
|
ngpu = min(2, torch.cuda.device_count()) |
|
@@ -147,23 +153,24 @@ class TestDistTrainer(unittest.TestCase): |
|
|
'--nproc_per_node', str(ngpu), path, '--test', str(run_id)] |
|
|
'--nproc_per_node', str(ngpu), path, '--test', str(run_id)] |
|
|
print(' '.join(cmd)) |
|
|
print(' '.join(cmd)) |
|
|
subprocess.check_call(cmd) |
|
|
subprocess.check_call(cmd) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_normal_run(self): |
|
|
def test_normal_run(self): |
|
|
self.run_dist(1) |
|
|
self.run_dist(1) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def no_test_fp16(self): |
|
|
def no_test_fp16(self): |
|
|
self.run_dist(2) |
|
|
self.run_dist(2) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_callback(self): |
|
|
def test_callback(self): |
|
|
self.run_dist(3) |
|
|
self.run_dist(3) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_dev_data(self): |
|
|
def test_dev_data(self): |
|
|
self.run_dist(4) |
|
|
self.run_dist(4) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Entry point for the worker subprocesses spawned by run_dist(): each
    # worker is launched as "python this_file --test <n>" (plus launcher
    # args, hence parse_known_args) and dispatches to the matching run<n>().
    runner = TestDistTrainer()
    parser = ArgumentParser()
    parser.add_argument('--test', type=int)
    args, _ = parser.parse_known_args()
    if args.test and hasattr(runner, 'run%s' % args.test):
        getattr(runner, 'run%s' % args.test)()