
Added test_logger.py

Tag: v1.0.0alpha
YWMditto committed 2 years ago
Parent commit: 929abc3953
6 changed files with 310 additions and 19 deletions
  1. fastNLP/envs/set_env_on_import.py (+1, -1)
  2. tests/core/controllers/_test_distributed_launch_torch_1.py (+2, -2)
  3. tests/core/controllers/_test_distributed_launch_torch_2.py (+1, -1)
  4. tests/core/controllers/test_trainer_wo_evaluator_torch.py (+6, -8)
  5. tests/core/log/test_logger.py (+300, -0)
  6. tests/core/samplers/test_sampler.py (+0, -7)

fastNLP/envs/set_env_on_import.py (+1, -1)

@@ -15,7 +15,7 @@ def remove_local_rank_in_argv():
     """
     index = -1
     for i, v in enumerate(sys.argv):
-        if v.startswith('--rank='):
+        if v.startswith('--local_rank='):
             os.environ['LOCAL_RANK'] = v.split('=')[1]
             index = i
             break
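
For context, a minimal sketch (not the fastNLP implementation) of what the corrected prefix handles: torch.distributed.launch injects the process rank into sys.argv as "--local_rank=N" rather than "--rank=N", so the old check never matched the launcher-injected flag. The helper name below is hypothetical, and the removal step is an assumption based on the function's name.

# Hypothetical illustration, mimicking the intent of remove_local_rank_in_argv() after the fix.
import os

def _strip_local_rank(argv):
    """Export LOCAL_RANK from the launcher-injected flag and drop the flag from argv."""
    index = -1
    for i, v in enumerate(argv):
        if v.startswith('--local_rank='):  # the corrected prefix
            os.environ['LOCAL_RANK'] = v.split('=')[1]
            index = i
            break
    if index != -1:  # assumption: the matched flag is removed so user argparse code never sees it
        argv.pop(index)
    return argv

# argv as injected by `python -m torch.distributed.launch ...`
print(_strip_local_rank(['train.py', '--local_rank=0', '--lr=1e-3']))
# -> ['train.py', '--lr=1e-3'], with os.environ['LOCAL_RANK'] == '0'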


tests/core/controllers/_test_distributed_launch_torch_1.py (+2, -2)

@@ -6,7 +6,7 @@ python -m torch.distributed.launch --nproc_per_node 2 tests/core/controllers/_te
 
 import argparse
 import os
-os.environ["CUDA_VISIBLE_DEVICES"] = "4,5"
+os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"
 
 import sys
 path = os.path.abspath(__file__)
@@ -101,7 +101,7 @@ def _test_trainer_torch_with_evaluator_fp16_accumulation_steps(
     )
 
     trainer.run()
-    dist.barrier()
+    # dist.barrier()
 
 
 if __name__ == "__main__":


tests/core/controllers/_test_distributed_launch_torch_2.py (+1, -1)

@@ -6,7 +6,7 @@ python -m torch.distributed.launch --nproc_per_node 2 tests/core/controllers/_te
 
 import argparse
 import os
-os.environ["CUDA_VISIBLE_DEVICES"] = "4,5"
+os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"
 
 import sys
 path = os.path.abspath(__file__)


tests/core/controllers/test_trainer_wo_evaluator_torch.py (+6, -8)

@@ -77,15 +77,14 @@ def model_and_optimizers(request):
 
 # test on cpu;
 @pytest.mark.parametrize("driver,device", [("torch", "cpu")])
-@pytest.mark.parametrize("callbacks", [[RecordLossCallback(loss_threshold=0.1)]])
 @magic_argv_env_context
 def test_trainer_torch_without_evaluator(
         model_and_optimizers: TrainerParameters,
         driver,
         device,
-        callbacks,
         n_epochs=10,
 ):
+    callbacks = [RecordLossCallback(loss_threshold=0.1)]
     trainer = Trainer(
         model=model_and_optimizers.model,
         driver=driver,
@@ -108,8 +107,7 @@ def test_trainer_torch_without_evaluator(
         dist.destroy_process_group()
 
 
-@pytest.mark.parametrize("driver,device", [("torch", 4), ("torch", [4, 5])]) # ("torch", 4),
-@pytest.mark.parametrize("callbacks", [[RecordLossCallback(loss_threshold=0.1)]])
+@pytest.mark.parametrize("driver,device", [("torch", 1), ("torch", [1, 2])]) # ("torch", 4),
 @pytest.mark.parametrize("fp16", [False, True])
 @pytest.mark.parametrize("accumulation_steps", [1, 3])
 @magic_argv_env_context
@@ -117,11 +115,11 @@ def test_trainer_torch_without_evaluator_fp16_accumulation_steps(
         model_and_optimizers: TrainerParameters,
         driver,
         device,
-        callbacks,
         fp16,
         accumulation_steps,
         n_epochs=10,
 ):
+    callbacks = [RecordLossCallback(loss_threshold=0.1)]
     trainer = Trainer(
         model=model_and_optimizers.model,
         driver=driver,
@@ -148,7 +146,7 @@ def test_trainer_torch_without_evaluator_fp16_accumulation_steps(
 
 
 # test accumulation_steps;
-@pytest.mark.parametrize("driver,device", [("torch", "cpu"), ("torch", 4), ("torch", [4, 5])])
+@pytest.mark.parametrize("driver,device", [("torch", "cpu"), ("torch", 1), ("torch", [1, 2])])
 @pytest.mark.parametrize("accumulation_steps", [1, 3])
 @magic_argv_env_context
 def test_trainer_torch_without_evaluator_accumulation_steps(
@@ -181,7 +179,7 @@ def test_trainer_torch_without_evaluator_accumulation_steps(
         dist.destroy_process_group()
 
 
-@pytest.mark.parametrize("driver,device", [("torch", [6, 7])])
+@pytest.mark.parametrize("driver,device", [("torch", [1, 2])])
 @pytest.mark.parametrize("output_from_new_proc", ["all", "ignore", "only_error", "test_log"])
 @magic_argv_env_context
 def test_trainer_output_from_new_proc(
@@ -244,7 +242,7 @@ def test_trainer_output_from_new_proc(
         synchronize_safe_rm(path)
 
 
-@pytest.mark.parametrize("driver,device", [("torch", [4, 5])])
+@pytest.mark.parametrize("driver,device", [("torch", [1, 2])])
 @pytest.mark.parametrize("cur_rank", [0]) # test, one rank at a time, whether the other processes are correctly killed when the current process raises an error; , 1, 2, 3
 @magic_argv_env_context
 def test_trainer_on_exception(
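
A side note on the callbacks change above: arguments built inside @pytest.mark.parametrize are constructed once at collection time, so every parametrized run would receive the same RecordLossCallback instance; moving the construction into the test body, as this diff does, presumably guarantees a fresh callback per run. A small stand-alone illustration of that pytest behaviour, with hypothetical names unrelated to this repo:

# Illustration only: why stateful objects inside @pytest.mark.parametrize can leak state.
import pytest

class RecordingCallback:
    def __init__(self, threshold):
        self.threshold = threshold
        self.losses = []  # mutable state that would be shared if built at collection time

@pytest.mark.parametrize("fp16", [False, True])
def test_fresh_callback_per_run(fp16):
    # constructing the callback inside the test body yields a fresh instance per parameter set
    callback = RecordingCallback(threshold=0.1)
    assert callback.losses == []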


tests/core/log/test_logger.py (+300, -0)

@@ -0,0 +1,300 @@
import os
import tempfile
import datetime
from pathlib import Path
import logging
import re

from fastNLP.envs.env import FASTNLP_LAUNCH_TIME
from tests.helpers.utils import magic_argv_env_context
from fastNLP.core import synchronize_safe_rm


# test TorchDDPDriver;
@magic_argv_env_context
def test_add_file_ddp_1():
    """
    Test the case where path is the address of a file and the directory containing that file already exists;

    Building the file name from the launch time has a major bug in the multi-GPU case: the processes do not start at exactly the same moment, so each of them ends up writing to its own separate log file;
    """
    import torch
    import torch.distributed as dist

    from fastNLP.core.log.logger import logger
    from fastNLP.core.drivers.torch_driver.ddp import TorchDDPDriver
    from tests.helpers.models.torch_model import TorchNormalModel_Classification_1

    model = TorchNormalModel_Classification_1(num_labels=3, feature_dimension=10)

    driver = TorchDDPDriver(
        model=model,
        parallel_device=[torch.device("cuda:0"), torch.device("cuda:1")],
        output_from_new_proc="all"
    )
    driver.setup()
    msg = 'some test log msg'

    path = Path.cwd()
    filepath = path.joinpath('log.txt')
    handler = logger.add_file(filepath, mode="w")
    logger.info(msg)
    logger.warning(f"\nrank {driver.get_local_rank()} should have this message!\n")

    for h in logger.handlers:
        if isinstance(h, logging.FileHandler):
            h.flush()
    dist.barrier()
    with open(filepath, 'r') as f:
        line = ''.join([l for l in f])
    assert msg in line
    assert f"\nrank {driver.get_local_rank()} should have this message!\n" in line

    pattern = re.compile(msg)
    assert len(pattern.findall(line)) == 1

    synchronize_safe_rm(filepath)
    dist.barrier()
    dist.destroy_process_group()
    logger.removeHandler(handler)

@magic_argv_env_context
def test_add_file_ddp_2():
    """
    Test the case where path is the address of a file but the directory containing that file does not exist;
    """

    import torch
    import torch.distributed as dist

    from fastNLP.core.log.logger import logger
    from fastNLP.core.drivers.torch_driver.ddp import TorchDDPDriver
    from tests.helpers.models.torch_model import TorchNormalModel_Classification_1

    model = TorchNormalModel_Classification_1(num_labels=3, feature_dimension=10)

    driver = TorchDDPDriver(
        model=model,
        parallel_device=[torch.device("cuda:0"), torch.device("cuda:1")],
        output_from_new_proc="all"
    )
    driver.setup()

    msg = 'some test log msg'

    origin_path = Path.cwd()
    try:
        path = origin_path.joinpath("not_existed")
        filepath = path.joinpath('log.txt')
        handler = logger.add_file(filepath)
        logger.info(msg)
        logger.warning(f"\nrank {driver.get_local_rank()} should have this message!\n")
        for h in logger.handlers:
            if isinstance(h, logging.FileHandler):
                h.flush()
        dist.barrier()
        with open(filepath, 'r') as f:
            line = ''.join([l for l in f])

        assert msg in line
        assert f"\nrank {driver.get_local_rank()} should have this message!\n" in line
        pattern = re.compile(msg)
        assert len(pattern.findall(line)) == 1
    finally:
        synchronize_safe_rm(path)
        logger.removeHandler(handler)

    dist.barrier()
    dist.destroy_process_group()

@magic_argv_env_context
def test_add_file_ddp_3():
    """
    path = None;

    Building the file name from the launch time has a major bug in the multi-GPU case: the processes do not start at exactly the same moment, so each of them ends up writing to its own separate log file;
    """
    import torch
    import torch.distributed as dist

    from fastNLP.core.log.logger import logger
    from fastNLP.core.drivers.torch_driver.ddp import TorchDDPDriver
    from tests.helpers.models.torch_model import TorchNormalModel_Classification_1

    model = TorchNormalModel_Classification_1(num_labels=3, feature_dimension=10)

    driver = TorchDDPDriver(
        model=model,
        parallel_device=[torch.device("cuda:0"), torch.device("cuda:1")],
        output_from_new_proc="all"
    )
    driver.setup()
    msg = 'some test log msg'

    handler = logger.add_file()
    logger.info(msg)
    logger.warning(f"\nrank {driver.get_local_rank()} should have this message!\n")

    for h in logger.handlers:
        if isinstance(h, logging.FileHandler):
            h.flush()
    dist.barrier()
    file = Path.cwd().joinpath(os.environ.get(FASTNLP_LAUNCH_TIME)+".log")
    with open(file, 'r') as f:
        line = ''.join([l for l in f])

    # print(f"\nrank: {driver.get_local_rank()} line, {line}\n")
    assert msg in line
    assert f"\nrank {driver.get_local_rank()} should have this message!\n" in line

    pattern = re.compile(msg)
    assert len(pattern.findall(line)) == 1

    synchronize_safe_rm(file)
    dist.barrier()
    dist.destroy_process_group()
    logger.removeHandler(handler)

@magic_argv_env_context
def test_add_file_ddp_4():
    """
    Test the case where path is a directory;
    """

    import torch
    import torch.distributed as dist

    from fastNLP.core.log.logger import logger
    from fastNLP.core.drivers.torch_driver.ddp import TorchDDPDriver
    from tests.helpers.models.torch_model import TorchNormalModel_Classification_1

    model = TorchNormalModel_Classification_1(num_labels=3, feature_dimension=10)

    driver = TorchDDPDriver(
        model=model,
        parallel_device=[torch.device("cuda:0"), torch.device("cuda:1")],
        output_from_new_proc="all"
    )
    driver.setup()
    msg = 'some test log msg'

    path = Path.cwd().joinpath("not_existed")
    try:
        handler = logger.add_file(path)
        logger.info(msg)
        logger.warning(f"\nrank {driver.get_local_rank()} should have this message!\n")

        for h in logger.handlers:
            if isinstance(h, logging.FileHandler):
                h.flush()
        dist.barrier()

        file = path.joinpath(os.environ.get(FASTNLP_LAUNCH_TIME) + ".log")
        with open(file, 'r') as f:
            line = ''.join([l for l in f])
        assert msg in line
        assert f"\nrank {driver.get_local_rank()} should have this message!\n" in line
        pattern = re.compile(msg)
        assert len(pattern.findall(line)) == 1
    finally:
        synchronize_safe_rm(path)
        logger.removeHandler(handler)

    dist.barrier()
    dist.destroy_process_group()


class TestLogger:
    msg = 'some test log msg'

    def test_add_file_1(self):
        """
        Test the case where path is the address of a file and the directory containing that file already exists;
        """
        from fastNLP.core.log.logger import logger

        path = Path(tempfile.mkdtemp())
        try:
            filepath = path.joinpath('log.txt')
            handler = logger.add_file(filepath)
            logger.info(self.msg)
            with open(filepath, 'r') as f:
                line = ''.join([l for l in f])
            assert self.msg in line
        finally:
            synchronize_safe_rm(path)
            logger.removeHandler(handler)

    def test_add_file_2(self):
        """
        Test the case where path is the address of a file but the directory containing that file does not exist;
        """
        from fastNLP.core.log.logger import logger

        origin_path = Path(tempfile.mkdtemp())

        try:
            path = origin_path.joinpath("not_existed")
            path = path.joinpath('log.txt')
            handler = logger.add_file(path)
            logger.info(self.msg)
            with open(path, 'r') as f:
                line = ''.join([l for l in f])
            assert self.msg in line
        finally:
            synchronize_safe_rm(origin_path)
            logger.removeHandler(handler)

    def test_add_file_3(self):
        """
        Test the case where path is None;
        """
        from fastNLP.core.log.logger import logger

        handler = logger.add_file()
        logger.info(self.msg)

        path = Path.cwd()
        cur_datetime = str(datetime.datetime.now().strftime('%Y-%m-%d'))
        for file in path.iterdir():
            if file.name.startswith(cur_datetime):
                with open(file, 'r') as f:
                    line = ''.join([l for l in f])
                assert self.msg in line
                file.unlink()
        logger.removeHandler(handler)

    def test_add_file_4(self):
        """
        Test the case where path is a directory;
        """
        from fastNLP.core.log.logger import logger

        path = Path(tempfile.mkdtemp())
        try:
            handler = logger.add_file(path)
            logger.info(self.msg)

            cur_datetime = str(datetime.datetime.now().strftime('%Y-%m-%d'))
            for file in path.iterdir():
                if file.name.startswith(cur_datetime):
                    with open(file, 'r') as f:
                        line = ''.join([l for l in f])
                    assert self.msg in line
        finally:
            synchronize_safe_rm(path)
            logger.removeHandler(handler)

    def test_stdout(self, capsys):
        from fastNLP.core.log.logger import logger

        handler = logger.set_stdout(stdout="raw")
        logger.info(self.msg)
        logger.debug('aabbc')
        captured = capsys.readouterr()
        assert "some test log msg\n" == captured.out

        logger.removeHandler(handler)
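
Taken together, the tests above exercise four input forms of logger.add_file: an explicit file path, a file path whose parent directory does not yet exist, a directory, and no argument at all (in which case the file name is derived from FASTNLP_LAUNCH_TIME, or from the current date in the single-process tests). A minimal usage sketch built only from the calls shown in these tests; the file names and messages are illustrative:

# Usage sketch based solely on the calls exercised in test_logger.py above.
from pathlib import Path
from fastNLP.core.log.logger import logger

# explicit file path, opened in write mode
handler = logger.add_file(Path.cwd().joinpath('log.txt'), mode='w')
logger.info('written to log.txt')
logger.removeHandler(handler)

# no path: the tests expect a "<FASTNLP_LAUNCH_TIME>.log" (or date-named) file in the cwd
handler = logger.add_file()
logger.info('written to the launch-time-named log file')
logger.removeHandler(handler)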


tests/core/samplers/test_sampler.py (+0, -7)

@@ -10,13 +10,6 @@ from fastNLP.core.drivers.torch_driver.utils import replace_batch_sampler
 from tests.helpers.datasets.torch_data import TorchNormalDataset
 
 
-
-
-
-
-
-
-
 class SamplerTest(unittest.TestCase):
 
     def test_sequentialsampler(self):

