From 929abc395307b4ed835388f52a9810a8f0cd5dd8 Mon Sep 17 00:00:00 2001
From: YWMditto
Date: Sat, 9 Apr 2022 15:28:13 +0800
Subject: [PATCH] Add test_logger.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
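
Add tests/core/log/test_logger.py, covering logger.add_file (path as an
existing file, a file whose directory is missing, a directory, and None)
and logger.set_stdout, each in both single-process and TorchDDPDriver
runs. Also fix remove_local_rank_in_argv to match the '--local_rank=<n>'
argument that torch.distributed.launch actually passes to each worker
(the old '--rank=' prefix never matched), build RecordLossCallback
inside the trainer tests instead of parametrizing it, and move the GPU
tests from devices 4/5 (and 6/7) to 1/2.

The DDP logger tests construct TorchDDPDriver themselves, so on a
machine with at least two visible GPUs a plain pytest run should be
enough (assumed invocation, not verified here):

    pytest tests/core/log/test_logger.py

A short sketch of the launch-time issue mentioned in the new tests'
docstrings follows the diff.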

---
 fastNLP/envs/set_env_on_import.py        |   2 +-
 .../_test_distributed_launch_torch_1.py  |   4 +-
 .../_test_distributed_launch_torch_2.py  |   2 +-
 .../test_trainer_wo_evaluator_torch.py   |  14 +-
 tests/core/log/test_logger.py            | 300 ++++++++++++++++++
 tests/core/samplers/test_sampler.py      |   7 -
 6 files changed, 310 insertions(+), 19 deletions(-)

diff --git a/fastNLP/envs/set_env_on_import.py b/fastNLP/envs/set_env_on_import.py
index db978bae..773c1e22 100644
--- a/fastNLP/envs/set_env_on_import.py
+++ b/fastNLP/envs/set_env_on_import.py
@@ -15,7 +15,7 @@ def remove_local_rank_in_argv():
     """
     index = -1
     for i, v in enumerate(sys.argv):
-        if v.startswith('--rank='):
+        if v.startswith('--local_rank='):
             os.environ['LOCAL_RANK'] = v.split('=')[1]
             index = i
             break
diff --git a/tests/core/controllers/_test_distributed_launch_torch_1.py b/tests/core/controllers/_test_distributed_launch_torch_1.py
index fb37c8d5..56261922 100644
--- a/tests/core/controllers/_test_distributed_launch_torch_1.py
+++ b/tests/core/controllers/_test_distributed_launch_torch_1.py
@@ -6,7 +6,7 @@ python -m torch.distributed.launch --nproc_per_node 2 tests/core/controllers/_te
 import argparse
 import os
 
-os.environ["CUDA_VISIBLE_DEVICES"] = "4,5"
+os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"
 
 import sys
 path = os.path.abspath(__file__)
@@ -101,7 +101,7 @@ def _test_trainer_torch_with_evaluator_fp16_accumulation_steps(
     )
 
     trainer.run()
-    dist.barrier()
+    # dist.barrier()
 
 
 if __name__ == "__main__":
diff --git a/tests/core/controllers/_test_distributed_launch_torch_2.py b/tests/core/controllers/_test_distributed_launch_torch_2.py
index ad42672a..13d88248 100644
--- a/tests/core/controllers/_test_distributed_launch_torch_2.py
+++ b/tests/core/controllers/_test_distributed_launch_torch_2.py
@@ -6,7 +6,7 @@ python -m torch.distributed.launch --nproc_per_node 2 tests/core/controllers/_te
 import argparse
 import os
 
-os.environ["CUDA_VISIBLE_DEVICES"] = "4,5"
+os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"
 
 import sys
 path = os.path.abspath(__file__)
diff --git a/tests/core/controllers/test_trainer_wo_evaluator_torch.py b/tests/core/controllers/test_trainer_wo_evaluator_torch.py
index f8058fc9..0a280a0c 100644
--- a/tests/core/controllers/test_trainer_wo_evaluator_torch.py
+++ b/tests/core/controllers/test_trainer_wo_evaluator_torch.py
@@ -77,15 +77,14 @@ def model_and_optimizers(request):
 
 # Test on cpu;
 @pytest.mark.parametrize("driver,device", [("torch", "cpu")])
-@pytest.mark.parametrize("callbacks", [[RecordLossCallback(loss_threshold=0.1)]])
 @magic_argv_env_context
 def test_trainer_torch_without_evaluator(
     model_and_optimizers: TrainerParameters,
     driver,
     device,
-    callbacks,
     n_epochs=10,
 ):
+    callbacks = [RecordLossCallback(loss_threshold=0.1)]
     trainer = Trainer(
         model=model_and_optimizers.model,
         driver=driver,
@@ -108,8 +107,7 @@
         dist.destroy_process_group()
 
 
-@pytest.mark.parametrize("driver,device", [("torch", 4), ("torch", [4, 5])])  # ("torch", 4),
-@pytest.mark.parametrize("callbacks", [[RecordLossCallback(loss_threshold=0.1)]])
+@pytest.mark.parametrize("driver,device", [("torch", 1), ("torch", [1, 2])])  # ("torch", 4),
 @pytest.mark.parametrize("fp16", [False, True])
 @pytest.mark.parametrize("accumulation_steps", [1, 3])
 @magic_argv_env_context
@@ -117,11 +115,11 @@ def test_trainer_torch_without_evaluator_fp16_accumulation_steps(
     model_and_optimizers: TrainerParameters,
     driver,
     device,
-    callbacks,
     fp16,
     accumulation_steps,
     n_epochs=10,
 ):
+    callbacks = [RecordLossCallback(loss_threshold=0.1)]
     trainer = Trainer(
         model=model_and_optimizers.model,
         driver=driver,
@@ -148,7 +146,7 @@ def test_trainer_torch_without_evaluator_fp16_accumulation_steps(
 
 
 # Test accumulation_steps;
-@pytest.mark.parametrize("driver,device", [("torch", "cpu"), ("torch", 4), ("torch", [4, 5])])
+@pytest.mark.parametrize("driver,device", [("torch", "cpu"), ("torch", 1), ("torch", [1, 2])])
 @pytest.mark.parametrize("accumulation_steps", [1, 3])
 @magic_argv_env_context
 def test_trainer_torch_without_evaluator_accumulation_steps(
@@ -181,7 +179,7 @@ def test_trainer_torch_without_evaluator_accumulation_steps(
         dist.destroy_process_group()
 
 
-@pytest.mark.parametrize("driver,device", [("torch", [6, 7])])
+@pytest.mark.parametrize("driver,device", [("torch", [1, 2])])
 @pytest.mark.parametrize("output_from_new_proc", ["all", "ignore", "only_error", "test_log"])
 @magic_argv_env_context
 def test_trainer_output_from_new_proc(
@@ -244,7 +242,7 @@ def test_trainer_output_from_new_proc(
             synchronize_safe_rm(path)
 
 
-@pytest.mark.parametrize("driver,device", [("torch", [4, 5])])
+@pytest.mark.parametrize("driver,device", [("torch", [1, 2])])
 @pytest.mark.parametrize("cur_rank", [0])  # Test, for each rank in turn, that when the current process raises an error the remaining processes are killed correctly; , 1, 2, 3
 @magic_argv_env_context
 def test_trainer_on_exception(
diff --git a/tests/core/log/test_logger.py b/tests/core/log/test_logger.py
index e69de29b..da9b7b6b 100644
--- a/tests/core/log/test_logger.py
+++ b/tests/core/log/test_logger.py
@@ -0,0 +1,300 @@
+import os
+import tempfile
+import datetime
+from pathlib import Path
+import logging
+import re
+
+from fastNLP.envs.env import FASTNLP_LAUNCH_TIME
+from tests.helpers.utils import magic_argv_env_context
+from fastNLP.core import synchronize_safe_rm
+
+
+# Test TorchDDPDriver;
+@magic_argv_env_context
+def test_add_file_ddp_1():
+    """
+    Test the case where path points to a file whose parent directory exists;
+
+    Creating the file name from the current time has a serious bug in the multi-GPU setting: the processes start at slightly different times, so each of them would end up writing to its own separate log file;
+    """
+    import torch
+    import torch.distributed as dist
+
+    from fastNLP.core.log.logger import logger
+    from fastNLP.core.drivers.torch_driver.ddp import TorchDDPDriver
+    from tests.helpers.models.torch_model import TorchNormalModel_Classification_1
+
+    model = TorchNormalModel_Classification_1(num_labels=3, feature_dimension=10)
+
+    driver = TorchDDPDriver(
+        model=model,
+        parallel_device=[torch.device("cuda:0"), torch.device("cuda:1")],
+        output_from_new_proc="all"
+    )
+    driver.setup()
+    msg = 'some test log msg'
+
+    path = Path.cwd()
+    filepath = path.joinpath('log.txt')
+    handler = logger.add_file(filepath, mode="w")
+    logger.info(msg)
+    logger.warning(f"\nrank {driver.get_local_rank()} should have this message!\n")
+
+    for h in logger.handlers:
+        if isinstance(h, logging.FileHandler):
+            h.flush()
+    dist.barrier()
+    with open(filepath, 'r') as f:
+        line = ''.join([l for l in f])
+    assert msg in line
+    assert f"\nrank {driver.get_local_rank()} should have this message!\n" in line
+
+    pattern = re.compile(msg)
+    assert len(pattern.findall(line)) == 1
+
+    synchronize_safe_rm(filepath)
+    dist.barrier()
+    dist.destroy_process_group()
+    logger.removeHandler(handler)
+
+
+@magic_argv_env_context
+def test_add_file_ddp_2():
+    """
+    Test the case where path points to a file whose parent directory does not exist;
+    """
+
+    import torch
+    import torch.distributed as dist
+
+    from fastNLP.core.log.logger import logger
+    from fastNLP.core.drivers.torch_driver.ddp import TorchDDPDriver
+    from tests.helpers.models.torch_model import TorchNormalModel_Classification_1
+
+    model = TorchNormalModel_Classification_1(num_labels=3, feature_dimension=10)
+
+    driver = TorchDDPDriver(
+        model=model,
+        parallel_device=[torch.device("cuda:0"), torch.device("cuda:1")],
+        output_from_new_proc="all"
+    )
+    driver.setup()
+
+    msg = 'some test log msg'
+
+    origin_path = Path.cwd()
+    try:
+        path = origin_path.joinpath("not_existed")
+        filepath = path.joinpath('log.txt')
+        handler = logger.add_file(filepath)
+        logger.info(msg)
+        logger.warning(f"\nrank {driver.get_local_rank()} should have this message!\n")
+        for h in logger.handlers:
+            if isinstance(h, logging.FileHandler):
+                h.flush()
+        dist.barrier()
+        with open(filepath, 'r') as f:
+            line = ''.join([l for l in f])
+
+        assert msg in line
+        assert f"\nrank {driver.get_local_rank()} should have this message!\n" in line
+        pattern = re.compile(msg)
+        assert len(pattern.findall(line)) == 1
+    finally:
+        synchronize_safe_rm(path)
+        logger.removeHandler(handler)
+
+    dist.barrier()
+    dist.destroy_process_group()
+
+
+@magic_argv_env_context
+def test_add_file_ddp_3():
+    """
+    path = None;
+
+    Creating the file name from the current time has a serious bug in the multi-GPU setting: the processes start at slightly different times, so each of them would end up writing to its own separate log file;
+    """
+    import torch
+    import torch.distributed as dist
+
+    from fastNLP.core.log.logger import logger
+    from fastNLP.core.drivers.torch_driver.ddp import TorchDDPDriver
+    from tests.helpers.models.torch_model import TorchNormalModel_Classification_1
+
+    model = TorchNormalModel_Classification_1(num_labels=3, feature_dimension=10)
+
+    driver = TorchDDPDriver(
+        model=model,
+        parallel_device=[torch.device("cuda:0"), torch.device("cuda:1")],
+        output_from_new_proc="all"
+    )
+    driver.setup()
+    msg = 'some test log msg'
+
+    handler = logger.add_file()
+    logger.info(msg)
+    logger.warning(f"\nrank {driver.get_local_rank()} should have this message!\n")
+
+    for h in logger.handlers:
+        if isinstance(h, logging.FileHandler):
+            h.flush()
+    dist.barrier()
+    file = Path.cwd().joinpath(os.environ.get(FASTNLP_LAUNCH_TIME)+".log")
+    with open(file, 'r') as f:
+        line = ''.join([l for l in f])
+
+    # print(f"\nrank: {driver.get_local_rank()} line, {line}\n")
+    assert msg in line
+    assert f"\nrank {driver.get_local_rank()} should have this message!\n" in line
+
+    pattern = re.compile(msg)
+    assert len(pattern.findall(line)) == 1
+
+    synchronize_safe_rm(file)
+    dist.barrier()
+    dist.destroy_process_group()
+    logger.removeHandler(handler)
+
+@magic_argv_env_context
+def test_add_file_ddp_4():
+    """
+    Test the case where path is a directory;
+    """
+
+    import torch
+    import torch.distributed as dist
+
+    from fastNLP.core.log.logger import logger
+    from fastNLP.core.drivers.torch_driver.ddp import TorchDDPDriver
+    from tests.helpers.models.torch_model import TorchNormalModel_Classification_1
+
+    model = TorchNormalModel_Classification_1(num_labels=3, feature_dimension=10)
+
+    driver = TorchDDPDriver(
+        model=model,
+        parallel_device=[torch.device("cuda:0"), torch.device("cuda:1")],
+        output_from_new_proc="all"
+    )
+    driver.setup()
+    msg = 'some test log msg'
+
+    path = Path.cwd().joinpath("not_existed")
+    try:
+        handler = logger.add_file(path)
+        logger.info(msg)
+        logger.warning(f"\nrank {driver.get_local_rank()} should have this message!\n")
+
+        for h in logger.handlers:
+            if isinstance(h, logging.FileHandler):
+                h.flush()
+        dist.barrier()
+
+        file = path.joinpath(os.environ.get(FASTNLP_LAUNCH_TIME) + ".log")
+        with open(file, 'r') as f:
+            line = ''.join([l for l in f])
+        assert msg in line
+        assert f"\nrank {driver.get_local_rank()} should have this message!\n" in line
+        pattern = re.compile(msg)
+        assert len(pattern.findall(line)) == 1
+    finally:
+        synchronize_safe_rm(path)
+        logger.removeHandler(handler)
+
+    dist.barrier()
+    dist.destroy_process_group()
+
+
+class TestLogger:
+    msg = 'some test log msg'
+
+    def test_add_file_1(self):
+        """
+        Test the case where path points to a file whose parent directory exists;
+        """
+        from fastNLP.core.log.logger import logger
+
+        path = Path(tempfile.mkdtemp())
+        try:
+            filepath = path.joinpath('log.txt')
+            handler = logger.add_file(filepath)
+            logger.info(self.msg)
+            with open(filepath, 'r') as f:
+                line = ''.join([l for l in f])
+            assert self.msg in line
+        finally:
+            synchronize_safe_rm(path)
+            logger.removeHandler(handler)
+
+    def test_add_file_2(self):
+        """
+        Test the case where path points to a file whose parent directory does not exist;
+        """
+        from fastNLP.core.log.logger import logger
+
+        origin_path = Path(tempfile.mkdtemp())
+
+        try:
+            path = origin_path.joinpath("not_existed")
+            path = path.joinpath('log.txt')
+            handler = logger.add_file(path)
+            logger.info(self.msg)
+            with open(path, 'r') as f:
+                line = ''.join([l for l in f])
+            assert self.msg in line
+        finally:
+            synchronize_safe_rm(origin_path)
+            logger.removeHandler(handler)
+
+    def test_add_file_3(self):
+        """
+        Test the case where path is None;
+        """
+        from fastNLP.core.log.logger import logger
+
+        handler = logger.add_file()
+        logger.info(self.msg)
+
+        path = Path.cwd()
+        cur_datetime = str(datetime.datetime.now().strftime('%Y-%m-%d'))
+        for file in path.iterdir():
+            if file.name.startswith(cur_datetime):
+                with open(file, 'r') as f:
+                    line = ''.join([l for l in f])
+                assert self.msg in line
+                file.unlink()
+        logger.removeHandler(handler)
+
+    def test_add_file_4(self):
+        """
+        Test the case where path is a directory;
+        """
+        from fastNLP.core.log.logger import logger
+
+        path = Path(tempfile.mkdtemp())
+        try:
+            handler = logger.add_file(path)
+            logger.info(self.msg)
+
+            cur_datetime = str(datetime.datetime.now().strftime('%Y-%m-%d'))
+            for file in path.iterdir():
+                if file.name.startswith(cur_datetime):
+                    with open(file, 'r') as f:
+                        line = ''.join([l for l in f])
+                    assert self.msg in line
+        finally:
+            synchronize_safe_rm(path)
+            logger.removeHandler(handler)
+
+    def test_stdout(self, capsys):
+        from fastNLP.core.log.logger import logger
+
+        handler = logger.set_stdout(stdout="raw")
+        logger.info(self.msg)
+        logger.debug('aabbc')
+        captured = capsys.readouterr()
+        assert "some test log msg\n" == captured.out
+
+        logger.removeHandler(handler)
+
diff --git a/tests/core/samplers/test_sampler.py b/tests/core/samplers/test_sampler.py
index 61e28dac..63d8e860 100644
--- a/tests/core/samplers/test_sampler.py
+++ b/tests/core/samplers/test_sampler.py
@@ -10,13 +10,6 @@ from fastNLP.core.drivers.torch_driver.utils import replace_batch_sampler
 from tests.helpers.datasets.torch_data import TorchNormalDataset
 
 
-
-
-
-
-
-
 class SamplerTest(unittest.TestCase):
 
     def test_sequentialsampler(self):
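
Note on the launch-time issue flagged in the test_add_file_ddp_1/ddp_3
docstrings: when each rank derives the log-file name from datetime.now(),
the ranks start a few moments apart and can compute different names, so
every process ends up logging to its own file. The tests therefore build
the name from the FASTNLP_LAUNCH_TIME environment variable, which the
launching process is expected to fix once before workers are spawned.
Below is a minimal, self-contained sketch of that idea, not fastNLP's
actual implementation; the env-var literal and the timestamp format are
assumptions made for illustration:

import datetime
import multiprocessing as mp
import os

# Assumed literal of the FASTNLP_LAUNCH_TIME constant in fastNLP.envs.env.
LAUNCH_TIME_KEY = "FASTNLP_LAUNCH_TIME"


def log_file_name() -> str:
    # Each rank reads the value inherited from the launcher. Calling
    # datetime.now() per process is exactly the bug the docstrings warn
    # about: ranks observe slightly different times and derive different
    # names, each writing to its own log file.
    launch_time = os.environ.get(LAUNCH_TIME_KEY)
    if launch_time is None:  # single-process fallback
        launch_time = datetime.datetime.now().strftime("%Y-%m-%d-%H_%M_%S_%f")
    return launch_time + ".log"


def worker(rank, queue):
    # Workers never look at the clock; they only read the shared value.
    queue.put((rank, log_file_name()))


if __name__ == "__main__":
    # The launcher pins one timestamp before any worker exists, so every
    # spawned process inherits the same environment value.
    os.environ[LAUNCH_TIME_KEY] = datetime.datetime.now().strftime(
        "%Y-%m-%d-%H_%M_%S_%f")
    queue = mp.Queue()
    procs = [mp.Process(target=worker, args=(r, queue)) for r in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    names = {queue.get()[1] for _ in range(2)}
    assert len(names) == 1, "all ranks must agree on a single log file name"
    print(names.pop())

With the timestamp pinned in the parent, a test like test_add_file_ddp_3
can open Path.cwd().joinpath(os.environ.get(FASTNLP_LAUNCH_TIME) + ".log")
on every rank and find the same shared file.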