diff --git a/.gitattributes b/.gitattributes index 60ff0dd2..1a3015ec 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4,4 +4,6 @@ *.wav filter=lfs diff=lfs merge=lfs -text *.JPEG filter=lfs diff=lfs merge=lfs -text *.jpeg filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text *.avi filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text diff --git a/data/test/regression/fill_mask_bert_zh.bin b/data/test/regression/fill_mask_bert_zh.bin new file mode 100644 index 00000000..17c28b81 --- /dev/null +++ b/data/test/regression/fill_mask_bert_zh.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:541183383bb06aa3ca2c44a68cd51c1be5e3e984a1dee2c58092b9552660f3ce +size 61883 diff --git a/data/test/regression/fill_mask_sbert_en.bin b/data/test/regression/fill_mask_sbert_en.bin new file mode 100644 index 00000000..09aaf300 --- /dev/null +++ b/data/test/regression/fill_mask_sbert_en.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f0afcd9d2aa5ac9569114203bd9db4f1a520c903a88fd4854370cdde0e7eab7 +size 119940 diff --git a/data/test/regression/fill_mask_sbert_zh.bin b/data/test/regression/fill_mask_sbert_zh.bin new file mode 100644 index 00000000..812f7ba2 --- /dev/null +++ b/data/test/regression/fill_mask_sbert_zh.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd6fa6b23c2fdaf876606a767d9b64b1924e1acddfc06ac42db73ba86083280 +size 119940 diff --git a/data/test/regression/fill_mask_veco_en.bin b/data/test/regression/fill_mask_veco_en.bin new file mode 100644 index 00000000..be3fddc8 --- /dev/null +++ b/data/test/regression/fill_mask_veco_en.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d37672a0e299a08d2daf5c7fc29bfce96bb15701fe5e5e68f068861ac2ee705 +size 119619 diff --git a/data/test/regression/fill_mask_veco_zh.bin b/data/test/regression/fill_mask_veco_zh.bin new file mode 100644 index 00000000..c0d27e20 --- /dev/null +++ b/data/test/regression/fill_mask_veco_zh.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c692e0753cfe349e520511427727a8252f141fa10e85f9a61562845e8d731f9a +size 119619 diff --git a/data/test/regression/sbert_nli.bin b/data/test/regression/sbert_nli.bin new file mode 100644 index 00000000..a5f680bb --- /dev/null +++ b/data/test/regression/sbert_nli.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44e3925c15d86d8596baeb6bd1d153d86f57b7489798b2cf988a1248e110fd62 +size 62231 diff --git a/data/test/regression/sbert_sen_sim.bin b/data/test/regression/sbert_sen_sim.bin new file mode 100644 index 00000000..a59cbe0b --- /dev/null +++ b/data/test/regression/sbert_sen_sim.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff17a0272752de4c88d4254b2e881f97f8ef022f03609d03ee1de0ae964368a +size 62235 diff --git a/data/test/regression/sbert_ws_en.bin b/data/test/regression/sbert_ws_en.bin new file mode 100644 index 00000000..4eb562d6 --- /dev/null +++ b/data/test/regression/sbert_ws_en.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9103ce2bc89212f67fb49ce70783b7667e376900d0f70fb8f5c4432eb74bc572 +size 60801 diff --git a/data/test/regression/sbert_ws_zh.bin b/data/test/regression/sbert_ws_zh.bin new file mode 100644 index 00000000..555f640d --- /dev/null +++ b/data/test/regression/sbert_ws_zh.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d4dee34c7e83b77db04fb2f0d1200bfd37c7c24954c58e185da5cb96445975c 
+size 60801
diff --git a/data/test/regression/sbert_zero_shot.bin b/data/test/regression/sbert_zero_shot.bin
new file mode 100644
index 00000000..23d40946
--- /dev/null
+++ b/data/test/regression/sbert_zero_shot.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e3ecc2c30d382641d561f84849b199c12bb1a9418e8099a191153f6f5275a85
+size 61589
diff --git a/modelscope/utils/regress_test_utils.py b/modelscope/utils/regress_test_utils.py
new file mode 100644
index 00000000..ca50d579
--- /dev/null
+++ b/modelscope/utils/regress_test_utils.py
@@ -0,0 +1,703 @@
+import contextlib
+import hashlib
+import os
+import pickle
+import random
+import shutil
+import tempfile
+from collections.abc import Mapping
+from pathlib import Path
+from types import FunctionType
+from typing import Any, Dict, Union
+
+import json
+import numpy as np
+import torch.optim
+from torch import nn
+
+
+class RegressTool:
+    """A tool that keeps inference/training results in unittests from changing unnoticed.
+
+    First run a baseline test to create a result file; afterwards, changes can be observed
+    by comparing the latest version against the baseline file.
+    """
+
+    def __init__(self,
+                 baseline: bool = None,
+                 store_func: FunctionType = None,
+                 load_func: FunctionType = None):
+        """Accepts an optional func to store the baseline file and an optional func to load it.
+        """
+        self.baseline = baseline
+        self.store_func = store_func
+        self.load_func = load_func
+        print(f'Current working dir is: {Path.cwd()}')
+
+    def store(self, local, remote):
+        if self.store_func is not None:
+            self.store_func(local, remote)
+        else:
+            path = os.path.abspath(
+                os.path.join(Path.cwd(), 'data', 'test', 'regression'))
+            os.makedirs(path, exist_ok=True)
+            shutil.copy(local, os.path.join(path, remote))
+
+    def load(self, local, remote):
+        if self.load_func is not None:
+            self.load_func(local, remote)
+        else:
+            path = os.path.abspath(
+                os.path.join(Path.cwd(), 'data', 'test', 'regression'))
+            baseline = os.path.join(path, remote)
+            if not os.path.exists(baseline):
+                raise ValueError(f'baseline file {baseline} does not exist')
+            print(
+                f'local file found: {baseline}, md5: {hashlib.md5(open(baseline,"rb").read()).hexdigest()}'
+            )
+            if os.path.exists(local):
+                os.remove(local)
+            os.symlink(baseline, local, target_is_directory=False)
+
+    @contextlib.contextmanager
+    def monitor_module_single_forward(self,
+                                      module: nn.Module,
+                                      file_name: str,
+                                      compare_fn=None):
+        """Monitor a pytorch module over a single forward pass.
+
+        @param module: A torch module
+        @param file_name: The file name used to store or load the baseline file
+        @param compare_fn: A custom fn used to compare the results manually.
+
+        >>> def compare_fn(v1, v2, key, type):
+        >>>     return None
+
+        v1 is the baseline value
+        v2 is the value of the current version
+        key is the key of submodules
+        type is one of 'input', 'output'
+        """
+        baseline = os.getenv('REGRESSION_BASELINE')
+        if baseline is None or self.baseline is None:
+            yield
+            return
+
+        baseline = self.baseline
+        io_json = {}
+        absolute_path = f'./{file_name}.bin'
+        if not isinstance(module, nn.Module):
+            assert hasattr(module, 'model')
+            module = module.model
+
+        hack_forward(module, file_name, io_json)
+        intercept_module(module, io_json)
+        yield
+        hack_forward(module, None, None, restore=True)
+        intercept_module(module, None, restore=True)
+        if baseline:
+            with open(absolute_path, 'wb') as f:
+                pickle.dump(io_json, f)
+            self.store(absolute_path, f'{file_name}.bin')
+            os.remove(absolute_path)
+        else:
+            name = os.path.basename(absolute_path)
+            baseline = os.path.join(tempfile.gettempdir(), name)
+            self.load(baseline, name)
+            with open(baseline, 'rb') as f:
+                baseline_json = pickle.load(f)
+
+            class NumpyEncoder(json.JSONEncoder):
+                """Special json encoder for numpy types
+                """
+
+                def default(self, obj):
+                    if isinstance(obj, np.integer):
+                        return int(obj)
+                    elif isinstance(obj, np.floating):
+                        return float(obj)
+                    elif isinstance(obj, np.ndarray):
+                        return obj.tolist()
+                    return json.JSONEncoder.default(self, obj)
+
+            print(f'baseline: {json.dumps(baseline_json, cls=NumpyEncoder)}')
+            print(f'latest : {json.dumps(io_json, cls=NumpyEncoder)}')
+            if not compare_io_and_print(baseline_json, io_json, compare_fn):
+                raise ValueError('Result does not match!')
+
+    @contextlib.contextmanager
+    def monitor_module_train(self,
+                             trainer: Union[Dict, Any],
+                             file_name,
+                             level='config',
+                             compare_fn=None,
+                             ignore_keys=None,
+                             compare_random=True,
+                             lazy_stop_callback=None):
+        """Monitor a pytorch module's backward data and cfg data within one optimizer step.
+
+        This is usually useful when you change some risky code
+        which may affect the training loop.
+
+        @param trainer: A dict or an object containing the model/optimizer/lr_scheduler
+        @param file_name: The file name used to store or load the baseline file
+        @param level: The regression level.
+               'strict' for matching every single tensor.
+               Please make sure the parameters of the head are fixed
+               and the drop-out rate is zero.
+               'config' for matching the initial config, like the cfg file, optimizer param_groups,
+               lr_scheduler params and the random seed.
+               'metric' for comparing the best metrics in the evaluation loop.
+        @param compare_fn: A custom fn used to compare the results manually.
+        @param ignore_keys: The keys of named_parameters to ignore.
+        @param compare_random: Whether to compare random settings, default True.
+        @param lazy_stop_callback: A callback which will be called when the monitoring is over.
+ + >>> def compare_fn(v1, v2, key, type): + >>> return None + + v1 is the baseline value + v2 is the value of current version + key is the key of modules/parameters + type is in one of 'input', 'output', 'backward', 'optimizer', 'lr_scheduler', 'cfg', 'state' + """ + baseline = os.getenv('REGRESSION_BASELINE') + if baseline is None or self.baseline is None: + yield + return + + baseline = self.baseline + + io_json = {} + bw_json = {} + absolute_path = f'./{file_name}.bin' + + if level == 'strict': + print( + "[Important] The level of regression is 'strict', please make sure your model's parameters are " + 'fixed and all drop-out rates have been set to zero.') + + assert hasattr( + trainer, 'model') or 'model' in trainer, 'model must be in trainer' + module = trainer['model'] if isinstance(trainer, + dict) else trainer.model + if not isinstance(module, nn.Module): + assert hasattr(module, 'model') + module = module.model + + assert hasattr( + trainer, 'optimizer' + ) or 'optimizer' in trainer, 'optimizer must be in trainer' + assert hasattr( + trainer, 'lr_scheduler' + ) or 'lr_scheduler' in trainer, 'lr_scheduler must be in trainer' + optimizer: torch.optim.Optimizer = trainer['optimizer'] if isinstance( + trainer, dict) else trainer.optimizer + lr_scheduler: torch.optim.lr_scheduler._LRScheduler = trainer['lr_scheduler'] if isinstance(trainer, dict) \ + else trainer.lr_scheduler + torch_state = numpify_tensor_nested(torch.get_rng_state()) + np_state = np.random.get_state() + random_seed = random.getstate() + seed = trainer._seed if hasattr( + trainer, + '_seed') else trainer.seed if hasattr(trainer, 'seed') else None + + if level == 'strict': + hack_forward(module, file_name, io_json) + intercept_module(module, io_json) + hack_backward( + module, optimizer, bw_json, lazy_stop_callback=lazy_stop_callback) + yield + hack_backward(module, optimizer, None, restore=True) + if level == 'strict': + hack_forward(module, None, None, restore=True) + intercept_module(module, None, restore=True) + + optimizer_dict = optimizer.state_dict() + optimizer_dict.pop('state', None) + summary = { + 'forward': io_json, + 'backward': bw_json, + 'optimizer': { + 'type': optimizer.__class__.__name__, + 'defaults': optimizer.defaults, + 'state_dict': optimizer_dict + }, + 'lr_scheduler': { + 'type': lr_scheduler.__class__.__name__, + 'state_dict': lr_scheduler.state_dict() + }, + 'cfg': trainer.cfg.to_dict() if hasattr(trainer, 'cfg') else None, + 'state': { + 'torch_state': torch_state, + 'np_state': np_state, + 'random_seed': random_seed, + 'seed': seed, + } + } + + if baseline: + with open(absolute_path, 'wb') as f: + pickle.dump(summary, f) + self.store(absolute_path, f'{file_name}.bin') + os.remove(absolute_path) + else: + name = os.path.basename(absolute_path) + baseline = os.path.join(tempfile.gettempdir(), name) + self.load(baseline, name) + with open(baseline, 'rb') as f: + baseline_json = pickle.load(f) + + if level == 'strict' and not compare_io_and_print( + baseline_json['forward'], io_json, compare_fn): + raise RuntimeError('Forward not match!') + if not compare_backward_and_print( + baseline_json['backward'], + bw_json, + compare_fn=compare_fn, + ignore_keys=ignore_keys, + level=level): + raise RuntimeError('Backward not match!') + cfg_opt1 = { + 'optimizer': baseline_json['optimizer'], + 'lr_scheduler': baseline_json['lr_scheduler'], + 'cfg': baseline_json['cfg'], + 'state': None if not compare_random else baseline_json['state'] + } + cfg_opt2 = { + 'optimizer': summary['optimizer'], + 
'lr_scheduler': summary['lr_scheduler'], + 'cfg': summary['cfg'], + 'state': None if not compare_random else summary['state'] + } + if not compare_cfg_and_optimizers(cfg_opt1, cfg_opt2, compare_fn): + raise RuntimeError('Cfg or optimizers not match!') + + +class MsRegressTool(RegressTool): + + class EarlyStopError(Exception): + pass + + @contextlib.contextmanager + def monitor_ms_train(self, + trainer, + file_name, + level='config', + compare_fn=None, + ignore_keys=None): + + def lazy_stop_callback(): + + from modelscope.trainers.hooks.hook import Hook, Priority + + class EarlyStopHook(Hook): + PRIORITY = Priority.VERY_LOW + + def after_iter(self, trainer): + raise MsRegressTool.EarlyStopError('Test finished.') + + trainer.register_hook(EarlyStopHook()) + + def _train_loop(trainer, *args, **kwargs): + with self.monitor_module_train( + trainer, + file_name, + level, + compare_fn=compare_fn, + ignore_keys=ignore_keys, + lazy_stop_callback=lazy_stop_callback): + try: + return trainer.train_loop_origin(*args, **kwargs) + except MsRegressTool.EarlyStopError: + pass + + trainer.train_loop_origin, trainer.train_loop = \ + trainer.train_loop, type(trainer.train_loop)(_train_loop, trainer) + yield + + +def compare_module(module1: nn.Module, module2: nn.Module): + for p1, p2 in zip(module1.parameters(), module2.parameters()): + if p1.data.ne(p2.data).sum() > 0: + return False + return True + + +def numpify_tensor_nested(tensors, reduction=None, clip_value=10000): + import torch + "Numpify `tensors` (even if it's a nested list/tuple of tensors)." + if isinstance(tensors, (list, tuple)): + return type(tensors)( + numpify_tensor_nested(t, reduction, clip_value) for t in tensors) + if isinstance(tensors, Mapping): + return type(tensors)({ + k: numpify_tensor_nested(t, reduction, clip_value) + for k, t in tensors.items() + }) + if isinstance(tensors, torch.Tensor): + t: np.ndarray = tensors.cpu().numpy() + if clip_value is not None: + t = np.where(t > clip_value, clip_value, t) + t = np.where(t < -clip_value, -clip_value, t) + if reduction == 'sum': + return t.sum(dtype=np.float) + elif reduction == 'mean': + return t.mean(dtype=np.float) + return t + return tensors + + +def detach_tensor_nested(tensors): + import torch + "Detach `tensors` (even if it's a nested list/tuple of tensors)." 
+ if isinstance(tensors, (list, tuple)): + return type(tensors)(detach_tensor_nested(t) for t in tensors) + if isinstance(tensors, Mapping): + return type(tensors)( + {k: detach_tensor_nested(t) + for k, t in tensors.items()}) + if isinstance(tensors, torch.Tensor): + return tensors.detach() + return tensors + + +def hack_forward(module: nn.Module, + name, + io_json, + restore=False, + keep_tensors=False): + + def _forward(self, *args, **kwargs): + ret = self.forward_origin(*args, **kwargs) + if keep_tensors: + args = numpify_tensor_nested(detach_tensor_nested(args)) + kwargs = numpify_tensor_nested(detach_tensor_nested(kwargs)) + output = numpify_tensor_nested(detach_tensor_nested(ret)) + else: + args = { + 'sum': + numpify_tensor_nested( + detach_tensor_nested(args), reduction='sum'), + 'mean': + numpify_tensor_nested( + detach_tensor_nested(args), reduction='mean'), + } + kwargs = { + 'sum': + numpify_tensor_nested( + detach_tensor_nested(kwargs), reduction='sum'), + 'mean': + numpify_tensor_nested( + detach_tensor_nested(kwargs), reduction='mean'), + } + output = { + 'sum': + numpify_tensor_nested( + detach_tensor_nested(ret), reduction='sum'), + 'mean': + numpify_tensor_nested( + detach_tensor_nested(ret), reduction='mean'), + } + + io_json[name] = { + 'input': { + 'args': args, + 'kwargs': kwargs, + }, + 'output': output, + } + return ret + + if not restore and not hasattr(module, 'forward_origin'): + module.forward_origin, module.forward = module.forward, type( + module.forward)(_forward, module) + if restore and hasattr(module, 'forward_origin'): + module.forward = module.forward_origin + del module.forward_origin + + +def hack_backward(module: nn.Module, + optimizer, + io_json, + restore=False, + lazy_stop_callback=None): + + def _step(self, *args, **kwargs): + for name, param in module.named_parameters(): + io_json[name] = { + 'data': { + 'sum': + numpify_tensor_nested( + detach_tensor_nested(param.data), reduction='sum'), + 'mean': + numpify_tensor_nested( + detach_tensor_nested(param.data), reduction='mean'), + }, + 'grad': { + 'sum': + numpify_tensor_nested( + detach_tensor_nested(param.grad), reduction='sum'), + 'mean': + numpify_tensor_nested( + detach_tensor_nested(param.grad), reduction='mean'), + } + } + ret = self.step_origin(*args, **kwargs) + for name, param in module.named_parameters(): + io_json[name]['data_after'] = { + 'sum': + numpify_tensor_nested( + detach_tensor_nested(param.data), reduction='sum'), + 'mean': + numpify_tensor_nested( + detach_tensor_nested(param.data), reduction='mean'), + } + if lazy_stop_callback is not None: + lazy_stop_callback() + return ret + + if not restore and not hasattr(optimizer, 'step_origin'): + optimizer.step_origin, optimizer.step = optimizer.step, type( + optimizer.state_dict)(_step, optimizer) + if restore and hasattr(optimizer, 'step_origin'): + optimizer.step = optimizer.step_origin + del optimizer.step_origin + + +def intercept_module(module: nn.Module, + io_json, + parent_name=None, + restore=False): + for name, module in module.named_children(): + full_name = parent_name + '.' 
+ name if parent_name is not None else name + hack_forward(module, full_name, io_json, restore) + intercept_module(module, io_json, full_name, restore) + + +def compare_arguments_nested(print_content, arg1, arg2): + type1 = type(arg1) + type2 = type(arg2) + if type1.__name__ != type2.__name__: + if print_content is not None: + print( + f'{print_content}, type not equal:{type1.__name__} and {type2.__name__}' + ) + return False + + if arg1 is None: + return True + elif isinstance(arg1, (int, str, bool, np.bool, np.integer, np.str)): + if arg1 != arg2: + if print_content is not None: + print(f'{print_content}, arg1:{arg1}, arg2:{arg2}') + return False + return True + elif isinstance(arg1, (float, np.floating)): + if not np.isclose(arg1, arg2, rtol=1.e-3, atol=1.e-8, equal_nan=True): + if print_content is not None: + print(f'{print_content}, arg1:{arg1}, arg2:{arg2}') + return False + return True + elif isinstance(arg1, (tuple, list)): + if len(arg1) != len(arg2): + if print_content is not None: + print( + f'{print_content}, length is not equal:{len(arg1)}, {len(arg2)}' + ) + return False + if not all([ + compare_arguments_nested(None, sub_arg1, sub_arg2) + for sub_arg1, sub_arg2 in zip(arg1, arg2) + ]): + if print_content is not None: + print(f'{print_content}') + return False + return True + elif isinstance(arg1, Mapping): + keys1 = arg1.keys() + keys2 = arg2.keys() + if len(keys1) != len(keys2): + if print_content is not None: + print( + f'{print_content}, key length is not equal:{len(keys1)}, {len(keys2)}' + ) + return False + if len(set(keys1) - set(keys2)) > 0: + if print_content is not None: + print(f'{print_content}, key diff:{set(keys1) - set(keys2)}') + return False + if not all([ + compare_arguments_nested(None, arg1[key], arg2[key]) + for key in keys1 + ]): + if print_content is not None: + print(f'{print_content}') + return False + return True + elif isinstance(arg1, np.ndarray): + arg1 = np.where(np.equal(arg1, None), np.NaN, + arg1).astype(dtype=np.float) + arg2 = np.where(np.equal(arg2, None), np.NaN, + arg2).astype(dtype=np.float) + if not all( + np.isclose(arg1, arg2, rtol=1.e-3, atol=1.e-8, + equal_nan=True).flatten()): + if print_content is not None: + print(f'{print_content}') + return False + return True + else: + raise ValueError(f'type not supported: {type1}') + + +def compare_io_and_print(baseline_json, io_json, compare_fn=None): + if compare_fn is None: + + def compare_fn(*args, **kwargs): + return None + + keys1 = set(baseline_json.keys()) + keys2 = set(io_json.keys()) + added = keys1 - keys2 + removed = keys2 - keys1 + print(f'unmatched keys: {added}, {removed}') + shared_keys = keys1.intersection(keys2) + match = True + for key in shared_keys: + v1 = baseline_json[key] + v2 = io_json[key] + + v1input = numpify_tensor_nested(v1['input']) + v2input = numpify_tensor_nested(v2['input']) + res = compare_fn(v1input, v2input, key, 'input') + if res is not None: + print( + f'input of {key} compared with user compare_fn with result:{res}\n' + ) + match = match and res + else: + match = compare_arguments_nested( + f'unmatched module {key} input args', v1input['args'], + v2input['args']) and match + match = compare_arguments_nested( + f'unmatched module {key} input kwargs', v1input['kwargs'], + v2input['kwargs']) and match + v1output = numpify_tensor_nested(v1['output']) + v2output = numpify_tensor_nested(v2['output']) + res = compare_fn(v1output, v2output, key, 'output') + if res is not None: + print( + f'output of {key} compared with user compare_fn with result:{res}\n' + ) 
+            match = match and res
+        else:
+            match = compare_arguments_nested(f'unmatched module {key} outputs',
+                                             v1output, v2output) and match
+    return match
+
+
+def compare_backward_and_print(baseline_json,
+                               bw_json,
+                               level,
+                               ignore_keys=None,
+                               compare_fn=None):
+    if compare_fn is None:
+
+        def compare_fn(*args, **kwargs):
+            return None
+
+    keys1 = set(baseline_json.keys())
+    keys2 = set(bw_json.keys())
+    added = keys1 - keys2
+    removed = keys2 - keys1
+    print(f'unmatched backward keys: {added}, {removed}')
+    shared_keys = keys1.intersection(keys2)
+    match = True
+    for key in shared_keys:
+        if ignore_keys is not None and key in ignore_keys:
+            continue
+
+        res = compare_fn(baseline_json[key], bw_json[key], key, 'backward')
+        if res is not None:
+            print(f'backward data of {key} compared with '
+                  f'user compare_fn with result:{res}\n')
+            match = match and res
+        else:
+            data1, grad1, data_after1 = baseline_json[key][
+                'data'], baseline_json[key]['grad'], baseline_json[key][
+                    'data_after']
+            data2, grad2, data_after2 = bw_json[key]['data'], bw_json[key][
+                'grad'], bw_json[key]['data_after']
+            match = compare_arguments_nested(
+                f'unmatched module {key} tensor data', data1, data2) and match
+            if level == 'strict':
+                match = compare_arguments_nested(
+                    f'unmatched module {key} grad data', grad1,
+                    grad2) and match
+            match = compare_arguments_nested(
+                f'unmatched module {key} data after step', data_after1,
+                data_after2) and match
+    return match
+
+
+def compare_cfg_and_optimizers(baseline_json, cfg_json, compare_fn=None):
+    if compare_fn is None:
+
+        def compare_fn(*args, **kwargs):
+            return None
+
+    optimizer1, lr_scheduler1, cfg1, state1 = baseline_json[
+        'optimizer'], baseline_json['lr_scheduler'], baseline_json[
+            'cfg'], baseline_json['state']
+    optimizer2, lr_scheduler2, cfg2, state2 = cfg_json['optimizer'], cfg_json[
+        'lr_scheduler'], cfg_json['cfg'], cfg_json['state']
+
+    match = True
+    res = compare_fn(optimizer1, optimizer2, None, 'optimizer')
+    if res is not None:
+        print(f'optimizer compared with user compare_fn with result:{res}\n')
+        match = match and res
+    else:
+        if optimizer1['type'] != optimizer2['type']:
+            print(
+                f"Optimizer type not equal: {optimizer1['type']} and {optimizer2['type']}"
+            )
+        match = compare_arguments_nested('unmatched optimizer defaults',
+                                         optimizer1['defaults'],
+                                         optimizer2['defaults']) and match
+        match = compare_arguments_nested('unmatched optimizer state_dict',
+                                         optimizer1['state_dict'],
+                                         optimizer2['state_dict']) and match
+
+    res = compare_fn(lr_scheduler1, lr_scheduler2, None, 'lr_scheduler')
+    if res is not None:
+        print(
+            f'lr_scheduler compared with user compare_fn with result:{res}\n')
+        match = match and res
+    else:
+        if lr_scheduler1['type'] != lr_scheduler2['type']:
+            print(
+                f"Lr_scheduler type not equal: {lr_scheduler1['type']} and {lr_scheduler2['type']}"
+            )
+        match = compare_arguments_nested('unmatched lr_scheduler state_dict',
+                                         lr_scheduler1['state_dict'],
+                                         lr_scheduler2['state_dict']) and match
+
+    res = compare_fn(cfg1, cfg2, None, 'cfg')
+    if res is not None:
+        print(f'cfg compared with user compare_fn with result:{res}\n')
+        match = match and res
+    else:
+        match = compare_arguments_nested('unmatched cfg', cfg1, cfg2) and match
+
+    res = compare_fn(state1, state2, None, 'state')
+    if res is not None:
+        print(
+            f'random state compared with user compare_fn with result:{res}\n')
+        match = match and res
+    else:
+        match = compare_arguments_nested('unmatched random state', state1,
+                                         state2) and match
+
+    return match
diff --git
a/tests/pipelines/test_fill_mask.py b/tests/pipelines/test_fill_mask.py index 2f57b2d8..1b709e27 100644 --- a/tests/pipelines/test_fill_mask.py +++ b/tests/pipelines/test_fill_mask.py @@ -9,6 +9,7 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import FillMaskPipeline from modelscope.preprocessors import FillMaskPreprocessor from modelscope.utils.constant import Tasks +from modelscope.utils.regress_test_utils import MsRegressTool from modelscope.utils.test_utils import test_level @@ -37,6 +38,7 @@ class FillMaskTest(unittest.TestCase): 'Everything in [MASK] you call reality is really [MASK] a reflection of your ' '[MASK]. Your [MASK] universe is just a mirror [MASK] of your story.' } + regress_tool = MsRegressTool(baseline=False) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_by_direct_model_download(self): @@ -98,9 +100,11 @@ class FillMaskTest(unittest.TestCase): second_sequence=None) pipeline_ins = pipeline( task=Tasks.fill_mask, model=model, preprocessor=preprocessor) - print( - f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: ' - f'{pipeline_ins(self.test_inputs[language])}\n') + with self.regress_tool.monitor_module_single_forward( + pipeline_ins.model, f'fill_mask_sbert_{language}'): + print( + f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: ' + f'{pipeline_ins(self.test_inputs[language])}\n') # veco model = Model.from_pretrained(self.model_id_veco) @@ -111,8 +115,11 @@ class FillMaskTest(unittest.TestCase): for language in ['zh', 'en']: ori_text = self.ori_texts[language] test_input = self.test_inputs[language].replace('[MASK]', '') - print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' - f'{pipeline_ins(test_input)}\n') + with self.regress_tool.monitor_module_single_forward( + pipeline_ins.model, f'fill_mask_veco_{language}'): + print( + f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' + f'{pipeline_ins(test_input)}\n') # zh bert model = Model.from_pretrained(self.model_id_bert) @@ -123,8 +130,10 @@ class FillMaskTest(unittest.TestCase): language = 'zh' ori_text = self.ori_texts[language] test_input = self.test_inputs[language] - print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' - f'{pipeline_ins(test_input)}\n') + with self.regress_tool.monitor_module_single_forward( + pipeline_ins.model, 'fill_mask_bert_zh'): + print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' + f'{pipeline_ins(test_input)}\n') @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_name(self): diff --git a/tests/pipelines/test_nli.py b/tests/pipelines/test_nli.py index 1e259a2e..1d3fba12 100644 --- a/tests/pipelines/test_nli.py +++ b/tests/pipelines/test_nli.py @@ -8,6 +8,7 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import PairSentenceClassificationPipeline from modelscope.preprocessors import PairSentenceClassificationPreprocessor from modelscope.utils.constant import Tasks +from modelscope.utils.regress_test_utils import MsRegressTool from modelscope.utils.test_utils import test_level @@ -15,6 +16,7 @@ class NLITest(unittest.TestCase): model_id = 'damo/nlp_structbert_nli_chinese-base' sentence1 = '四川商务职业学院和四川财经职业学院哪个好?' sentence2 = '四川商务职业学院商务管理在哪个校区?' 
+ regress_tool = MsRegressTool(baseline=False) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_direct_file_download(self): @@ -26,7 +28,6 @@ class NLITest(unittest.TestCase): pipeline2 = pipeline(Tasks.nli, model=model, preprocessor=tokenizer) print(f'sentence1: {self.sentence1}\nsentence2: {self.sentence2}\n' f'pipeline1:{pipeline1(input=(self.sentence1, self.sentence2))}') - print() print( f'sentence1: {self.sentence1}\nsentence2: {self.sentence2}\n' f'pipeline1: {pipeline2(input=(self.sentence1, self.sentence2))}') @@ -42,7 +43,9 @@ class NLITest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_name(self): pipeline_ins = pipeline(task=Tasks.nli, model=self.model_id) - print(pipeline_ins(input=(self.sentence1, self.sentence2))) + with self.regress_tool.monitor_module_single_forward( + pipeline_ins.model, 'sbert_nli'): + print(pipeline_ins(input=(self.sentence1, self.sentence2))) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): diff --git a/tests/pipelines/test_sentence_similarity.py b/tests/pipelines/test_sentence_similarity.py index d39f6783..6990bf75 100644 --- a/tests/pipelines/test_sentence_similarity.py +++ b/tests/pipelines/test_sentence_similarity.py @@ -8,6 +8,7 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import PairSentenceClassificationPipeline from modelscope.preprocessors import PairSentenceClassificationPreprocessor from modelscope.utils.constant import Tasks +from modelscope.utils.regress_test_utils import MsRegressTool from modelscope.utils.test_utils import test_level @@ -15,6 +16,7 @@ class SentenceSimilarityTest(unittest.TestCase): model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base' sentence1 = '今天气温比昨天高么?' sentence2 = '今天湿度比昨天高么?' 
+ regress_tool = MsRegressTool(baseline=False) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run(self): @@ -47,7 +49,9 @@ class SentenceSimilarityTest(unittest.TestCase): def test_run_with_model_name(self): pipeline_ins = pipeline( task=Tasks.sentence_similarity, model=self.model_id) - print(pipeline_ins(input=(self.sentence1, self.sentence2))) + with self.regress_tool.monitor_module_single_forward( + pipeline_ins.model, 'sbert_sen_sim'): + print(pipeline_ins(input=(self.sentence1, self.sentence2))) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): diff --git a/tests/pipelines/test_sentiment_classification.py b/tests/pipelines/test_sentiment_classification.py index f3bc6981..35c96282 100644 --- a/tests/pipelines/test_sentiment_classification.py +++ b/tests/pipelines/test_sentiment_classification.py @@ -30,7 +30,6 @@ class SentimentClassificationTaskModelTest(unittest.TestCase): preprocessor=tokenizer) print(f'sentence1: {self.sentence1}\n' f'pipeline1:{pipeline1(input=self.sentence1)}') - print() print(f'sentence1: {self.sentence1}\n' f'pipeline1: {pipeline2(input=self.sentence1)}') diff --git a/tests/pipelines/test_word_segmentation.py b/tests/pipelines/test_word_segmentation.py index c332d987..87006f96 100644 --- a/tests/pipelines/test_word_segmentation.py +++ b/tests/pipelines/test_word_segmentation.py @@ -9,6 +9,7 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import WordSegmentationPipeline from modelscope.preprocessors import TokenClassificationPreprocessor from modelscope.utils.constant import Tasks +from modelscope.utils.regress_test_utils import MsRegressTool from modelscope.utils.test_utils import test_level @@ -16,6 +17,7 @@ class WordSegmentationTest(unittest.TestCase): model_id = 'damo/nlp_structbert_word-segmentation_chinese-base' sentence = '今天天气不错,适合出去游玩' sentence_eng = 'I am a program.' 
+ regress_tool = MsRegressTool(baseline=False) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_by_direct_model_download(self): @@ -27,7 +29,6 @@ class WordSegmentationTest(unittest.TestCase): Tasks.word_segmentation, model=model, preprocessor=tokenizer) print(f'sentence: {self.sentence}\n' f'pipeline1:{pipeline1(input=self.sentence)}') - print() print(f'pipeline2: {pipeline2(input=self.sentence)}') @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') @@ -42,8 +43,12 @@ class WordSegmentationTest(unittest.TestCase): def test_run_with_model_name(self): pipeline_ins = pipeline( task=Tasks.word_segmentation, model=self.model_id) - print(pipeline_ins(input=self.sentence)) - print(pipeline_ins(input=self.sentence_eng)) + with self.regress_tool.monitor_module_single_forward( + pipeline_ins.model, 'sbert_ws_zh'): + print(pipeline_ins(input=self.sentence)) + with self.regress_tool.monitor_module_single_forward( + pipeline_ins.model, 'sbert_ws_en'): + print(pipeline_ins(input=self.sentence_eng)) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): diff --git a/tests/pipelines/test_zero_shot_classification.py b/tests/pipelines/test_zero_shot_classification.py index 7620a0ed..f0f2a481 100644 --- a/tests/pipelines/test_zero_shot_classification.py +++ b/tests/pipelines/test_zero_shot_classification.py @@ -8,6 +8,7 @@ from modelscope.pipelines import pipeline from modelscope.pipelines.nlp import ZeroShotClassificationPipeline from modelscope.preprocessors import ZeroShotClassificationPreprocessor from modelscope.utils.constant import Tasks +from modelscope.utils.regress_test_utils import MsRegressTool from modelscope.utils.test_utils import test_level @@ -16,6 +17,7 @@ class ZeroShotClassificationTest(unittest.TestCase): sentence = '全新突破 解放军运20版空中加油机曝光' labels = ['文化', '体育', '娱乐', '财经', '家居', '汽车', '教育', '科技', '军事'] template = '这篇文章的标题是{}' + regress_tool = MsRegressTool(baseline=False) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_direct_file_download(self): @@ -33,7 +35,6 @@ class ZeroShotClassificationTest(unittest.TestCase): f'sentence: {self.sentence}\n' f'pipeline1:{pipeline1(input=self.sentence,candidate_labels=self.labels)}' ) - print() print( f'sentence: {self.sentence}\n' f'pipeline2: {pipeline2(self.sentence,candidate_labels=self.labels,hypothesis_template=self.template)}' @@ -53,7 +54,11 @@ class ZeroShotClassificationTest(unittest.TestCase): def test_run_with_model_name(self): pipeline_ins = pipeline( task=Tasks.zero_shot_classification, model=self.model_id) - print(pipeline_ins(input=self.sentence, candidate_labels=self.labels)) + with self.regress_tool.monitor_module_single_forward( + pipeline_ins.model, 'sbert_zero_shot'): + print( + pipeline_ins( + input=self.sentence, candidate_labels=self.labels)) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): diff --git a/tests/run.py b/tests/run.py index 1a601eda..79509745 100644 --- a/tests/run.py +++ b/tests/run.py @@ -334,6 +334,7 @@ if __name__ == '__main__': help='Save result to directory, internal use only') args = parser.parse_args() set_test_level(args.level) + os.environ['REGRESSION_BASELINE'] = '1' logger.info(f'TEST LEVEL: {test_level()}') if not args.disable_profile: from utils import profiler
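
Usage note (illustrative, not part of the diff above): a minimal sketch of the single-forward monitoring flow added by this change. The test class, the baseline name 'my_model_forward' and the helper compare_fn are hypothetical; MsRegressTool, monitor_module_single_forward and the REGRESSION_BASELINE switch come from the change itself, and the model id and sentences mirror tests/pipelines/test_sentence_similarity.py. Monitoring is a no-op unless REGRESSION_BASELINE is set (tests/run.py now exports it); with baseline=True the tool pickles per-submodule forward inputs/outputs (as sums/means) to data/test/regression/<name>.bin, and with baseline=False it reloads that file and fails the test when the recorded values drift.

# Hypothetical usage sketch of modelscope.utils.regress_test_utils; the test
# name, baseline file name and compare_fn are illustrative only.
import os
import unittest

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.regress_test_utils import MsRegressTool


class MyForwardRegressionTest(unittest.TestCase):
    # baseline=False compares against a stored baseline; flip to True once
    # (with REGRESSION_BASELINE set) to regenerate
    # data/test/regression/my_model_forward.bin.
    regress_tool = MsRegressTool(baseline=False)

    def test_forward_is_stable(self):
        os.environ['REGRESSION_BASELINE'] = '1'  # tests/run.py sets this globally

        def compare_fn(v1, v2, key, type):
            # Return True/False to decide a comparison yourself,
            # or None to fall back to the built-in numeric comparison.
            return None

        pipeline_ins = pipeline(
            task=Tasks.sentence_similarity,
            model='damo/nlp_structbert_sentence-similarity_chinese-base')
        with self.regress_tool.monitor_module_single_forward(
                pipeline_ins.model, 'my_model_forward', compare_fn=compare_fn):
            print(pipeline_ins(input=('今天气温比昨天高么?', '今天湿度比昨天高么?')))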
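
The training-loop counterpart can be wired in the same way; a sketch under the assumption that a ModelScope trainer exposing model/optimizer/lr_scheduler has already been built (build_trainer_somehow is a hypothetical placeholder, as is the baseline name 'my_trainer_config'). At level='config' the cfg, optimizer/lr_scheduler settings and random state are compared, while level='strict' additionally records every submodule's forward data and gradients and therefore needs fixed parameters and zero dropout; monitor_ms_train wraps trainer.train_loop and stops the run shortly after the first optimizer step via an EarlyStopHook.

# Hypothetical sketch: build_trainer_somehow stands in for whatever constructs
# a ModelScope trainer; 'my_trainer_config' is an illustrative baseline name.
from modelscope.utils.regress_test_utils import MsRegressTool

regress_tool = MsRegressTool(baseline=False)
trainer = build_trainer_somehow()

with regress_tool.monitor_ms_train(
        trainer, 'my_trainer_config', level='config'):
    # The monitor intercepts trainer.train_loop, ends it after the first
    # optimizer step, then compares the collected summary with the baseline.
    trainer.train()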