@@ -145,11 +145,20 @@ class Trainers(object): | |||
For a model-specific Trainer, you can use ${ModelName}-${Task}-trainer.
""" | |||
default = 'Trainer' | |||
default = 'trainer' | |||
# multi-modal tasks | |||
# multi-modal trainers | |||
clip_multi_modal_embedding = 'clip-multi-modal-embedding' | |||
# cv trainers | |||
image_instance_segmentation = 'image-instance-segmentation' | |||
image_portrait_enhancement = 'image-portrait-enhancement' | |||
# nlp trainers | |||
bert_sentiment_analysis = 'bert-sentiment-analysis' | |||
nlp_base_trainer = 'nlp-base-trainer' | |||
nlp_veco_trainer = 'nlp-veco-trainer' | |||
class Preprocessors(object): | |||
""" Names for different preprocessor. | |||
@@ -219,3 +228,52 @@ class Metrics(object): | |||
image_color_enhance_metric = 'image-color-enhance-metric' | |||
# metrics for image-portrait-enhancement task | |||
image_portrait_enhancement_metric = 'image-portrait-enhancement-metric' | |||
class Optimizers(object): | |||
""" Names for different OPTIMIZER. | |||
Holds the standard optimizer name to use for identifying different optimizer. | |||
This should be used to register optimizer. | |||
""" | |||
default = 'optimizer' | |||
SGD = 'SGD' | |||
class Hooks(object): | |||
""" Names for different hooks. | |||
All kinds of hooks are defined here.
""" | |||
# lr | |||
LrSchedulerHook = 'LrSchedulerHook' | |||
PlateauLrSchedulerHook = 'PlateauLrSchedulerHook' | |||
NoneLrSchedulerHook = 'NoneLrSchedulerHook' | |||
# optimizer | |||
OptimizerHook = 'OptimizerHook' | |||
TorchAMPOptimizerHook = 'TorchAMPOptimizerHook' | |||
ApexAMPOptimizerHook = 'ApexAMPOptimizerHook' | |||
NoneOptimizerHook = 'NoneOptimizerHook' | |||
# checkpoint | |||
CheckpointHook = 'CheckpointHook' | |||
BestCkptSaverHook = 'BestCkptSaverHook' | |||
# logger | |||
TextLoggerHook = 'TextLoggerHook' | |||
TensorboardHook = 'TensorboardHook' | |||
IterTimerHook = 'IterTimerHook' | |||
EvaluationHook = 'EvaluationHook' | |||
class LR_Schedulers(object): | |||
"""learning rate scheduler is defined here | |||
""" | |||
LinearWarmup = 'LinearWarmup' | |||
ConstantWarmup = 'ConstantWarmup' | |||
ExponentialWarmup = 'ExponentialWarmup' |
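A quick sanity check of how these registry name constants are meant to be used (a minimal sketch, not part of the diff): they are plain strings, so configuration files that already contain the literal values keep working, while Python call sites can reference the constants instead of hard-coding literals.

from modelscope.metainfo import Hooks, LR_Schedulers, Optimizers, Trainers

assert Trainers.default == 'trainer'
assert Trainers.image_instance_segmentation == 'image-instance-segmentation'
assert Hooks.CheckpointHook == 'CheckpointHook'
assert LR_Schedulers.LinearWarmup == 'LinearWarmup'
assert Optimizers.SGD == 'SGD'

The register_module and build_trainer call sites changed later in this diff consume these constants in exactly this way.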
@@ -1,8 +1,38 @@ | |||
from .base import DummyTrainer | |||
from .builder import build_trainer | |||
from .cv import (ImageInstanceSegmentationTrainer, | |||
ImagePortraitEnhancementTrainer) | |||
from .multi_modal import CLIPTrainer | |||
from .nlp import SequenceClassificationTrainer | |||
from .nlp_trainer import NlpEpochBasedTrainer, VecoTrainer | |||
from .trainer import EpochBasedTrainer | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from typing import TYPE_CHECKING | |||
from modelscope.utils.import_utils import LazyImportModule | |||
if TYPE_CHECKING: | |||
from .base import DummyTrainer | |||
from .builder import build_trainer | |||
from .cv import (ImageInstanceSegmentationTrainer, | |||
ImagePortraitEnhancementTrainer) | |||
from .multi_modal import CLIPTrainer | |||
from .nlp import SequenceClassificationTrainer | |||
from .nlp_trainer import NlpEpochBasedTrainer, VecoTrainer | |||
from .trainer import EpochBasedTrainer | |||
else: | |||
_import_structure = { | |||
'base': ['DummyTrainer'], | |||
'builder': ['build_trainer'], | |||
'cv': [ | |||
'ImageInstanceSegmentationTrainer', | |||
'ImagePortraitEnhancementTrainer' | |||
], | |||
'multi_modal': ['CLIPTrainer'], | |||
'nlp': ['SequenceClassificationTrainer'], | |||
'nlp_trainer': ['NlpEpochBasedTrainer', 'VecoTrainer'], | |||
'trainer': ['EpochBasedTrainer'] | |||
} | |||
import sys | |||
sys.modules[__name__] = LazyImportModule( | |||
__name__, | |||
globals()['__file__'], | |||
_import_structure, | |||
module_spec=__spec__, | |||
extra_objects={}, | |||
) |
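The TYPE_CHECKING branch keeps IDEs and static type checkers aware of the exported names, while at runtime the module replaces itself in sys.modules with a LazyImportModule, so each submodule is only imported on first attribute access. A small sketch of the intended, user-visible behavior (assuming the lazy module forwards attribute access to the mapped submodules):

import modelscope.trainers as trainers          # cheap: no trainer submodule imported yet

trainer_cls = trainers.EpochBasedTrainer        # first access triggers import of .trainer
from modelscope.trainers import build_trainer   # existing call sites keep working unchanged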
@@ -1,5 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from modelscope.metainfo import Trainers | |||
from modelscope.utils.config import ConfigDict | |||
from modelscope.utils.constant import Tasks | |||
from modelscope.utils.registry import Registry, build_from_cfg | |||
@@ -8,7 +8,7 @@ TRAINERS = Registry('trainers') | |||
HOOKS = Registry('hooks') | |||
def build_trainer(name: str = 'EpochBasedTrainer', default_args: dict = None): | |||
def build_trainer(name: str = Trainers.default, default_args: dict = None): | |||
""" build trainer given a trainer name | |||
Args: | |||
@@ -1,3 +1,27 @@ | |||
from .image_instance_segmentation_trainer import \ | |||
ImageInstanceSegmentationTrainer | |||
from .image_portrait_enhancement_trainer import ImagePortraitEnhancementTrainer | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from typing import TYPE_CHECKING | |||
from modelscope.utils.import_utils import LazyImportModule | |||
if TYPE_CHECKING: | |||
from .image_instance_segmentation_trainer import \ | |||
ImageInstanceSegmentationTrainer | |||
from .image_portrait_enhancement_trainer import ImagePortraitEnhancementTrainer | |||
else: | |||
_import_structure = { | |||
'image_instance_segmentation_trainer': | |||
['ImageInstanceSegmentationTrainer'], | |||
'image_portrait_enhancement_trainer': | |||
['ImagePortraitEnhancementTrainer'], | |||
} | |||
import sys | |||
sys.modules[__name__] = LazyImportModule( | |||
__name__, | |||
globals()['__file__'], | |||
_import_structure, | |||
module_spec=__spec__, | |||
extra_objects={}, | |||
) |
@@ -1,8 +1,9 @@ | |||
from modelscope.metainfo import Trainers | |||
from modelscope.trainers.builder import TRAINERS | |||
from modelscope.trainers.trainer import EpochBasedTrainer | |||
@TRAINERS.register_module(module_name='image-instance-segmentation') | |||
@TRAINERS.register_module(module_name=Trainers.image_instance_segmentation) | |||
class ImageInstanceSegmentationTrainer(EpochBasedTrainer): | |||
def __init__(self, *args, **kwargs): | |||
@@ -4,6 +4,7 @@ from collections.abc import Mapping | |||
import torch | |||
from torch import distributed as dist | |||
from modelscope.metainfo import Trainers | |||
from modelscope.trainers.builder import TRAINERS | |||
from modelscope.trainers.optimizer.builder import build_optimizer | |||
from modelscope.trainers.trainer import EpochBasedTrainer | |||
@@ -11,7 +12,7 @@ from modelscope.utils.constant import ModeKeys | |||
from modelscope.utils.logger import get_logger | |||
@TRAINERS.register_module(module_name='gpen') | |||
@TRAINERS.register_module(module_name=Trainers.image_portrait_enhancement) | |||
class ImagePortraitEnhancementTrainer(EpochBasedTrainer): | |||
def train_step(self, model, inputs): | |||
@@ -1,18 +1,42 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from .builder import HOOKS, build_hook | |||
from .checkpoint_hook import BestCkptSaverHook, CheckpointHook | |||
from .evaluation_hook import EvaluationHook | |||
from .hook import Hook | |||
from .iter_timer_hook import IterTimerHook | |||
from .logger.text_logger_hook import TextLoggerHook | |||
from .lr_scheduler_hook import LrSchedulerHook | |||
from .optimizer_hook import (ApexAMPOptimizerHook, OptimizerHook, | |||
TorchAMPOptimizerHook) | |||
from .priority import Priority | |||
from typing import TYPE_CHECKING | |||
__all__ = [ | |||
'Hook', 'HOOKS', 'CheckpointHook', 'EvaluationHook', 'LrSchedulerHook', | |||
'OptimizerHook', 'Priority', 'build_hook', 'TextLoggerHook', | |||
'IterTimerHook', 'TorchAMPOptimizerHook', 'ApexAMPOptimizerHook', | |||
'BestCkptSaverHook', 'NoneOptimizerHook', 'NoneLrSchedulerHook' | |||
] | |||
from modelscope.utils.import_utils import LazyImportModule | |||
if TYPE_CHECKING: | |||
from .builder import HOOKS, build_hook | |||
from .checkpoint_hook import BestCkptSaverHook, CheckpointHook | |||
from .evaluation_hook import EvaluationHook | |||
from .hook import Hook | |||
from .iter_timer_hook import IterTimerHook | |||
from .logger import TextLoggerHook, TensorboardHook | |||
from .lr_scheduler_hook import LrSchedulerHook | |||
from .optimizer import (ApexAMPOptimizerHook, NoneOptimizerHook, | |||
OptimizerHook, TorchAMPOptimizerHook) | |||
from .priority import Priority, get_priority | |||
else: | |||
_import_structure = { | |||
'builder': ['HOOKS', 'build_hook'], | |||
'checkpoint_hook': ['BestCkptSaverHook', 'CheckpointHook'], | |||
'evaluation_hook': ['EvaluationHook'], | |||
'hook': ['Hook'], | |||
'iter_timer_hook': ['IterTimerHook'], | |||
'logger': ['TensorboardHook', 'TextLoggerHook'], | |||
'lr_scheduler_hook': ['LrSchedulerHook'], | |||
'optimizer': [
'ApexAMPOptimizerHook', 'NoneOptimizerHook', 'OptimizerHook',
'TorchAMPOptimizerHook'
],
'priority': ['Priority', 'get_priority']
} | |||
import sys | |||
sys.modules[__name__] = LazyImportModule( | |||
__name__, | |||
globals()['__file__'], | |||
_import_structure, | |||
module_spec=__spec__, | |||
extra_objects={}, | |||
) |
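Hooks registered here are looked up from the HOOKS registry by the string names defined in Hooks above. A hedged sketch of building one from a config-style dict, assuming the registry's usual 'type' key and that CheckpointHook accepts an interval argument:

from modelscope.metainfo import Hooks
from modelscope.trainers.hooks import build_hook

ckpt_hook = build_hook(dict(type=Hooks.CheckpointHook, interval=1))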
@@ -2,6 +2,7 @@ | |||
import os | |||
from modelscope import __version__ | |||
from modelscope.metainfo import Hooks | |||
from modelscope.utils.checkpoint import save_checkpoint | |||
from modelscope.utils.constant import LogKeys | |||
from modelscope.utils.logger import get_logger | |||
@@ -11,7 +12,7 @@ from .hook import Hook | |||
from .priority import Priority | |||
@HOOKS.register_module() | |||
@HOOKS.register_module(module_name=Hooks.CheckpointHook) | |||
class CheckpointHook(Hook): | |||
"""Save checkpoints periodically. | |||
@@ -98,7 +99,7 @@ class CheckpointHook(Hook): | |||
return False | |||
@HOOKS.register_module() | |||
@HOOKS.register_module(module_name=Hooks.BestCkptSaverHook) | |||
class BestCkptSaverHook(CheckpointHook): | |||
"""Save best checkpoints hook. | |||
Args: | |||
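In a trainer configuration these hooks are typically declared as a list of dicts and instantiated by the trainer. A hypothetical fragment (the metric_key argument is assumed from BestCkptSaverHook's constructor; values are illustrative):

train_cfg = {
    'train': {
        'hooks': [
            {'type': 'CheckpointHook', 'interval': 1},
            {'type': 'BestCkptSaverHook', 'metric_key': 'accuracy'},
        ]
    }
}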
@@ -1,9 +1,10 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from modelscope.metainfo import Hooks | |||
from .builder import HOOKS | |||
from .hook import Hook | |||
@HOOKS.register_module() | |||
@HOOKS.register_module(module_name=Hooks.EvaluationHook) | |||
class EvaluationHook(Hook): | |||
"""Evaluation hook. | |||
Args: | |||
@@ -1,13 +1,14 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import time | |||
from modelscope.metainfo import Hooks | |||
from modelscope.utils.constant import LogKeys | |||
from .builder import HOOKS | |||
from .hook import Hook | |||
from .priority import Priority | |||
@HOOKS.register_module() | |||
@HOOKS.register_module(module_name=Hooks.IterTimerHook) | |||
class IterTimerHook(Hook): | |||
PRIORITY = Priority.LOW | |||
@@ -1,7 +1,27 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from typing import TYPE_CHECKING | |||
from modelscope.trainers.utils.log_buffer import LogBuffer | |||
from .base import LoggerHook | |||
from .tensorboard_hook import TensorboardHook | |||
from .text_logger_hook import TextLoggerHook | |||
from modelscope.utils.import_utils import LazyImportModule | |||
if TYPE_CHECKING: | |||
from .base import LoggerHook | |||
from .tensorboard_hook import TensorboardHook | |||
from .text_logger_hook import TextLoggerHook | |||
else: | |||
_import_structure = { | |||
'base': ['LoggerHook'], | |||
'tensorboard_hook': ['TensorboardHook'], | |||
'text_logger_hook': ['TextLoggerHook'] | |||
} | |||
import sys | |||
__all__ = ['TextLoggerHook', 'LoggerHook', 'LogBuffer', 'TensorboardHook'] | |||
sys.modules[__name__] = LazyImportModule( | |||
__name__, | |||
globals()['__file__'], | |||
_import_structure, | |||
module_spec=__spec__, | |||
extra_objects={}, | |||
) |
@@ -1,13 +1,14 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import os | |||
from modelscope.metainfo import Hooks | |||
from modelscope.trainers.hooks.builder import HOOKS | |||
from modelscope.utils.constant import LogKeys | |||
from modelscope.utils.torch_utils import master_only | |||
from .base import LoggerHook | |||
@HOOKS.register_module() | |||
@HOOKS.register_module(module_name=Hooks.TensorboardHook) | |||
class TensorboardHook(LoggerHook): | |||
"""TensorBoard hook for visualization. | |||
Args: | |||
@@ -8,13 +8,14 @@ import json | |||
import torch | |||
from torch import distributed as dist | |||
from modelscope.metainfo import Hooks | |||
from modelscope.trainers.hooks.builder import HOOKS | |||
from modelscope.trainers.hooks.logger.base import LoggerHook | |||
from modelscope.utils.constant import LogKeys, ModeKeys | |||
from modelscope.utils.torch_utils import get_dist_info, is_master | |||
@HOOKS.register_module() | |||
@HOOKS.register_module(module_name=Hooks.TextLoggerHook) | |||
class TextLoggerHook(LoggerHook): | |||
"""Logger hook in text, Output log to both console and local json file. | |||
@@ -1,4 +1,5 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from modelscope.metainfo import Hooks | |||
from modelscope.trainers.lrscheduler.builder import build_lr_scheduler | |||
from modelscope.utils.constant import LogKeys | |||
from modelscope.utils.logger import get_logger | |||
@@ -8,7 +9,7 @@ from .hook import Hook | |||
from .priority import Priority | |||
@HOOKS.register_module() | |||
@HOOKS.register_module(module_name=Hooks.LrSchedulerHook) | |||
class LrSchedulerHook(Hook): | |||
"""Lr scheduler. | |||
@@ -78,7 +79,7 @@ class LrSchedulerHook(Hook): | |||
return lr | |||
@HOOKS.register_module() | |||
@HOOKS.register_module(module_name=Hooks.PlateauLrSchedulerHook) | |||
class PlateauLrSchedulerHook(LrSchedulerHook): | |||
"""Lr scheduler hook for `ReduceLROnPlateau`. | |||
@@ -119,7 +120,7 @@ class PlateauLrSchedulerHook(LrSchedulerHook): | |||
trainer.lr_scheduler.step(metrics=metrics) | |||
@HOOKS.register_module() | |||
@HOOKS.register_module(module_name=Hooks.NoneLrSchedulerHook) | |||
class NoneLrSchedulerHook(LrSchedulerHook): | |||
PRIORITY = Priority.LOW # should be after EvaluationHook | |||
@@ -0,0 +1,26 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from typing import TYPE_CHECKING | |||
from modelscope.utils.import_utils import LazyImportModule | |||
if TYPE_CHECKING: | |||
from .apex_optimizer_hook import ApexAMPOptimizerHook | |||
from .base import OptimizerHook, NoneOptimizerHook | |||
from .torch_optimizer_hook import TorchAMPOptimizerHook | |||
else: | |||
_import_structure = { | |||
'apex_optimizer_hook': ['ApexAMPOptimizerHook'], | |||
'base': ['OptimizerHook', 'NoneOptimizerHook'], | |||
'torch_optimizer_hook': ['TorchAMPOptimizerHook'] | |||
} | |||
import sys | |||
sys.modules[__name__] = LazyImportModule( | |||
__name__, | |||
globals()['__file__'], | |||
_import_structure, | |||
module_spec=__spec__, | |||
extra_objects={}, | |||
) |
@@ -0,0 +1,75 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import logging | |||
from modelscope.metainfo import Hooks | |||
from modelscope.trainers.hooks.builder import HOOKS | |||
from .base import OptimizerHook | |||
@HOOKS.register_module(module_name=Hooks.ApexAMPOptimizerHook) | |||
class ApexAMPOptimizerHook(OptimizerHook): | |||
"""Fp16 optimizer, if torch version is less than 1.6.0, | |||
you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default | |||
Args: | |||
cumulative_iters (int): interval of gradients accumulation. Default: 1 | |||
grad_clip (dict): Default None. Containing keys: | |||
max_norm (float or int): max norm of the gradients | |||
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` | |||
loss_keys (str | list): keys list of loss | |||
opt_level (str): "O0" and "O3" are not true mixed precision, | |||
but they are useful for establishing accuracy and speed baselines, respectively. | |||
"O1" and "O2" are different implementations of mixed precision. | |||
Try both, and see what gives the best speedup and accuracy for your model. | |||
""" | |||
def __init__(self, | |||
cumulative_iters=1, | |||
grad_clip=None, | |||
loss_keys='loss', | |||
opt_level='O1'): | |||
super(ApexAMPOptimizerHook, self).__init__( | |||
grad_clip=grad_clip, loss_keys=loss_keys) | |||
self.cumulative_iters = cumulative_iters | |||
self.opt_level = opt_level | |||
try: | |||
from apex import amp | |||
except ImportError: | |||
raise ValueError( | |||
'apex not installed, please install apex from https://www.github.com/nvidia/apex.' | |||
) | |||
def before_run(self, trainer): | |||
from apex import amp | |||
logging.info('open fp16') | |||
# TODO: fix - amp should be initialized with the model not wrapped by DDP or DP
if hasattr(trainer.model, 'module'): | |||
trainer.model, trainer.optimizer = amp.initialize( | |||
trainer.model.module, | |||
trainer.optimizer, | |||
opt_level=self.opt_level) | |||
else: | |||
trainer.model, trainer.optimizer = amp.initialize( | |||
trainer.model, trainer.optimizer, opt_level=self.opt_level) | |||
trainer.optimizer.zero_grad() | |||
def after_train_iter(self, trainer): | |||
for k in self.loss_keys: | |||
trainer.train_outputs[k] /= self.cumulative_iters | |||
from apex import amp | |||
for k in self.loss_keys: | |||
with amp.scale_loss(trainer.train_outputs[k], | |||
trainer.optimizer) as scaled_loss: | |||
scaled_loss.backward() | |||
if self.every_n_iters(trainer, self.cumulative_iters): | |||
if self.grad_clip is not None: | |||
self.clip_grads(trainer.model.parameters(), **self.grad_clip) | |||
trainer.optimizer.step() | |||
trainer.optimizer.zero_grad() |
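Note the interaction with gradient accumulation: each loss is divided by cumulative_iters and backpropagated through amp.scale_loss every iteration, while optimizer.step() and zero_grad() only run every cumulative_iters iterations. A hypothetical hook configuration (keys mirror __init__; values are illustrative):

from modelscope.metainfo import Hooks

apex_amp_cfg = {
    'type': Hooks.ApexAMPOptimizerHook,  # 'ApexAMPOptimizerHook'
    'cumulative_iters': 2,               # optimizer steps every 2nd iteration
    'grad_clip': {'max_norm': 5.0},      # forwarded to clip_grad_norm_
    'opt_level': 'O1',
}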
@@ -0,0 +1,73 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import logging | |||
from torch.nn.utils import clip_grad | |||
from modelscope.metainfo import Hooks | |||
from modelscope.trainers.hooks.builder import HOOKS | |||
from modelscope.trainers.hooks.hook import Hook | |||
from modelscope.trainers.hooks.priority import Priority | |||
@HOOKS.register_module(module_name=Hooks.OptimizerHook) | |||
class OptimizerHook(Hook): | |||
"""Optimizer hook | |||
Args: | |||
cumulative_iters (int): interval of gradients accumulation. Default: 1 | |||
grad_clip (dict): Default None. Containing keys: | |||
max_norm (float or int): max norm of the gradients | |||
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` | |||
loss_keys (str | list): keys list of loss | |||
""" | |||
PRIORITY = Priority.ABOVE_NORMAL | |||
def __init__(self, | |||
cumulative_iters=1, | |||
grad_clip=None, | |||
loss_keys='loss') -> None: | |||
if isinstance(loss_keys, str): | |||
loss_keys = [loss_keys] | |||
assert isinstance(loss_keys, (tuple, list)) | |||
self.loss_keys = loss_keys | |||
self.cumulative_iters = cumulative_iters | |||
self.grad_clip = grad_clip | |||
def clip_grads(self, params, **clip_args): | |||
params = list( | |||
filter(lambda p: p.requires_grad and p.grad is not None, params)) | |||
if len(params) > 0: | |||
return clip_grad.clip_grad_norm_(params, **clip_args) | |||
def before_run(self, trainer): | |||
trainer.optimizer.zero_grad() | |||
def after_train_iter(self, trainer): | |||
for k in self.loss_keys: | |||
trainer.train_outputs[k] /= self.cumulative_iters | |||
trainer.train_outputs[k].backward() | |||
if self.every_n_iters(trainer, self.cumulative_iters): | |||
if self.grad_clip is not None: | |||
self.clip_grads(trainer.model.parameters(), **self.grad_clip) | |||
trainer.optimizer.step() | |||
trainer.optimizer.zero_grad() | |||
@HOOKS.register_module(module_name=Hooks.NoneOptimizerHook) | |||
class NoneOptimizerHook(OptimizerHook): | |||
def __init__(self, cumulative_iters=1, grad_clip=None, loss_keys='loss'): | |||
super(NoneOptimizerHook, self).__init__( | |||
grad_clip=grad_clip, loss_keys=loss_keys) | |||
self.cumulative_iters = cumulative_iters | |||
def before_run(self, trainer): | |||
return | |||
def after_train_iter(self, trainer): | |||
return |
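For reference, a direct construction of the plain hook (a sketch mirroring the constructor above; the grad_clip keys are forwarded unchanged to torch.nn.utils.clip_grad.clip_grad_norm_):

from modelscope.trainers.hooks import OptimizerHook

opt_hook = OptimizerHook(
    cumulative_iters=4,                          # accumulate 4 mini-batches per optimizer step
    grad_clip=dict(max_norm=1.0, norm_type=2),   # clip_grad_norm_ arguments
    loss_keys='loss')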
@@ -0,0 +1,83 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import logging | |||
from modelscope.metainfo import Hooks | |||
from modelscope.trainers.hooks.builder import HOOKS | |||
from .base import OptimizerHook | |||
@HOOKS.register_module(module_name=Hooks.TorchAMPOptimizerHook) | |||
class TorchAMPOptimizerHook(OptimizerHook): | |||
"""Fp16 optimizer, if torch version is less than 1.6.0, | |||
you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default | |||
Args: | |||
cumulative_iters (int): interval of gradients accumulation. Default: 1 | |||
grad_clip (dict): Default None. Containing keys: | |||
max_norm (float or int): max norm of the gradients | |||
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` | |||
loss_keys (str | list): keys list of loss | |||
loss_scale (float | dict): grade scale config. If loss_scale is a float, | |||
static loss scaling will be used with the specified scale. | |||
It can also be a dict containing arguments of GradScalar. For Pytorch >= 1.6, | |||
we use official torch.cuda.amp.GradScaler. | |||
please refer to: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler for the parameters. | |||
""" | |||
def __init__(self, | |||
cumulative_iters=1, | |||
grad_clip=None, | |||
loss_keys='loss', | |||
loss_scale={}): | |||
super(TorchAMPOptimizerHook, self).__init__( | |||
grad_clip=grad_clip, loss_keys=loss_keys) | |||
self.cumulative_iters = cumulative_iters | |||
self._scale_update_param = None | |||
from torch.cuda import amp | |||
if isinstance(loss_scale, float): | |||
self._scale_update_param = loss_scale | |||
self.scaler = amp.GradScaler(init_scale=loss_scale) | |||
elif isinstance(loss_scale, dict): | |||
self.scaler = amp.GradScaler(**loss_scale) | |||
else: | |||
raise ValueError( | |||
f'`loss_scale` type must be in [float, dict], but got {loss_scale}'
) | |||
def before_run(self, trainer): | |||
logging.info('open fp16') | |||
trainer.optimizer.zero_grad() | |||
if hasattr(trainer.model, 'module'): | |||
self._ori_model_forward = trainer.model.module.forward | |||
self._model = trainer.model.module | |||
else: | |||
self._ori_model_forward = trainer.model.forward | |||
self._model = trainer.model | |||
self.ori_model_forward = trainer.model.forward | |||
def before_train_iter(self, trainer): | |||
from torch.cuda import amp | |||
setattr(self._model, 'forward', amp.autocast()(self._model.forward)) | |||
def after_train_iter(self, trainer): | |||
for k in self.loss_keys: | |||
trainer.train_outputs[k] /= self.cumulative_iters | |||
for k in self.loss_keys: | |||
self.scaler.scale(trainer.train_outputs[k]).backward() | |||
if self.every_n_iters(trainer, self.cumulative_iters): | |||
self.scaler.unscale_(trainer.optimizer) | |||
if self.grad_clip is not None: | |||
self.clip_grads(trainer.model.parameters(), **self.grad_clip) | |||
self.scaler.step(trainer.optimizer) | |||
self.scaler.update(self._scale_update_param) | |||
trainer.optimizer.zero_grad() | |||
setattr(self._model, 'forward', self._ori_model_forward) |
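The two accepted forms of loss_scale behave differently: a float installs a fixed scale (the scaler is updated back to that value after every step), while a dict is passed straight to torch.cuda.amp.GradScaler for dynamic scaling. A hypothetical configuration showing both (values are illustrative):

from modelscope.metainfo import Hooks

static_cfg = {'type': Hooks.TorchAMPOptimizerHook, 'loss_scale': 512.0}
dynamic_cfg = {
    'type': Hooks.TorchAMPOptimizerHook,
    'loss_scale': {'init_scale': 2.**16, 'growth_interval': 2000},  # GradScaler arguments
}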
@@ -1,218 +0,0 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
import logging | |||
from torch.nn.utils import clip_grad | |||
from .builder import HOOKS | |||
from .hook import Hook | |||
from .priority import Priority | |||
@HOOKS.register_module() | |||
class OptimizerHook(Hook): | |||
"""Optimizer hook | |||
Args: | |||
cumulative_iters (int): interval of gradients accumulation. Default: 1 | |||
grad_clip (dict): Default None. Containing keys: | |||
max_norm (float or int): max norm of the gradients | |||
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` | |||
loss_keys (str | list): keys list of loss | |||
""" | |||
PRIORITY = Priority.ABOVE_NORMAL | |||
def __init__(self, | |||
cumulative_iters=1, | |||
grad_clip=None, | |||
loss_keys='loss') -> None: | |||
if isinstance(loss_keys, str): | |||
loss_keys = [loss_keys] | |||
assert isinstance(loss_keys, (tuple, list)) | |||
self.loss_keys = loss_keys | |||
self.cumulative_iters = cumulative_iters | |||
self.grad_clip = grad_clip | |||
def clip_grads(self, params, **clip_args): | |||
params = list( | |||
filter(lambda p: p.requires_grad and p.grad is not None, params)) | |||
if len(params) > 0: | |||
return clip_grad.clip_grad_norm_(params, **clip_args) | |||
def before_run(self, trainer): | |||
trainer.optimizer.zero_grad() | |||
def after_train_iter(self, trainer): | |||
for k in self.loss_keys: | |||
trainer.train_outputs[k] /= self.cumulative_iters | |||
trainer.train_outputs[k].backward() | |||
if self.every_n_iters(trainer, self.cumulative_iters): | |||
if self.grad_clip is not None: | |||
self.clip_grads(trainer.model.parameters(), **self.grad_clip) | |||
trainer.optimizer.step() | |||
trainer.optimizer.zero_grad() | |||
@HOOKS.register_module() | |||
class TorchAMPOptimizerHook(OptimizerHook): | |||
"""Fp16 optimizer, if torch version is less than 1.6.0, | |||
you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default | |||
Args: | |||
cumulative_iters (int): interval of gradients accumulation. Default: 1 | |||
grad_clip (dict): Default None. Containing keys: | |||
max_norm (float or int): max norm of the gradients | |||
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` | |||
loss_keys (str | list): keys list of loss | |||
loss_scale (float | dict): grade scale config. If loss_scale is a float, | |||
static loss scaling will be used with the specified scale. | |||
It can also be a dict containing arguments of GradScalar. For Pytorch >= 1.6, | |||
we use official torch.cuda.amp.GradScaler. | |||
please refer to: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler for the parameters. | |||
""" | |||
def __init__(self, | |||
cumulative_iters=1, | |||
grad_clip=None, | |||
loss_keys='loss', | |||
loss_scale={}): | |||
super(TorchAMPOptimizerHook, self).__init__( | |||
grad_clip=grad_clip, loss_keys=loss_keys) | |||
self.cumulative_iters = cumulative_iters | |||
self._scale_update_param = None | |||
from torch.cuda import amp | |||
if isinstance(loss_scale, float): | |||
self._scale_update_param = loss_scale | |||
self.scaler = amp.GradScaler(init_scale=loss_scale) | |||
elif isinstance(loss_scale, dict): | |||
self.scaler = amp.GradScaler(**loss_scale) | |||
else: | |||
raise ValueError( | |||
'`loss_scale` type must be in [float, dict], but got {loss_scale}' | |||
) | |||
def before_run(self, trainer): | |||
logging.info('open fp16') | |||
trainer.optimizer.zero_grad() | |||
if hasattr(trainer.model, 'module'): | |||
self._ori_model_forward = trainer.model.module.forward | |||
self._model = trainer.model.module | |||
else: | |||
self._ori_model_forward = trainer.model.forward | |||
self._model = trainer.model | |||
self.ori_model_forward = trainer.model.forward | |||
def before_train_iter(self, trainer): | |||
from torch.cuda import amp | |||
setattr(self._model, 'forward', amp.autocast()(self._model.forward)) | |||
def after_train_iter(self, trainer): | |||
for k in self.loss_keys: | |||
trainer.train_outputs[k] /= self.cumulative_iters | |||
for k in self.loss_keys: | |||
self.scaler.scale(trainer.train_outputs[k]).backward() | |||
if self.every_n_iters(trainer, self.cumulative_iters): | |||
self.scaler.unscale_(trainer.optimizer) | |||
if self.grad_clip is not None: | |||
self.clip_grads(trainer.model.parameters(), **self.grad_clip) | |||
self.scaler.step(trainer.optimizer) | |||
self.scaler.update(self._scale_update_param) | |||
trainer.optimizer.zero_grad() | |||
setattr(self._model, 'forward', self._ori_model_forward) | |||
@HOOKS.register_module() | |||
class ApexAMPOptimizerHook(OptimizerHook): | |||
"""Fp16 optimizer, if torch version is less than 1.6.0, | |||
you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default | |||
Args: | |||
cumulative_iters (int): interval of gradients accumulation. Default: 1 | |||
grad_clip (dict): Default None. Containing keys: | |||
max_norm (float or int): max norm of the gradients | |||
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm. | |||
More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_` | |||
loss_keys (str | list): keys list of loss | |||
opt_level (str): "O0" and "O3" are not true mixed precision, | |||
but they are useful for establishing accuracy and speed baselines, respectively. | |||
"O1" and "O2" are different implementations of mixed precision. | |||
Try both, and see what gives the best speedup and accuracy for your model. | |||
""" | |||
def __init__(self, | |||
cumulative_iters=1, | |||
grad_clip=None, | |||
loss_keys='loss', | |||
opt_level='O1'): | |||
super(ApexAMPOptimizerHook, self).__init__( | |||
grad_clip=grad_clip, loss_keys=loss_keys) | |||
self.cumulative_iters = cumulative_iters | |||
self.opt_level = opt_level | |||
try: | |||
from apex import amp | |||
except ImportError: | |||
raise ValueError( | |||
'apex not installed, please install apex from https://www.github.com/nvidia/apex.' | |||
) | |||
def before_run(self, trainer): | |||
from apex import amp | |||
logging.info('open fp16') | |||
# TODO: fix it should initialze amp with model not wrapper by DDP or DP | |||
if hasattr(trainer.model, 'module'): | |||
trainer.model, trainer.optimizer = amp.initialize( | |||
trainer.model.module, | |||
trainer.optimizer, | |||
opt_level=self.opt_level) | |||
else: | |||
trainer.model, trainer.optimizer = amp.initialize( | |||
trainer.model, trainer.optimizer, opt_level=self.opt_level) | |||
trainer.optimizer.zero_grad() | |||
def after_train_iter(self, trainer): | |||
for k in self.loss_keys: | |||
trainer.train_outputs[k] /= self.cumulative_iters | |||
from apex import amp | |||
for k in self.loss_keys: | |||
with amp.scale_loss(trainer.train_outputs[k], | |||
trainer.optimizer) as scaled_loss: | |||
scaled_loss.backward() | |||
if self.every_n_iters(trainer, self.cumulative_iters): | |||
if self.grad_clip is not None: | |||
self.clip_grads(trainer.model.parameters(), **self.grad_clip) | |||
trainer.optimizer.step() | |||
trainer.optimizer.zero_grad() | |||
@HOOKS.register_module() | |||
class NoneOptimizerHook(OptimizerHook): | |||
def __init__(self, cumulative_iters=1, grad_clip=None, loss_keys='loss'): | |||
super(NoneOptimizerHook, self).__init__( | |||
grad_clip=grad_clip, loss_keys=loss_keys) | |||
self.cumulative_iters = cumulative_iters | |||
def before_run(self, trainer): | |||
return | |||
def after_train_iter(self, trainer): | |||
return |
@@ -1,8 +1,25 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from .builder import LR_SCHEDULER, build_lr_scheduler | |||
from .warmup import BaseWarmup, ConstantWarmup, ExponentialWarmup, LinearWarmup | |||
from typing import TYPE_CHECKING | |||
__all__ = [ | |||
'LR_SCHEDULER', 'build_lr_scheduler', 'BaseWarmup', 'ConstantWarmup', | |||
'LinearWarmup', 'ExponentialWarmup' | |||
] | |||
from modelscope.utils.import_utils import LazyImportModule | |||
if TYPE_CHECKING: | |||
from .builder import LR_SCHEDULER, build_lr_scheduler | |||
from .warmup import BaseWarmup, ConstantWarmup, ExponentialWarmup, LinearWarmup | |||
else: | |||
_import_structure = { | |||
'builder': ['LR_SCHEDULER', 'build_lr_scheduler'], | |||
'warmup': | |||
['BaseWarmup', 'ConstantWarmup', 'ExponentialWarmup', 'LinearWarmup'] | |||
} | |||
import sys | |||
sys.modules[__name__] = LazyImportModule( | |||
__name__, | |||
globals()['__file__'], | |||
_import_structure, | |||
module_spec=__spec__, | |||
extra_objects={}, | |||
) |
@@ -4,7 +4,7 @@ import inspect | |||
from modelscope.utils.config import ConfigDict | |||
from modelscope.utils.registry import Registry, build_from_cfg, default_group | |||
LR_SCHEDULER = Registry('lr scheduler') | |||
LR_SCHEDULER = Registry('lr_scheduler') | |||
def build_lr_scheduler(cfg: ConfigDict, default_args: dict = None): | |||
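The rename only changes the registry's display name; registration and lookup still go through LR_SCHEDULER.register_module and build_lr_scheduler. A hedged sketch with a hypothetical scheduler, assuming the usual 'type' key convention of build_from_cfg:

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import LambdaLR

from modelscope.trainers.lrscheduler.builder import LR_SCHEDULER, build_lr_scheduler


@LR_SCHEDULER.register_module(module_name='ConstantLR')  # hypothetical, not part of this diff
class ConstantLR(LambdaLR):

    def __init__(self, optimizer):
        super().__init__(optimizer, lr_lambda=lambda _: 1.0)  # keep the base lr unchanged


optimizer = SGD(torch.nn.Linear(2, 2).parameters(), lr=0.1)
scheduler = build_lr_scheduler(dict(type='ConstantLR', optimizer=optimizer))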
@@ -1,5 +1,25 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from .base import BaseWarmup | |||
from .warmup import ConstantWarmup, ExponentialWarmup, LinearWarmup | |||
__all__ = ['BaseWarmup', 'ConstantWarmup', 'LinearWarmup', 'ExponentialWarmup'] | |||
from typing import TYPE_CHECKING | |||
from modelscope.utils.import_utils import LazyImportModule | |||
if TYPE_CHECKING: | |||
from .base import BaseWarmup | |||
from .warmup import ConstantWarmup, ExponentialWarmup, LinearWarmup | |||
else: | |||
_import_structure = { | |||
'base': ['BaseWarmup'], | |||
'warmup': ['ConstantWarmup', 'ExponentialWarmup', 'LinearWarmup'] | |||
} | |||
import sys | |||
sys.modules[__name__] = LazyImportModule( | |||
__name__, | |||
globals()['__file__'], | |||
_import_structure, | |||
module_spec=__spec__, | |||
extra_objects={}, | |||
) |
@@ -1,9 +1,10 @@ | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from modelscope.metainfo import LR_Schedulers | |||
from modelscope.trainers.lrscheduler.builder import LR_SCHEDULER | |||
from .base import BaseWarmup | |||
@LR_SCHEDULER.register_module() | |||
@LR_SCHEDULER.register_module(module_name=LR_Schedulers.ConstantWarmup) | |||
class ConstantWarmup(BaseWarmup): | |||
"""Linear warmup scheduler. | |||
@@ -29,7 +30,7 @@ class ConstantWarmup(BaseWarmup): | |||
return self.warmup_ratio | |||
@LR_SCHEDULER.register_module() | |||
@LR_SCHEDULER.register_module(module_name=LR_Schedulers.LinearWarmup) | |||
class LinearWarmup(BaseWarmup): | |||
"""Linear warmup scheduler. | |||
@@ -54,7 +55,7 @@ class LinearWarmup(BaseWarmup): | |||
return 1 - k | |||
@LR_SCHEDULER.register_module() | |||
@LR_SCHEDULER.register_module(module_name=LR_Schedulers.ExponentialWarmup) | |||
class ExponentialWarmup(BaseWarmup): | |||
"""Exponential warmup scheduler. | |||
@@ -1 +1,20 @@ | |||
from .clip import CLIPTrainer | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from typing import TYPE_CHECKING | |||
from modelscope.utils.import_utils import LazyImportModule | |||
if TYPE_CHECKING: | |||
from .clip import CLIPTrainer | |||
else: | |||
_import_structure = {'clip': ['CLIPTrainer']} | |||
import sys | |||
sys.modules[__name__] = LazyImportModule( | |||
__name__, | |||
globals()['__file__'], | |||
_import_structure, | |||
module_spec=__spec__, | |||
extra_objects={}, | |||
) |
@@ -1 +1,22 @@ | |||
from .sequence_classification_trainer import SequenceClassificationTrainer | |||
# Copyright (c) Alibaba, Inc. and its affiliates. | |||
from typing import TYPE_CHECKING | |||
from modelscope.utils.import_utils import LazyImportModule | |||
if TYPE_CHECKING: | |||
from .sequence_classification_trainer import SequenceClassificationTrainer | |||
else: | |||
_import_structure = { | |||
'sequence_classification_trainer': ['SequenceClassificationTrainer'] | |||
} | |||
import sys | |||
sys.modules[__name__] = LazyImportModule( | |||
__name__, | |||
globals()['__file__'], | |||
_import_structure, | |||
module_spec=__spec__, | |||
extra_objects={}, | |||
) |
@@ -3,6 +3,7 @@ from typing import Dict, Optional, Tuple, Union | |||
import numpy as np | |||
from modelscope.metainfo import Trainers | |||
from modelscope.trainers.base import BaseTrainer | |||
from modelscope.trainers.builder import TRAINERS | |||
from modelscope.utils.logger import get_logger | |||
@@ -11,7 +12,7 @@ PATH = None | |||
logger = get_logger(PATH) | |||
@TRAINERS.register_module(module_name=r'bert-sentiment-analysis') | |||
@TRAINERS.register_module(module_name=Trainers.bert_sentiment_analysis) | |||
class SequenceClassificationTrainer(BaseTrainer): | |||
def __init__(self, cfg_file: str, *args, **kwargs): | |||
@@ -6,6 +6,7 @@ from torch import nn | |||
from torch.utils.data import Dataset | |||
from modelscope.hub.snapshot_download import snapshot_download | |||
from modelscope.metainfo import Trainers | |||
from modelscope.metrics.builder import build_metric | |||
from modelscope.models.base import Model, TorchModel | |||
from modelscope.msdatasets import MsDataset | |||
@@ -17,7 +18,7 @@ from .base import TRAINERS | |||
from .trainer import EpochBasedTrainer | |||
@TRAINERS.register_module(module_name='NlpEpochBasedTrainer') | |||
@TRAINERS.register_module(module_name=Trainers.nlp_base_trainer) | |||
class NlpEpochBasedTrainer(EpochBasedTrainer): | |||
def __init__( | |||
@@ -142,7 +143,7 @@ class NlpEpochBasedTrainer(EpochBasedTrainer): | |||
return build_preprocessor(cfg, Tasks.find_field_by_task(self.cfg.task)) | |||
@TRAINERS.register_module(module_name='VecoTrainer') | |||
@TRAINERS.register_module(module_name=Trainers.nlp_veco_trainer) | |||
class VecoTrainer(NlpEpochBasedTrainer): | |||
def evaluate(self, checkpoint_path=None): | |||
@@ -17,6 +17,7 @@ from torch.utils.data import DataLoader, Dataset | |||
from torch.utils.data.distributed import DistributedSampler | |||
from modelscope.hub.snapshot_download import snapshot_download | |||
from modelscope.metainfo import Trainers | |||
from modelscope.metrics import build_metric, task_default_metrics | |||
from modelscope.models.base import Model, TorchModel | |||
from modelscope.msdatasets.ms_dataset import MsDataset | |||
@@ -45,7 +46,7 @@ from .parallel.builder import build_parallel | |||
from .parallel.utils import is_parallel | |||
@TRAINERS.register_module() | |||
@TRAINERS.register_module(module_name=Trainers.default) | |||
class EpochBasedTrainer(BaseTrainer): | |||
"""Epoch based Trainer, a training helper for PyTorch. | |||
@@ -5,6 +5,7 @@ import importlib | |||
import os | |||
import os.path as osp | |||
import time | |||
import traceback | |||
from functools import reduce | |||
from typing import Generator, Union | |||
@@ -13,8 +14,9 @@ import json | |||
from modelscope import __version__ | |||
from modelscope.fileio.file import LocalStorage | |||
from modelscope.metainfo import (Heads, Metrics, Models, Pipelines, | |||
Preprocessors, TaskModels, Trainers) | |||
from modelscope.metainfo import (Heads, Hooks, LR_Schedulers, Metrics, Models, | |||
Optimizers, Pipelines, Preprocessors, | |||
TaskModels, Trainers) | |||
from modelscope.utils.constant import Fields, Tasks | |||
from modelscope.utils.file_utils import get_default_cache_dir | |||
from modelscope.utils.logger import get_logger | |||
@@ -28,7 +30,8 @@ MODELSCOPE_PATH = '/'.join(os.path.dirname(__file__).split('/')[:-1]) | |||
REGISTER_MODULE = 'register_module' | |||
IGNORED_PACKAGES = ['modelscope', '.'] | |||
SCAN_SUB_FOLDERS = [ | |||
'models', 'metrics', 'pipelines', 'preprocessors', 'task_datasets' | |||
'models', 'metrics', 'pipelines', 'preprocessors', 'task_datasets', | |||
'trainers' | |||
] | |||
INDEXER_FILE = 'ast_indexer' | |||
DECORATOR_KEY = 'decorators' | |||
@@ -305,9 +308,11 @@ class AstScaning(object): | |||
output = [functions[0]] | |||
if len(args_list) == 0 and len(keyword_list) == 0: | |||
args_list.append(None) | |||
args_list.append(default_group) | |||
if len(keyword_list) == 0 and len(args_list) == 1: | |||
args_list.append(None) | |||
if len(keyword_list) == 1 and len(args_list) == 0: | |||
args_list.append(default_group) | |||
args_list.extend(keyword_list) | |||
@@ -318,6 +323,8 @@ class AstScaning(object): | |||
# the case (default_group) | |||
elif item[1] is None: | |||
output.append(item[0]) | |||
elif isinstance(item, str): | |||
output.append(item) | |||
else: | |||
output.append('.'.join(item)) | |||
return (output[0], self._get_registry_value(output[1]), | |||
@@ -443,9 +450,11 @@ class FilesAstScaning(object): | |||
try: | |||
output = self.astScaner.generate_ast(file) | |||
except Exception as e: | |||
detail = traceback.extract_tb(e.__traceback__) | |||
raise Exception( | |||
'During ast indexing, there are index errors in the ' | |||
f'file {file} : {type(e).__name__}.{e}') | |||
f'During ast indexing, error is in the file {detail[-1].filename}' | |||
f' line: {detail[-1].lineno}: "{detail[-1].line}" with error msg: ' | |||
f'"{type(e).__name__}: {e}"') | |||
import_list = self.parse_import(output) | |||
return output[DECORATOR_KEY], import_list | |||
@@ -523,14 +532,14 @@ class FilesAstScaning(object): | |||
return md5.hexdigest() | |||
fileScaner = FilesAstScaning() | |||
file_scanner = FilesAstScaning() | |||
def _save_index(index, file_path): | |||
# convert tuple key to str key | |||
index[INDEX_KEY] = {str(k): v for k, v in index[INDEX_KEY].items()} | |||
index[VERSION_KEY] = __version__ | |||
index[MD5_KEY] = fileScaner.files_mtime_md5() | |||
index[MD5_KEY] = file_scanner.files_mtime_md5() | |||
json_index = json.dumps(index) | |||
storage.write(json_index.encode(), file_path) | |||
index[INDEX_KEY] = { | |||
@@ -579,7 +588,7 @@ def load_index(force_rebuild=False): | |||
index = None | |||
if not force_rebuild and os.path.exists(file_path): | |||
wrapped_index = _load_index(file_path) | |||
md5 = fileScaner.files_mtime_md5() | |||
md5 = file_scanner.files_mtime_md5() | |||
if (wrapped_index[VERSION_KEY] == __version__ | |||
and wrapped_index[MD5_KEY] == md5): | |||
index = wrapped_index | |||
@@ -591,7 +600,7 @@ def load_index(force_rebuild=False): | |||
logger.info( | |||
f'No valid ast index found from {file_path}, rebuilding ast index!' | |||
) | |||
index = fileScaner.get_files_scan_results() | |||
index = file_scanner.get_files_scan_results() | |||
_save_index(index, file_path) | |||
return index | |||
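Since 'trainers' is now part of SCAN_SUB_FOLDERS, a cached index built by an older version would miss the trainer registrations; the version and md5 checks above handle that, and the index can also be rebuilt explicitly (module path assumed from this diff):

from modelscope.utils.ast_utils import load_index

index = load_index(force_rebuild=True)  # ignore the cached ast_indexer and rescan, including 'trainers'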
@@ -7,4 +7,6 @@ pycocotools>=2.0.4 | |||
# which introduced compatibility issues that are being investigated
rouge_score<=0.0.4 | |||
timm | |||
tokenizers | |||
torchvision | |||
transformers>=4.12.0 |
@@ -6,3 +6,5 @@ pai-easynlp | |||
rouge_score<=0.0.4 | |||
seqeval | |||
spacy>=2.3.5 | |||
tokenizers | |||
transformers>=4.12.0 |
@@ -13,7 +13,5 @@ requests | |||
scipy | |||
setuptools | |||
tensorboard | |||
tokenizers | |||
tqdm>=4.64.0 | |||
transformers>=4.12.0 | |||
yapf |
@@ -10,6 +10,7 @@ import numpy as np | |||
import torch | |||
from torch import nn | |||
from modelscope.metainfo import Trainers | |||
from modelscope.trainers import build_trainer | |||
from modelscope.utils.constant import LogKeys, ModelFile | |||
from modelscope.utils.test_utils import create_dummy_test_dataset | |||
@@ -73,7 +74,7 @@ class TensorboardHookTest(unittest.TestCase): | |||
with open(config_path, 'w') as f: | |||
json.dump(json_cfg, f) | |||
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=DummyModel(), | |||
@@ -9,6 +9,7 @@ import numpy as np | |||
import torch | |||
from torch import nn | |||
from modelscope.metainfo import Trainers | |||
from modelscope.metrics.builder import METRICS, MetricKeys | |||
from modelscope.trainers import build_trainer | |||
from modelscope.utils.constant import LogKeys, ModelFile | |||
@@ -108,7 +109,7 @@ class CheckpointHookTest(unittest.TestCase): | |||
with open(config_path, 'w') as f: | |||
json.dump(json_cfg, f) | |||
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=DummyModel(), | |||
@@ -179,7 +180,7 @@ class BestCkptSaverHookTest(unittest.TestCase): | |||
with open(config_path, 'w') as f: | |||
json.dump(json_cfg, f) | |||
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=DummyModel(), | |||
@@ -9,6 +9,7 @@ import numpy as np | |||
import torch | |||
from torch import nn | |||
from modelscope.metainfo import Trainers | |||
from modelscope.metrics.builder import METRICS, MetricKeys | |||
from modelscope.trainers import build_trainer | |||
from modelscope.utils.constant import LogKeys, ModelFile | |||
@@ -97,7 +98,7 @@ class EvaluationHookTest(unittest.TestCase): | |||
with open(config_path, 'w') as f: | |||
json.dump(json_cfg, f) | |||
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=DummyModel(), | |||
@@ -11,6 +11,7 @@ from torch import nn | |||
from torch.optim import SGD | |||
from torch.optim.lr_scheduler import MultiStepLR, ReduceLROnPlateau | |||
from modelscope.metainfo import Trainers | |||
from modelscope.metrics.builder import METRICS, MetricKeys | |||
from modelscope.trainers import build_trainer | |||
from modelscope.utils.constant import LogKeys, ModelFile, TrainerStages | |||
@@ -89,7 +90,7 @@ class LrSchedulerHookTest(unittest.TestCase): | |||
model = DummyModel() | |||
optimizer = SGD(model.parameters(), lr=0.01) | |||
lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4]) | |||
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=model, | |||
@@ -161,7 +162,7 @@ class LrSchedulerHookTest(unittest.TestCase): | |||
model = DummyModel() | |||
# optimizer = SGD(model.parameters(), lr=0.01)
# lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4])
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=model, | |||
@@ -258,7 +259,7 @@ class PlateauLrSchedulerHookTest(unittest.TestCase): | |||
model = DummyModel() | |||
optimizer = SGD(model.parameters(), lr=0.01) | |||
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=model, | |||
@@ -11,6 +11,7 @@ from torch import nn | |||
from torch.optim import SGD | |||
from torch.optim.lr_scheduler import MultiStepLR | |||
from modelscope.metainfo import Trainers | |||
from modelscope.trainers import build_trainer | |||
from modelscope.utils.constant import ModelFile, TrainerStages | |||
from modelscope.utils.test_utils import create_dummy_test_dataset | |||
@@ -64,7 +65,7 @@ class OptimizerHookTest(unittest.TestCase): | |||
model = DummyModel() | |||
optimizer = SGD(model.parameters(), lr=0.01) | |||
lr_scheduler = MultiStepLR(optimizer, milestones=[1, 2]) | |||
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=model, | |||
@@ -130,7 +131,7 @@ class TorchAMPOptimizerHookTest(unittest.TestCase): | |||
model = DummyModel().cuda() | |||
optimizer = SGD(model.parameters(), lr=0.01) | |||
lr_scheduler = MultiStepLR(optimizer, milestones=[1, 2]) | |||
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=model, | |||
@@ -11,6 +11,7 @@ from torch import nn | |||
from torch.optim import SGD | |||
from torch.optim.lr_scheduler import MultiStepLR | |||
from modelscope.metainfo import Trainers | |||
from modelscope.trainers import build_trainer | |||
from modelscope.utils.constant import LogKeys, ModelFile, TrainerStages | |||
from modelscope.utils.test_utils import create_dummy_test_dataset | |||
@@ -68,7 +69,7 @@ class IterTimerHookTest(unittest.TestCase): | |||
model = DummyModel() | |||
optimizer = SGD(model.parameters(), lr=0.01) | |||
lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4]) | |||
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=model, | |||
@@ -4,6 +4,7 @@ import shutil | |||
import tempfile | |||
import unittest | |||
from modelscope.metainfo import Trainers | |||
from modelscope.trainers import build_trainer | |||
@@ -23,7 +24,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase): | |||
model_id, | |||
train_dataset, | |||
eval_dataset, | |||
name='NlpEpochBasedTrainer', | |||
name=Trainers.nlp_base_trainer, | |||
cfg_modify_fn=None, | |||
**kwargs): | |||
kwargs = dict( | |||
@@ -236,7 +237,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase): | |||
'damo/nlp_veco_fill-mask-large', | |||
train_datasets, | |||
eval_datasets, | |||
name='VecoTrainer', | |||
name=Trainers.nlp_veco_trainer, | |||
cfg_modify_fn=cfg_modify_fn) | |||
@@ -5,6 +5,7 @@ import tempfile | |||
import unittest | |||
from functools import reduce | |||
from modelscope.metainfo import Trainers | |||
from modelscope.trainers import build_trainer | |||
from modelscope.utils.test_utils import test_level | |||
@@ -25,7 +26,7 @@ class TestFinetuneTokenClassification(unittest.TestCase): | |||
model_id, | |||
train_dataset, | |||
eval_dataset, | |||
name='NlpEpochBasedTrainer', | |||
name=Trainers.nlp_base_trainer, | |||
cfg_modify_fn=None, | |||
**kwargs): | |||
kwargs = dict( | |||
@@ -7,6 +7,7 @@ import zipfile | |||
from functools import partial | |||
from modelscope.hub.snapshot_download import snapshot_download | |||
from modelscope.metainfo import Trainers | |||
from modelscope.models.cv.image_instance_segmentation import ( | |||
CascadeMaskRCNNSwinModel, ImageInstanceSegmentationCocoDataset) | |||
from modelscope.trainers import build_trainer | |||
@@ -79,7 +80,7 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase): | |||
work_dir=self.tmp_dir) | |||
trainer = build_trainer( | |||
name='image-instance-segmentation', default_args=kwargs) | |||
name=Trainers.image_instance_segmentation, default_args=kwargs) | |||
trainer.train() | |||
results_files = os.listdir(self.tmp_dir) | |||
self.assertIn(f'{trainer.timestamp}.log.json', results_files) | |||
@@ -103,7 +104,7 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase): | |||
work_dir=self.tmp_dir) | |||
trainer = build_trainer( | |||
name='image-instance-segmentation', default_args=kwargs) | |||
name=Trainers.image_instance_segmentation, default_args=kwargs) | |||
trainer.train() | |||
results_files = os.listdir(self.tmp_dir) | |||
self.assertIn(f'{trainer.timestamp}.log.json', results_files) | |||
@@ -11,6 +11,7 @@ import torch | |||
from torch.utils import data as data | |||
from modelscope.hub.snapshot_download import snapshot_download | |||
from modelscope.metainfo import Trainers | |||
from modelscope.models.cv.image_portrait_enhancement import \ | |||
ImagePortraitEnhancement | |||
from modelscope.trainers import build_trainer | |||
@@ -91,7 +92,8 @@ class TestImagePortraitEnhancementTrainer(unittest.TestCase): | |||
device='gpu', | |||
work_dir=self.tmp_dir) | |||
trainer = build_trainer(name='gpen', default_args=kwargs) | |||
trainer = build_trainer( | |||
name=Trainers.image_portrait_enhancement, default_args=kwargs) | |||
trainer.train() | |||
@unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
@@ -111,7 +113,8 @@ class TestImagePortraitEnhancementTrainer(unittest.TestCase): | |||
max_epochs=2, | |||
work_dir=self.tmp_dir) | |||
trainer = build_trainer(name='gpen', default_args=kwargs) | |||
trainer = build_trainer( | |||
name=Trainers.image_portrait_enhancement, default_args=kwargs) | |||
trainer.train() | |||
@@ -5,6 +5,7 @@ import tempfile | |||
import unittest | |||
from modelscope.hub.snapshot_download import snapshot_download | |||
from modelscope.metainfo import Trainers | |||
from modelscope.models.nlp.palm_v2 import PalmForTextGeneration | |||
from modelscope.msdatasets import MsDataset | |||
from modelscope.trainers import build_trainer | |||
@@ -57,7 +58,7 @@ class TestTextGenerationTrainer(unittest.TestCase): | |||
work_dir=self.tmp_dir) | |||
trainer = build_trainer( | |||
name='NlpEpochBasedTrainer', default_args=kwargs) | |||
name=Trainers.nlp_base_trainer, default_args=kwargs) | |||
trainer.train() | |||
results_files = os.listdir(self.tmp_dir) | |||
self.assertIn(f'{trainer.timestamp}.log.json', results_files) | |||
@@ -122,7 +123,7 @@ class TestTextGenerationTrainer(unittest.TestCase): | |||
cfg_modify_fn=cfg_modify_fn, | |||
model_revision='beta') | |||
trainer = build_trainer( | |||
name='NlpEpochBasedTrainer', default_args=kwargs) | |||
name=Trainers.nlp_base_trainer, default_args=kwargs) | |||
trainer.train() | |||
@@ -13,6 +13,7 @@ from torch import nn | |||
from torch.optim import SGD | |||
from torch.optim.lr_scheduler import StepLR | |||
from modelscope.metainfo import Trainers | |||
from modelscope.metrics.builder import MetricKeys | |||
from modelscope.msdatasets import MsDataset | |||
from modelscope.trainers import build_trainer | |||
@@ -101,14 +102,14 @@ class TrainerTest(unittest.TestCase): | |||
'workers_per_gpu': 1, | |||
'shuffle': False | |||
}, | |||
'metrics': ['seq_cls_metric'] | |||
'metrics': ['seq-cls-metric'] | |||
} | |||
} | |||
config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION) | |||
with open(config_path, 'w') as f: | |||
json.dump(json_cfg, f) | |||
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=DummyModel(), | |||
@@ -155,7 +156,7 @@ class TrainerTest(unittest.TestCase): | |||
'workers_per_gpu': 1, | |||
'shuffle': False | |||
}, | |||
'metrics': ['seq_cls_metric'] | |||
'metrics': ['seq-cls-metric'] | |||
} | |||
} | |||
@@ -166,7 +167,7 @@ class TrainerTest(unittest.TestCase): | |||
model = DummyModel() | |||
optimizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = StepLR(optimizer, 2)
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=model, | |||
@@ -205,7 +206,7 @@ class TrainerTest(unittest.TestCase): | |||
'workers_per_gpu': 1, | |||
'shuffle': False | |||
}, | |||
'metrics': ['seq_cls_metric'] | |||
'metrics': ['seq-cls-metric'] | |||
} | |||
} | |||
@@ -216,7 +217,7 @@ class TrainerTest(unittest.TestCase): | |||
model = DummyModel() | |||
optimizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = StepLR(optimizer, 2)
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=model, | |||
@@ -12,8 +12,9 @@ from torch import nn | |||
from torch.optim import SGD | |||
from torch.optim.lr_scheduler import StepLR | |||
from modelscope.metainfo import Trainers | |||
from modelscope.metrics.builder import MetricKeys | |||
from modelscope.trainers import build_trainer | |||
from modelscope.trainers import EpochBasedTrainer, build_trainer | |||
from modelscope.utils.constant import LogKeys, ModeKeys, ModelFile | |||
from modelscope.utils.test_utils import (DistributedTestCase, | |||
create_dummy_test_dataset, test_level) | |||
@@ -70,7 +71,7 @@ def train_func(work_dir, dist=False): | |||
model = DummyModel() | |||
optimizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = StepLR(optimizer, 2)
trainer_name = 'EpochBasedTrainer' | |||
trainer_name = Trainers.default | |||
kwargs = dict( | |||
cfg_file=config_path, | |||
model=model, | |||