
[to #42322933] lazy load on trainer

master | zhangzhicheng.zzc | 3 years ago
commit 9d0b38b4e4
45 changed files with 632 additions and 322 deletions
1. +60 -2  modelscope/metainfo.py
2. +38 -8  modelscope/trainers/__init__.py
3. +2 -2  modelscope/trainers/builder.py
4. +27 -3  modelscope/trainers/cv/__init__.py
5. +2 -1  modelscope/trainers/cv/image_instance_segmentation_trainer.py
6. +2 -1  modelscope/trainers/cv/image_portrait_enhancement_trainer.py
7. +40 -16  modelscope/trainers/hooks/__init__.py
8. +3 -2  modelscope/trainers/hooks/checkpoint_hook.py
9. +2 -1  modelscope/trainers/hooks/evaluation_hook.py
10. +2 -1  modelscope/trainers/hooks/iter_timer_hook.py
11. +24 -4  modelscope/trainers/hooks/logger/__init__.py
12. +2 -1  modelscope/trainers/hooks/logger/tensorboard_hook.py
13. +2 -1  modelscope/trainers/hooks/logger/text_logger_hook.py
14. +4 -3  modelscope/trainers/hooks/lr_scheduler_hook.py
15. +26 -0  modelscope/trainers/hooks/optimizer/__init__.py
16. +75 -0  modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py
17. +73 -0  modelscope/trainers/hooks/optimizer/base.py
18. +83 -0  modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py
19. +0 -218  modelscope/trainers/hooks/optimizer_hook.py
20. +23 -6  modelscope/trainers/lrscheduler/__init__.py
21. +1 -1  modelscope/trainers/lrscheduler/builder.py
22. +23 -3  modelscope/trainers/lrscheduler/warmup/__init__.py
23. +4 -3  modelscope/trainers/lrscheduler/warmup/warmup.py
24. +20 -1  modelscope/trainers/multi_modal/__init__.py
25. +22 -1  modelscope/trainers/nlp/__init__.py
26. +2 -1  modelscope/trainers/nlp/sequence_classification_trainer.py
27. +3 -2  modelscope/trainers/nlp_trainer.py
28. +2 -1  modelscope/trainers/trainer.py
29. +19 -10  modelscope/utils/ast_utils.py
30. +2 -0  requirements/multi-modal.txt
31. +2 -0  requirements/nlp.txt
32. +0 -2  requirements/runtime.txt
33. +2 -1  tests/trainers/hooks/logger/test_tensorboard_hook.py
34. +3 -2  tests/trainers/hooks/test_checkpoint_hook.py
35. +2 -1  tests/trainers/hooks/test_evaluation_hook.py
36. +4 -3  tests/trainers/hooks/test_lr_scheduler_hook.py
37. +3 -2  tests/trainers/hooks/test_optimizer_hook.py
38. +2 -1  tests/trainers/hooks/test_timer_hook.py
39. +3 -2  tests/trainers/test_finetune_sequence_classification.py
40. +2 -1  tests/trainers/test_finetune_token_classificatin.py
41. +3 -2  tests/trainers/test_image_instance_segmentation_trainer.py
42. +5 -2  tests/trainers/test_image_portrait_enhancement_trainer.py
43. +3 -2  tests/trainers/test_text_generation_trainer.py
44. +7 -6  tests/trainers/test_trainer.py
45. +3 -2  tests/trainers/test_trainer_gpu.py

+60 -2  modelscope/metainfo.py

@@ -145,11 +145,20 @@ class Trainers(object):
For a model specific Trainer, you can use ${ModelName}-${Task}-trainer.
"""

default = 'Trainer'
default = 'trainer'

# multi-modal tasks
# multi-modal trainers
clip_multi_modal_embedding = 'clip-multi-modal-embedding'

# cv trainers
image_instance_segmentation = 'image-instance-segmentation'
image_portrait_enhancement = 'image-portrait-enhancement'

# nlp trainers
bert_sentiment_analysis = 'bert-sentiment-analysis'
nlp_base_trainer = 'nlp-base-trainer'
nlp_veco_trainer = 'nlp-veco-trainer'


class Preprocessors(object):
""" Names for different preprocessor.
@@ -219,3 +228,52 @@ class Metrics(object):
image_color_enhance_metric = 'image-color-enhance-metric'
# metrics for image-portrait-enhancement task
image_portrait_enhancement_metric = 'image-portrait-enhancement-metric'


class Optimizers(object):
""" Names for different optimizers.

Holds the standard optimizer names used to identify different optimizers.
This should be used to register optimizers.
"""

default = 'optimizer'

SGD = 'SGD'


class Hooks(object):
""" Names for different hooks.

All kinds of hooks are defined here
"""
# lr
LrSchedulerHook = 'LrSchedulerHook'
PlateauLrSchedulerHook = 'PlateauLrSchedulerHook'
NoneLrSchedulerHook = 'NoneLrSchedulerHook'

# optimizer
OptimizerHook = 'OptimizerHook'
TorchAMPOptimizerHook = 'TorchAMPOptimizerHook'
ApexAMPOptimizerHook = 'ApexAMPOptimizerHook'
NoneOptimizerHook = 'NoneOptimizerHook'

# checkpoint
CheckpointHook = 'CheckpointHook'
BestCkptSaverHook = 'BestCkptSaverHook'

# logger
TextLoggerHook = 'TextLoggerHook'
TensorboardHook = 'TensorboardHook'

IterTimerHook = 'IterTimerHook'
EvaluationHook = 'EvaluationHook'


class LR_Schedulers(object):
"""Learning rate schedulers are defined here.

"""
LinearWarmup = 'LinearWarmup'
ConstantWarmup = 'ConstantWarmup'
ExponentialWarmup = 'ExponentialWarmup'
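
These name constants replace the free-form strings previously passed to register_module and build_trainer; a minimal sketch of how they are referenced, assuming an installed modelscope:

from modelscope.metainfo import Trainers

# Registry keys are now symbolic constants rather than string literals.
print(Trainers.default)                      # 'trainer'
print(Trainers.nlp_base_trainer)             # 'nlp-base-trainer'
print(Trainers.image_instance_segmentation)  # 'image-instance-segmentation'

# e.g. build_trainer(name=Trainers.nlp_base_trainer, default_args=...) resolves
# the same entry that @TRAINERS.register_module(module_name=...) registered.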

+38 -8  modelscope/trainers/__init__.py

@@ -1,8 +1,38 @@
from .base import DummyTrainer
from .builder import build_trainer
from .cv import (ImageInstanceSegmentationTrainer,
ImagePortraitEnhancementTrainer)
from .multi_modal import CLIPTrainer
from .nlp import SequenceClassificationTrainer
from .nlp_trainer import NlpEpochBasedTrainer, VecoTrainer
from .trainer import EpochBasedTrainer
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .base import DummyTrainer
from .builder import build_trainer
from .cv import (ImageInstanceSegmentationTrainer,
ImagePortraitEnhancementTrainer)
from .multi_modal import CLIPTrainer
from .nlp import SequenceClassificationTrainer
from .nlp_trainer import NlpEpochBasedTrainer, VecoTrainer
from .trainer import EpochBasedTrainer

else:
_import_structure = {
'base': ['DummyTrainer'],
'builder': ['build_trainer'],
'cv': [
'ImageInstanceSegmentationTrainer',
'ImagePortraitEnhancementTrainer'
],
'multi_modal': ['CLIPTrainer'],
'nlp': ['SequenceClassificationTrainer'],
'nlp_trainer': ['NlpEpochBasedTrainer', 'VecoTrainer'],
'trainer': ['EpochBasedTrainer']
}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)
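
The package now exposes its public names through LazyImportModule, so importing modelscope.trainers no longer imports every trainer (and its heavy dependencies) eagerly. The LazyImportModule implementation is not part of this diff; the following is only a hypothetical sketch of the general pattern it appears to follow, i.e. a module object that imports a submodule the first time one of its attributes is accessed:

import importlib
import types


class _LazyModuleSketch(types.ModuleType):
    """Illustrative only: defer submodule imports until attribute access."""

    def __init__(self, name, import_structure):
        super().__init__(name)
        # Map each exported name to the submodule that defines it.
        self._name_to_module = {
            attr: mod for mod, attrs in import_structure.items() for attr in attrs
        }
        self.__all__ = list(self._name_to_module)

    def __getattr__(self, attr):
        if attr in self._name_to_module:
            submodule = importlib.import_module(
                f'{self.__name__}.{self._name_to_module[attr]}')
            value = getattr(submodule, attr)
            setattr(self, attr, value)  # cache so the import runs only once
            return value
        raise AttributeError(f'module {self.__name__!r} has no attribute {attr!r}')

With this kind of pattern, `from modelscope.trainers import EpochBasedTrainer` still works, but modelscope.trainers.trainer (and torch) is only imported at that point.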

+2 -2  modelscope/trainers/builder.py

@@ -1,5 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.metainfo import Trainers
from modelscope.utils.config import ConfigDict
from modelscope.utils.constant import Tasks
from modelscope.utils.registry import Registry, build_from_cfg
@@ -8,7 +8,7 @@ TRAINERS = Registry('trainers')
HOOKS = Registry('hooks')


def build_trainer(name: str = 'EpochBasedTrainer', default_args: dict = None):
def build_trainer(name: str = Trainers.default, default_args: dict = None):
""" build trainer given a trainer name

Args:


+27 -3  modelscope/trainers/cv/__init__.py

@@ -1,3 +1,27 @@
from .image_instance_segmentation_trainer import \
ImageInstanceSegmentationTrainer
from .image_portrait_enhancement_trainer import ImagePortraitEnhancementTrainer
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .image_instance_segmentation_trainer import \
ImageInstanceSegmentationTrainer
from .image_portrait_enhancement_trainer import ImagePortraitEnhancementTrainer

else:
_import_structure = {
'image_instance_segmentation_trainer':
['ImageInstanceSegmentationTrainer'],
'image_portrait_enhancement_trainer':
['ImagePortraitEnhancementTrainer'],
}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

+2 -1  modelscope/trainers/cv/image_instance_segmentation_trainer.py

@@ -1,8 +1,9 @@
from modelscope.metainfo import Trainers
from modelscope.trainers.builder import TRAINERS
from modelscope.trainers.trainer import EpochBasedTrainer


@TRAINERS.register_module(module_name='image-instance-segmentation')
@TRAINERS.register_module(module_name=Trainers.image_instance_segmentation)
class ImageInstanceSegmentationTrainer(EpochBasedTrainer):

def __init__(self, *args, **kwargs):


+2 -1  modelscope/trainers/cv/image_portrait_enhancement_trainer.py

@@ -4,6 +4,7 @@ from collections.abc import Mapping
import torch
from torch import distributed as dist

from modelscope.metainfo import Trainers
from modelscope.trainers.builder import TRAINERS
from modelscope.trainers.optimizer.builder import build_optimizer
from modelscope.trainers.trainer import EpochBasedTrainer
@@ -11,7 +12,7 @@ from modelscope.utils.constant import ModeKeys
from modelscope.utils.logger import get_logger


@TRAINERS.register_module(module_name='gpen')
@TRAINERS.register_module(module_name=Trainers.image_portrait_enhancement)
class ImagePortraitEnhancementTrainer(EpochBasedTrainer):

def train_step(self, model, inputs):


+40 -16  modelscope/trainers/hooks/__init__.py

@@ -1,18 +1,42 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .builder import HOOKS, build_hook
from .checkpoint_hook import BestCkptSaverHook, CheckpointHook
from .evaluation_hook import EvaluationHook
from .hook import Hook
from .iter_timer_hook import IterTimerHook
from .logger.text_logger_hook import TextLoggerHook
from .lr_scheduler_hook import LrSchedulerHook
from .optimizer_hook import (ApexAMPOptimizerHook, OptimizerHook,
TorchAMPOptimizerHook)
from .priority import Priority
from typing import TYPE_CHECKING

__all__ = [
'Hook', 'HOOKS', 'CheckpointHook', 'EvaluationHook', 'LrSchedulerHook',
'OptimizerHook', 'Priority', 'build_hook', 'TextLoggerHook',
'IterTimerHook', 'TorchAMPOptimizerHook', 'ApexAMPOptimizerHook',
'BestCkptSaverHook', 'NoneOptimizerHook', 'NoneLrSchedulerHook'
]
from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .builder import HOOKS, build_hook
from .checkpoint_hook import BestCkptSaverHook, CheckpointHook
from .evaluation_hook import EvaluationHook
from .hook import Hook
from .iter_timer_hook import IterTimerHook
from .logger import TextLoggerHook, TensorboardHook
from .lr_scheduler_hook import LrSchedulerHook
from .optimizer import (ApexAMPOptimizerHook, NoneOptimizerHook,
OptimizerHook, TorchAMPOptimizerHook)
from .priority import Priority, get_priority

else:
_import_structure = {
'builder': ['HOOKS', 'build_hook'],
'checkpoint_hook': ['BestCkptSaverHook', 'CheckpointHook'],
'evaluation_hook': ['EvaluationHook'],
'hook': ['Hook'],
'iter_timer_hook': ['IterTimerHook'],
'logger': ['TensorboardHook', 'TextLoggerHook'],
'lr_scheduler_hook': ['LrSchedulerHook'],
'optimizer': [
'ApexAMPOptimizerHook', 'NoneOptimizerHook', 'OptimizerHook',
'TorchAMPOptimizerHook'
],
'priority': ['Priority', 'get_priority']
}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

+3 -2  modelscope/trainers/hooks/checkpoint_hook.py

@@ -2,6 +2,7 @@
import os

from modelscope import __version__
from modelscope.metainfo import Hooks
from modelscope.utils.checkpoint import save_checkpoint
from modelscope.utils.constant import LogKeys
from modelscope.utils.logger import get_logger
@@ -11,7 +12,7 @@ from .hook import Hook
from .priority import Priority


@HOOKS.register_module()
@HOOKS.register_module(module_name=Hooks.CheckpointHook)
class CheckpointHook(Hook):
"""Save checkpoints periodically.

@@ -98,7 +99,7 @@ class CheckpointHook(Hook):
return False


@HOOKS.register_module()
@HOOKS.register_module(module_name=Hooks.BestCkptSaverHook)
class BestCkptSaverHook(CheckpointHook):
"""Save best checkpoints hook.
Args:


+2 -1  modelscope/trainers/hooks/evaluation_hook.py

@@ -1,9 +1,10 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.metainfo import Hooks
from .builder import HOOKS
from .hook import Hook


@HOOKS.register_module()
@HOOKS.register_module(module_name=Hooks.EvaluationHook)
class EvaluationHook(Hook):
"""Evaluation hook.
Args:


+2 -1  modelscope/trainers/hooks/iter_timer_hook.py

@@ -1,13 +1,14 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import time

from modelscope.metainfo import Hooks
from modelscope.utils.constant import LogKeys
from .builder import HOOKS
from .hook import Hook
from .priority import Priority


@HOOKS.register_module()
@HOOKS.register_module(module_name=Hooks.IterTimerHook)
class IterTimerHook(Hook):
PRIORITY = Priority.LOW



+24 -4  modelscope/trainers/hooks/logger/__init__.py

@@ -1,7 +1,27 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.trainers.utils.log_buffer import LogBuffer
from .base import LoggerHook
from .tensorboard_hook import TensorboardHook
from .text_logger_hook import TextLoggerHook
from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .base import LoggerHook
from .tensorboard_hook import TensorboardHook
from .text_logger_hook import TextLoggerHook

else:
_import_structure = {
'base': ['LoggerHook'],
'tensorboard_hook': ['TensorboardHook'],
'text_logger_hook': ['TextLoggerHook']
}

import sys

__all__ = ['TextLoggerHook', 'LoggerHook', 'LogBuffer', 'TensorboardHook']
sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

+2 -1  modelscope/trainers/hooks/logger/tensorboard_hook.py

@@ -1,13 +1,14 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os

from modelscope.metainfo import Hooks
from modelscope.trainers.hooks.builder import HOOKS
from modelscope.utils.constant import LogKeys
from modelscope.utils.torch_utils import master_only
from .base import LoggerHook


@HOOKS.register_module()
@HOOKS.register_module(module_name=Hooks.TensorboardHook)
class TensorboardHook(LoggerHook):
"""TensorBoard hook for visualization.
Args:


+2 -1  modelscope/trainers/hooks/logger/text_logger_hook.py

@@ -8,13 +8,14 @@ import json
import torch
from torch import distributed as dist

from modelscope.metainfo import Hooks
from modelscope.trainers.hooks.builder import HOOKS
from modelscope.trainers.hooks.logger.base import LoggerHook
from modelscope.utils.constant import LogKeys, ModeKeys
from modelscope.utils.torch_utils import get_dist_info, is_master


@HOOKS.register_module()
@HOOKS.register_module(module_name=Hooks.TextLoggerHook)
class TextLoggerHook(LoggerHook):
"""Logger hook in text, Output log to both console and local json file.



+4 -3  modelscope/trainers/hooks/lr_scheduler_hook.py

@@ -1,4 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.metainfo import Hooks
from modelscope.trainers.lrscheduler.builder import build_lr_scheduler
from modelscope.utils.constant import LogKeys
from modelscope.utils.logger import get_logger
@@ -8,7 +9,7 @@ from .hook import Hook
from .priority import Priority


@HOOKS.register_module()
@HOOKS.register_module(module_name=Hooks.LrSchedulerHook)
class LrSchedulerHook(Hook):
"""Lr scheduler.

@@ -78,7 +79,7 @@ class LrSchedulerHook(Hook):
return lr


@HOOKS.register_module()
@HOOKS.register_module(module_name=Hooks.PlateauLrSchedulerHook)
class PlateauLrSchedulerHook(LrSchedulerHook):
"""Lr scheduler hook for `ReduceLROnPlateau`.

@@ -119,7 +120,7 @@ class PlateauLrSchedulerHook(LrSchedulerHook):
trainer.lr_scheduler.step(metrics=metrics)


@HOOKS.register_module()
@HOOKS.register_module(module_name=Hooks.NoneLrSchedulerHook)
class NoneLrSchedulerHook(LrSchedulerHook):

PRIORITY = Priority.LOW # should be after EvaluationHook
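
Hooks are now registered under explicit metainfo names, presumably so the AST scanner can index them without importing the modules. A custom hook follows the same pattern; the name 'MyCustomHook' below is purely illustrative and not part of this commit:

from modelscope.trainers.hooks.builder import HOOKS
from modelscope.trainers.hooks.hook import Hook


@HOOKS.register_module(module_name='MyCustomHook')  # hypothetical name
class MyCustomHook(Hook):
    """Does nothing; only demonstrates registration by explicit name."""

    def after_train_iter(self, trainer):
        pass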


+26 -0  modelscope/trainers/hooks/optimizer/__init__.py

@@ -0,0 +1,26 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .apex_optimizer_hook import ApexAMPOptimizerHook
from .base import OptimizerHook, NoneOptimizerHook
from .torch_optimizer_hook import TorchAMPOptimizerHook

else:
_import_structure = {
'apex_optimizer_hook': ['ApexAMPOptimizerHook'],
'base': ['OptimizerHook', 'NoneOptimizerHook'],
'torch_optimizer_hook': ['TorchAMPOptimizerHook']
}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

+75 -0  modelscope/trainers/hooks/optimizer/apex_optimizer_hook.py

@@ -0,0 +1,75 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import logging

from modelscope.metainfo import Hooks
from modelscope.trainers.hooks.builder import HOOKS
from .base import OptimizerHook


@HOOKS.register_module(module_name=Hooks.ApexAMPOptimizerHook)
class ApexAMPOptimizerHook(OptimizerHook):
"""FP16 optimizer hook. If the torch version is less than 1.6.0,
you must install apex (https://www.github.com/nvidia/apex); otherwise torch.cuda.amp is used by default.
Args:
cumulative_iters (int): interval of gradient accumulation. Default: 1
grad_clip (dict): Default None. Contains the keys:
max_norm (float or int): max norm of the gradients
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm.
For more details, please refer to `torch.nn.utils.clip_grad.clip_grad_norm_`.
loss_keys (str | list): key(s) of the loss(es) in the train outputs
opt_level (str): "O0" and "O3" are not true mixed precision,
but they are useful for establishing accuracy and speed baselines, respectively.
"O1" and "O2" are different implementations of mixed precision.
Try both, and see what gives the best speedup and accuracy for your model.
"""

def __init__(self,
cumulative_iters=1,
grad_clip=None,
loss_keys='loss',
opt_level='O1'):

super(ApexAMPOptimizerHook, self).__init__(
grad_clip=grad_clip, loss_keys=loss_keys)
self.cumulative_iters = cumulative_iters
self.opt_level = opt_level

try:
from apex import amp
except ImportError:
raise ValueError(
'apex not installed, please install apex from https://www.github.com/nvidia/apex.'
)

def before_run(self, trainer):
from apex import amp

logging.info('open fp16')
# TODO: fix this; amp should be initialized with the model itself, not the DDP/DP wrapper
if hasattr(trainer.model, 'module'):
trainer.model, trainer.optimizer = amp.initialize(
trainer.model.module,
trainer.optimizer,
opt_level=self.opt_level)
else:
trainer.model, trainer.optimizer = amp.initialize(
trainer.model, trainer.optimizer, opt_level=self.opt_level)

trainer.optimizer.zero_grad()

def after_train_iter(self, trainer):
for k in self.loss_keys:
trainer.train_outputs[k] /= self.cumulative_iters

from apex import amp
for k in self.loss_keys:
with amp.scale_loss(trainer.train_outputs[k],
trainer.optimizer) as scaled_loss:
scaled_loss.backward()

if self.every_n_iters(trainer, self.cumulative_iters):
if self.grad_clip is not None:
self.clip_grads(trainer.model.parameters(), **self.grad_clip)

trainer.optimizer.step()
trainer.optimizer.zero_grad()
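
For reference, constructing the hook directly mirrors the arguments documented above; in a real run it would normally be selected via its registered name (Hooks.ApexAMPOptimizerHook) in the trainer configuration. Note that the constructor raises if apex is missing:

from modelscope.trainers.hooks.optimizer import ApexAMPOptimizerHook

# Raises ValueError unless apex (https://www.github.com/nvidia/apex) is installed.
hook = ApexAMPOptimizerHook(
    cumulative_iters=1,            # accumulate gradients over N iterations
    grad_clip=dict(max_norm=1.0),  # forwarded to clip_grad_norm_
    loss_keys='loss',
    opt_level='O1')                # apex mixed-precision mode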

+73 -0  modelscope/trainers/hooks/optimizer/base.py

@@ -0,0 +1,73 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import logging

from torch.nn.utils import clip_grad

from modelscope.metainfo import Hooks
from modelscope.trainers.hooks.builder import HOOKS
from modelscope.trainers.hooks.hook import Hook
from modelscope.trainers.hooks.priority import Priority


@HOOKS.register_module(module_name=Hooks.OptimizerHook)
class OptimizerHook(Hook):
"""Optimizer hook.

Args:
cumulative_iters (int): interval of gradient accumulation. Default: 1
grad_clip (dict): Default None. Contains the keys:
max_norm (float or int): max norm of the gradients
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm.
For more details, please refer to `torch.nn.utils.clip_grad.clip_grad_norm_`.
loss_keys (str | list): key(s) of the loss(es) in the train outputs
"""

PRIORITY = Priority.ABOVE_NORMAL

def __init__(self,
cumulative_iters=1,
grad_clip=None,
loss_keys='loss') -> None:
if isinstance(loss_keys, str):
loss_keys = [loss_keys]
assert isinstance(loss_keys, (tuple, list))
self.loss_keys = loss_keys
self.cumulative_iters = cumulative_iters
self.grad_clip = grad_clip

def clip_grads(self, params, **clip_args):
params = list(
filter(lambda p: p.requires_grad and p.grad is not None, params))
if len(params) > 0:
return clip_grad.clip_grad_norm_(params, **clip_args)

def before_run(self, trainer):
trainer.optimizer.zero_grad()

def after_train_iter(self, trainer):
for k in self.loss_keys:
trainer.train_outputs[k] /= self.cumulative_iters
trainer.train_outputs[k].backward()

if self.every_n_iters(trainer, self.cumulative_iters):
if self.grad_clip is not None:
self.clip_grads(trainer.model.parameters(), **self.grad_clip)

trainer.optimizer.step()
trainer.optimizer.zero_grad()


@HOOKS.register_module(module_name=Hooks.NoneOptimizerHook)
class NoneOptimizerHook(OptimizerHook):

def __init__(self, cumulative_iters=1, grad_clip=None, loss_keys='loss'):

super(NoneOptimizerHook, self).__init__(
grad_clip=grad_clip, loss_keys=loss_keys)
self.cumulative_iters = cumulative_iters

def before_run(self, trainer):
return

def after_train_iter(self, trainer):
return
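
The accumulation logic above is equivalent to the following standalone PyTorch sketch (toy model and data, shown only to make the loss scaling and step cadence explicit):

import torch
from torch import nn
from torch.nn.utils import clip_grad

model = nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
cumulative_iters, grad_clip = 4, dict(max_norm=1.0)

optimizer.zero_grad()                      # what before_run does
for it in range(1, 9):                     # 1-based, like trainer iterations
    x, y = torch.randn(2, 4), torch.randn(2, 1)
    loss = nn.functional.mse_loss(model(x), y) / cumulative_iters
    loss.backward()                        # gradients accumulate across iterations
    if it % cumulative_iters == 0:         # what every_n_iters checks
        clip_grad.clip_grad_norm_(
            [p for p in model.parameters() if p.grad is not None], **grad_clip)
        optimizer.step()
        optimizer.zero_grad()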

+83 -0  modelscope/trainers/hooks/optimizer/torch_optimizer_hook.py

@@ -0,0 +1,83 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import logging

from modelscope.metainfo import Hooks
from modelscope.trainers.hooks.builder import HOOKS
from .base import OptimizerHook


@HOOKS.register_module(module_name=Hooks.TorchAMPOptimizerHook)
class TorchAMPOptimizerHook(OptimizerHook):
"""FP16 optimizer hook. If the torch version is less than 1.6.0,
you must install apex (https://www.github.com/nvidia/apex); otherwise torch.cuda.amp is used by default.
Args:
cumulative_iters (int): interval of gradient accumulation. Default: 1
grad_clip (dict): Default None. Contains the keys:
max_norm (float or int): max norm of the gradients
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm.
For more details, please refer to `torch.nn.utils.clip_grad.clip_grad_norm_`.
loss_keys (str | list): key(s) of the loss(es) in the train outputs
loss_scale (float | dict): grad scale config. If loss_scale is a float,
static loss scaling will be used with the specified scale.
It can also be a dict containing the arguments of GradScaler. For PyTorch >= 1.6,
the official torch.cuda.amp.GradScaler is used;
please refer to https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler for the parameters.
"""

def __init__(self,
cumulative_iters=1,
grad_clip=None,
loss_keys='loss',
loss_scale={}):

super(TorchAMPOptimizerHook, self).__init__(
grad_clip=grad_clip, loss_keys=loss_keys)
self.cumulative_iters = cumulative_iters
self._scale_update_param = None

from torch.cuda import amp

if isinstance(loss_scale, float):
self._scale_update_param = loss_scale
self.scaler = amp.GradScaler(init_scale=loss_scale)
elif isinstance(loss_scale, dict):
self.scaler = amp.GradScaler(**loss_scale)
else:
raise ValueError(
f'`loss_scale` type must be in [float, dict], but got {loss_scale}'
)

def before_run(self, trainer):
logging.info('open fp16')
trainer.optimizer.zero_grad()

if hasattr(trainer.model, 'module'):
self._ori_model_forward = trainer.model.module.forward
self._model = trainer.model.module
else:
self._ori_model_forward = trainer.model.forward
self._model = trainer.model

self.ori_model_forward = trainer.model.forward

def before_train_iter(self, trainer):
from torch.cuda import amp
setattr(self._model, 'forward', amp.autocast()(self._model.forward))

def after_train_iter(self, trainer):
for k in self.loss_keys:
trainer.train_outputs[k] /= self.cumulative_iters

for k in self.loss_keys:
self.scaler.scale(trainer.train_outputs[k]).backward()

if self.every_n_iters(trainer, self.cumulative_iters):
self.scaler.unscale_(trainer.optimizer)
if self.grad_clip is not None:
self.clip_grads(trainer.model.parameters(), **self.grad_clip)

self.scaler.step(trainer.optimizer)
self.scaler.update(self._scale_update_param)
trainer.optimizer.zero_grad()

setattr(self._model, 'forward', self._ori_model_forward)
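
As the constructor shows, loss_scale maps onto torch.cuda.amp.GradScaler: a float gives a fixed scale (used as init_scale and passed back to scaler.update() each step), while a dict is forwarded as GradScaler keyword arguments. For example:

from modelscope.trainers.hooks.optimizer import TorchAMPOptimizerHook

# Static loss scale: GradScaler(init_scale=512.0), reset to 512.0 on every update.
static_hook = TorchAMPOptimizerHook(loss_keys='loss', loss_scale=512.0)

# Dynamic loss scale: the dict becomes torch.cuda.amp.GradScaler(**loss_scale).
dynamic_hook = TorchAMPOptimizerHook(
    cumulative_iters=2,
    loss_scale=dict(init_scale=2.**16, growth_interval=2000))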

+0 -218  modelscope/trainers/hooks/optimizer_hook.py

@@ -1,218 +0,0 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import logging

from torch.nn.utils import clip_grad

from .builder import HOOKS
from .hook import Hook
from .priority import Priority


@HOOKS.register_module()
class OptimizerHook(Hook):
"""Optimizer hook

Args:
cumulative_iters (int): interval of gradients accumulation. Default: 1
grad_clip (dict): Default None. Containing keys:
max_norm (float or int): max norm of the gradients
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm.
More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_`
loss_keys (str | list): keys list of loss
"""

PRIORITY = Priority.ABOVE_NORMAL

def __init__(self,
cumulative_iters=1,
grad_clip=None,
loss_keys='loss') -> None:
if isinstance(loss_keys, str):
loss_keys = [loss_keys]
assert isinstance(loss_keys, (tuple, list))
self.loss_keys = loss_keys
self.cumulative_iters = cumulative_iters
self.grad_clip = grad_clip

def clip_grads(self, params, **clip_args):
params = list(
filter(lambda p: p.requires_grad and p.grad is not None, params))
if len(params) > 0:
return clip_grad.clip_grad_norm_(params, **clip_args)

def before_run(self, trainer):
trainer.optimizer.zero_grad()

def after_train_iter(self, trainer):
for k in self.loss_keys:
trainer.train_outputs[k] /= self.cumulative_iters
trainer.train_outputs[k].backward()

if self.every_n_iters(trainer, self.cumulative_iters):
if self.grad_clip is not None:
self.clip_grads(trainer.model.parameters(), **self.grad_clip)

trainer.optimizer.step()
trainer.optimizer.zero_grad()


@HOOKS.register_module()
class TorchAMPOptimizerHook(OptimizerHook):
"""Fp16 optimizer, if torch version is less than 1.6.0,
you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default
Args:
cumulative_iters (int): interval of gradients accumulation. Default: 1
grad_clip (dict): Default None. Containing keys:
max_norm (float or int): max norm of the gradients
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm.
More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_`
loss_keys (str | list): keys list of loss
loss_scale (float | dict): grade scale config. If loss_scale is a float,
static loss scaling will be used with the specified scale.
It can also be a dict containing arguments of GradScalar. For Pytorch >= 1.6,
we use official torch.cuda.amp.GradScaler.
please refer to: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler for the parameters.
"""

def __init__(self,
cumulative_iters=1,
grad_clip=None,
loss_keys='loss',
loss_scale={}):

super(TorchAMPOptimizerHook, self).__init__(
grad_clip=grad_clip, loss_keys=loss_keys)
self.cumulative_iters = cumulative_iters
self._scale_update_param = None

from torch.cuda import amp

if isinstance(loss_scale, float):
self._scale_update_param = loss_scale
self.scaler = amp.GradScaler(init_scale=loss_scale)
elif isinstance(loss_scale, dict):
self.scaler = amp.GradScaler(**loss_scale)
else:
raise ValueError(
'`loss_scale` type must be in [float, dict], but got {loss_scale}'
)

def before_run(self, trainer):
logging.info('open fp16')
trainer.optimizer.zero_grad()

if hasattr(trainer.model, 'module'):
self._ori_model_forward = trainer.model.module.forward
self._model = trainer.model.module
else:
self._ori_model_forward = trainer.model.forward
self._model = trainer.model

self.ori_model_forward = trainer.model.forward

def before_train_iter(self, trainer):
from torch.cuda import amp
setattr(self._model, 'forward', amp.autocast()(self._model.forward))

def after_train_iter(self, trainer):
for k in self.loss_keys:
trainer.train_outputs[k] /= self.cumulative_iters

for k in self.loss_keys:
self.scaler.scale(trainer.train_outputs[k]).backward()

if self.every_n_iters(trainer, self.cumulative_iters):
self.scaler.unscale_(trainer.optimizer)
if self.grad_clip is not None:
self.clip_grads(trainer.model.parameters(), **self.grad_clip)

self.scaler.step(trainer.optimizer)
self.scaler.update(self._scale_update_param)
trainer.optimizer.zero_grad()

setattr(self._model, 'forward', self._ori_model_forward)


@HOOKS.register_module()
class ApexAMPOptimizerHook(OptimizerHook):
"""Fp16 optimizer, if torch version is less than 1.6.0,
you must install apex (https://www.github.com/nvidia/apex) else use torch.cuda.amp by default
Args:
cumulative_iters (int): interval of gradients accumulation. Default: 1
grad_clip (dict): Default None. Containing keys:
max_norm (float or int): max norm of the gradients
norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for infinity norm.
More details please refer to `torch.nn.utils.clip_grad.clip_grad_norm_`
loss_keys (str | list): keys list of loss
opt_level (str): "O0" and "O3" are not true mixed precision,
but they are useful for establishing accuracy and speed baselines, respectively.
"O1" and "O2" are different implementations of mixed precision.
Try both, and see what gives the best speedup and accuracy for your model.
"""

def __init__(self,
cumulative_iters=1,
grad_clip=None,
loss_keys='loss',
opt_level='O1'):

super(ApexAMPOptimizerHook, self).__init__(
grad_clip=grad_clip, loss_keys=loss_keys)
self.cumulative_iters = cumulative_iters
self.opt_level = opt_level

try:
from apex import amp
except ImportError:
raise ValueError(
'apex not installed, please install apex from https://www.github.com/nvidia/apex.'
)

def before_run(self, trainer):
from apex import amp

logging.info('open fp16')
# TODO: fix it should initialze amp with model not wrapper by DDP or DP
if hasattr(trainer.model, 'module'):
trainer.model, trainer.optimizer = amp.initialize(
trainer.model.module,
trainer.optimizer,
opt_level=self.opt_level)
else:
trainer.model, trainer.optimizer = amp.initialize(
trainer.model, trainer.optimizer, opt_level=self.opt_level)

trainer.optimizer.zero_grad()

def after_train_iter(self, trainer):
for k in self.loss_keys:
trainer.train_outputs[k] /= self.cumulative_iters

from apex import amp
for k in self.loss_keys:
with amp.scale_loss(trainer.train_outputs[k],
trainer.optimizer) as scaled_loss:
scaled_loss.backward()

if self.every_n_iters(trainer, self.cumulative_iters):
if self.grad_clip is not None:
self.clip_grads(trainer.model.parameters(), **self.grad_clip)

trainer.optimizer.step()
trainer.optimizer.zero_grad()


@HOOKS.register_module()
class NoneOptimizerHook(OptimizerHook):

def __init__(self, cumulative_iters=1, grad_clip=None, loss_keys='loss'):

super(NoneOptimizerHook, self).__init__(
grad_clip=grad_clip, loss_keys=loss_keys)
self.cumulative_iters = cumulative_iters

def before_run(self, trainer):
return

def after_train_iter(self, trainer):
return

+23 -6  modelscope/trainers/lrscheduler/__init__.py

@@ -1,8 +1,25 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .builder import LR_SCHEDULER, build_lr_scheduler
from .warmup import BaseWarmup, ConstantWarmup, ExponentialWarmup, LinearWarmup
from typing import TYPE_CHECKING

__all__ = [
'LR_SCHEDULER', 'build_lr_scheduler', 'BaseWarmup', 'ConstantWarmup',
'LinearWarmup', 'ExponentialWarmup'
]
from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .builder import LR_SCHEDULER, build_lr_scheduler
from .warmup import BaseWarmup, ConstantWarmup, ExponentialWarmup, LinearWarmup

else:
_import_structure = {
'builder': ['LR_SCHEDULER', 'build_lr_scheduler'],
'warmup':
['BaseWarmup', 'ConstantWarmup', 'ExponentialWarmup', 'LinearWarmup']
}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

+1 -1  modelscope/trainers/lrscheduler/builder.py

@@ -4,7 +4,7 @@ import inspect
from modelscope.utils.config import ConfigDict
from modelscope.utils.registry import Registry, build_from_cfg, default_group

LR_SCHEDULER = Registry('lr scheduler')
LR_SCHEDULER = Registry('lr_scheduler')


def build_lr_scheduler(cfg: ConfigDict, default_args: dict = None):


+23 -3  modelscope/trainers/lrscheduler/warmup/__init__.py

@@ -1,5 +1,25 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .base import BaseWarmup
from .warmup import ConstantWarmup, ExponentialWarmup, LinearWarmup

__all__ = ['BaseWarmup', 'ConstantWarmup', 'LinearWarmup', 'ExponentialWarmup']
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .base import BaseWarmup
from .warmup import ConstantWarmup, ExponentialWarmup, LinearWarmup

else:
_import_structure = {
'base': ['BaseWarmup'],
'warmup': ['ConstantWarmup', 'ExponentialWarmup', 'LinearWarmup']
}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

+4 -3  modelscope/trainers/lrscheduler/warmup/warmup.py

@@ -1,9 +1,10 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.metainfo import LR_Schedulers
from modelscope.trainers.lrscheduler.builder import LR_SCHEDULER
from .base import BaseWarmup


@LR_SCHEDULER.register_module()
@LR_SCHEDULER.register_module(module_name=LR_Schedulers.ConstantWarmup)
class ConstantWarmup(BaseWarmup):
"""Constant warmup scheduler.

@@ -29,7 +30,7 @@ class ConstantWarmup(BaseWarmup):
return self.warmup_ratio


@LR_SCHEDULER.register_module()
@LR_SCHEDULER.register_module(module_name=LR_Schedulers.LinearWarmup)
class LinearWarmup(BaseWarmup):
"""Linear warmup scheduler.

@@ -54,7 +55,7 @@ class LinearWarmup(BaseWarmup):
return 1 - k


@LR_SCHEDULER.register_module()
@LR_SCHEDULER.register_module(module_name=LR_Schedulers.ExponentialWarmup)
class ExponentialWarmup(BaseWarmup):
"""Exponential warmup scheduler.



+20 -1  modelscope/trainers/multi_modal/__init__.py

@@ -1 +1,20 @@
from .clip import CLIPTrainer
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .clip import CLIPTrainer

else:
_import_structure = {'clip': ['CLIPTrainer']}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

+22 -1  modelscope/trainers/nlp/__init__.py

@@ -1 +1,22 @@
from .sequence_classification_trainer import SequenceClassificationTrainer
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .sequence_classification_trainer import SequenceClassificationTrainer

else:
_import_structure = {
'sequence_classification_trainer': ['SequenceClassificationTrainer']
}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

+2 -1  modelscope/trainers/nlp/sequence_classification_trainer.py

@@ -3,6 +3,7 @@ from typing import Dict, Optional, Tuple, Union

import numpy as np

from modelscope.metainfo import Trainers
from modelscope.trainers.base import BaseTrainer
from modelscope.trainers.builder import TRAINERS
from modelscope.utils.logger import get_logger
@@ -11,7 +12,7 @@ PATH = None
logger = get_logger(PATH)


@TRAINERS.register_module(module_name=r'bert-sentiment-analysis')
@TRAINERS.register_module(module_name=Trainers.bert_sentiment_analysis)
class SequenceClassificationTrainer(BaseTrainer):

def __init__(self, cfg_file: str, *args, **kwargs):


+3 -2  modelscope/trainers/nlp_trainer.py

@@ -6,6 +6,7 @@ from torch import nn
from torch.utils.data import Dataset

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Trainers
from modelscope.metrics.builder import build_metric
from modelscope.models.base import Model, TorchModel
from modelscope.msdatasets import MsDataset
@@ -17,7 +18,7 @@ from .base import TRAINERS
from .trainer import EpochBasedTrainer


@TRAINERS.register_module(module_name='NlpEpochBasedTrainer')
@TRAINERS.register_module(module_name=Trainers.nlp_base_trainer)
class NlpEpochBasedTrainer(EpochBasedTrainer):

def __init__(
@@ -142,7 +143,7 @@ class NlpEpochBasedTrainer(EpochBasedTrainer):
return build_preprocessor(cfg, Tasks.find_field_by_task(self.cfg.task))


@TRAINERS.register_module(module_name='VecoTrainer')
@TRAINERS.register_module(module_name=Trainers.nlp_veco_trainer)
class VecoTrainer(NlpEpochBasedTrainer):

def evaluate(self, checkpoint_path=None):


+2 -1  modelscope/trainers/trainer.py

@@ -17,6 +17,7 @@ from torch.utils.data import DataLoader, Dataset
from torch.utils.data.distributed import DistributedSampler

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Trainers
from modelscope.metrics import build_metric, task_default_metrics
from modelscope.models.base import Model, TorchModel
from modelscope.msdatasets.ms_dataset import MsDataset
@@ -45,7 +46,7 @@ from .parallel.builder import build_parallel
from .parallel.utils import is_parallel


@TRAINERS.register_module()
@TRAINERS.register_module(module_name=Trainers.default)
class EpochBasedTrainer(BaseTrainer):
"""Epoch based Trainer, a training helper for PyTorch.



+19 -10  modelscope/utils/ast_utils.py

@@ -5,6 +5,7 @@ import importlib
import os
import os.path as osp
import time
import traceback
from functools import reduce
from typing import Generator, Union

@@ -13,8 +14,9 @@ import json

from modelscope import __version__
from modelscope.fileio.file import LocalStorage
from modelscope.metainfo import (Heads, Metrics, Models, Pipelines,
Preprocessors, TaskModels, Trainers)
from modelscope.metainfo import (Heads, Hooks, LR_Schedulers, Metrics, Models,
Optimizers, Pipelines, Preprocessors,
TaskModels, Trainers)
from modelscope.utils.constant import Fields, Tasks
from modelscope.utils.file_utils import get_default_cache_dir
from modelscope.utils.logger import get_logger
@@ -28,7 +30,8 @@ MODELSCOPE_PATH = '/'.join(os.path.dirname(__file__).split('/')[:-1])
REGISTER_MODULE = 'register_module'
IGNORED_PACKAGES = ['modelscope', '.']
SCAN_SUB_FOLDERS = [
'models', 'metrics', 'pipelines', 'preprocessors', 'task_datasets'
'models', 'metrics', 'pipelines', 'preprocessors', 'task_datasets',
'trainers'
]
INDEXER_FILE = 'ast_indexer'
DECORATOR_KEY = 'decorators'
@@ -305,9 +308,11 @@ class AstScaning(object):
output = [functions[0]]

if len(args_list) == 0 and len(keyword_list) == 0:
args_list.append(None)
args_list.append(default_group)
if len(keyword_list) == 0 and len(args_list) == 1:
args_list.append(None)
if len(keyword_list) == 1 and len(args_list) == 0:
args_list.append(default_group)

args_list.extend(keyword_list)

@@ -318,6 +323,8 @@ class AstScaning(object):
# the case (default_group)
elif item[1] is None:
output.append(item[0])
elif isinstance(item, str):
output.append(item)
else:
output.append('.'.join(item))
return (output[0], self._get_registry_value(output[1]),
@@ -443,9 +450,11 @@ class FilesAstScaning(object):
try:
output = self.astScaner.generate_ast(file)
except Exception as e:
detail = traceback.extract_tb(e.__traceback__)
raise Exception(
'During ast indexing, there are index errors in the '
f'file {file} : {type(e).__name__}.{e}')
f'During ast indexing, error is in the file {detail[-1].filename}'
f' line: {detail[-1].lineno}: "{detail[-1].line}" with error msg: '
f'"{type(e).__name__}: {e}"')

import_list = self.parse_import(output)
return output[DECORATOR_KEY], import_list
@@ -523,14 +532,14 @@ class FilesAstScaning(object):
return md5.hexdigest()


fileScaner = FilesAstScaning()
file_scanner = FilesAstScaning()


def _save_index(index, file_path):
# convert tuple key to str key
index[INDEX_KEY] = {str(k): v for k, v in index[INDEX_KEY].items()}
index[VERSION_KEY] = __version__
index[MD5_KEY] = fileScaner.files_mtime_md5()
index[MD5_KEY] = file_scanner.files_mtime_md5()
json_index = json.dumps(index)
storage.write(json_index.encode(), file_path)
index[INDEX_KEY] = {
@@ -579,7 +588,7 @@ def load_index(force_rebuild=False):
index = None
if not force_rebuild and os.path.exists(file_path):
wrapped_index = _load_index(file_path)
md5 = fileScaner.files_mtime_md5()
md5 = file_scanner.files_mtime_md5()
if (wrapped_index[VERSION_KEY] == __version__
and wrapped_index[MD5_KEY] == md5):
index = wrapped_index
@@ -591,7 +600,7 @@ def load_index(force_rebuild=False):
logger.info(
f'No valid ast index found from {file_path}, rebuilding ast index!'
)
index = fileScaner.get_files_scan_results()
index = file_scanner.get_files_scan_results()
_save_index(index, file_path)
return index
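
Because 'trainers' is now part of SCAN_SUB_FOLDERS, a previously cached index does not cover the new trainer registrations; the version/md5 check above should trigger a rebuild automatically, and one can also be forced explicitly:

from modelscope.utils.ast_utils import load_index

# Rescans SCAN_SUB_FOLDERS (now including 'trainers') and rewrites the cached index.
index = load_index(force_rebuild=True)
print(sorted(index.keys()))  # e.g. the index, version and md5 entries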



+2 -0  requirements/multi-modal.txt

@@ -7,4 +7,6 @@ pycocotools>=2.0.4
# which introduced compatibility issues that are being investigated
rouge_score<=0.0.4
timm
tokenizers
torchvision
transformers>=4.12.0

+2 -0  requirements/nlp.txt

@@ -6,3 +6,5 @@ pai-easynlp
rouge_score<=0.0.4
seqeval
spacy>=2.3.5
tokenizers
transformers>=4.12.0

+0 -2  requirements/runtime.txt

@@ -13,7 +13,5 @@ requests
scipy
setuptools
tensorboard
tokenizers
tqdm>=4.64.0
transformers>=4.12.0
yapf

+2 -1  tests/trainers/hooks/logger/test_tensorboard_hook.py

@@ -10,6 +10,7 @@ import numpy as np
import torch
from torch import nn

from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from modelscope.utils.constant import LogKeys, ModelFile
from modelscope.utils.test_utils import create_dummy_test_dataset
@@ -73,7 +74,7 @@ class TensorboardHookTest(unittest.TestCase):
with open(config_path, 'w') as f:
json.dump(json_cfg, f)

trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=DummyModel(),


+3 -2  tests/trainers/hooks/test_checkpoint_hook.py

@@ -9,6 +9,7 @@ import numpy as np
import torch
from torch import nn

from modelscope.metainfo import Trainers
from modelscope.metrics.builder import METRICS, MetricKeys
from modelscope.trainers import build_trainer
from modelscope.utils.constant import LogKeys, ModelFile
@@ -108,7 +109,7 @@ class CheckpointHookTest(unittest.TestCase):
with open(config_path, 'w') as f:
json.dump(json_cfg, f)

trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=DummyModel(),
@@ -179,7 +180,7 @@ class BestCkptSaverHookTest(unittest.TestCase):
with open(config_path, 'w') as f:
json.dump(json_cfg, f)

trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=DummyModel(),


+2 -1  tests/trainers/hooks/test_evaluation_hook.py

@@ -9,6 +9,7 @@ import numpy as np
import torch
from torch import nn

from modelscope.metainfo import Trainers
from modelscope.metrics.builder import METRICS, MetricKeys
from modelscope.trainers import build_trainer
from modelscope.utils.constant import LogKeys, ModelFile
@@ -97,7 +98,7 @@ class EvaluationHookTest(unittest.TestCase):
with open(config_path, 'w') as f:
json.dump(json_cfg, f)

trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=DummyModel(),


+4 -3  tests/trainers/hooks/test_lr_scheduler_hook.py

@@ -11,6 +11,7 @@ from torch import nn
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiStepLR, ReduceLROnPlateau

from modelscope.metainfo import Trainers
from modelscope.metrics.builder import METRICS, MetricKeys
from modelscope.trainers import build_trainer
from modelscope.utils.constant import LogKeys, ModelFile, TrainerStages
@@ -89,7 +90,7 @@ class LrSchedulerHookTest(unittest.TestCase):
model = DummyModel()
optimizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4])
trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=model,
@@ -161,7 +162,7 @@ class LrSchedulerHookTest(unittest.TestCase):
model = DummyModel()
# optimmizer = SGD(model.parameters(), lr=0.01)
# lr_scheduler = MultiStepLR(optimmizer, milestones=[2, 4])
trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=model,
@@ -258,7 +259,7 @@ class PlateauLrSchedulerHookTest(unittest.TestCase):

model = DummyModel()
optimizer = SGD(model.parameters(), lr=0.01)
trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=model,


+3 -2  tests/trainers/hooks/test_optimizer_hook.py

@@ -11,6 +11,7 @@ from torch import nn
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiStepLR

from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from modelscope.utils.constant import ModelFile, TrainerStages
from modelscope.utils.test_utils import create_dummy_test_dataset
@@ -64,7 +65,7 @@ class OptimizerHookTest(unittest.TestCase):
model = DummyModel()
optimizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = MultiStepLR(optimizer, milestones=[1, 2])
trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=model,
@@ -130,7 +131,7 @@ class TorchAMPOptimizerHookTest(unittest.TestCase):
model = DummyModel().cuda()
optimizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = MultiStepLR(optimizer, milestones=[1, 2])
trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=model,


+2 -1  tests/trainers/hooks/test_timer_hook.py

@@ -11,6 +11,7 @@ from torch import nn
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiStepLR

from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from modelscope.utils.constant import LogKeys, ModelFile, TrainerStages
from modelscope.utils.test_utils import create_dummy_test_dataset
@@ -68,7 +69,7 @@ class IterTimerHookTest(unittest.TestCase):
model = DummyModel()
optimizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4])
trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=model,


+3 -2  tests/trainers/test_finetune_sequence_classification.py

@@ -4,6 +4,7 @@ import shutil
import tempfile
import unittest

from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer


@@ -23,7 +24,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase):
model_id,
train_dataset,
eval_dataset,
name='NlpEpochBasedTrainer',
name=Trainers.nlp_base_trainer,
cfg_modify_fn=None,
**kwargs):
kwargs = dict(
@@ -236,7 +237,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase):
'damo/nlp_veco_fill-mask-large',
train_datasets,
eval_datasets,
name='VecoTrainer',
name=Trainers.nlp_veco_trainer,
cfg_modify_fn=cfg_modify_fn)




+2 -1  tests/trainers/test_finetune_token_classificatin.py

@@ -5,6 +5,7 @@ import tempfile
import unittest
from functools import reduce

from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from modelscope.utils.test_utils import test_level

@@ -25,7 +26,7 @@ class TestFinetuneTokenClassification(unittest.TestCase):
model_id,
train_dataset,
eval_dataset,
name='NlpEpochBasedTrainer',
name=Trainers.nlp_base_trainer,
cfg_modify_fn=None,
**kwargs):
kwargs = dict(


+3 -2  tests/trainers/test_image_instance_segmentation_trainer.py

@@ -7,6 +7,7 @@ import zipfile
from functools import partial

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Trainers
from modelscope.models.cv.image_instance_segmentation import (
CascadeMaskRCNNSwinModel, ImageInstanceSegmentationCocoDataset)
from modelscope.trainers import build_trainer
@@ -79,7 +80,7 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase):
work_dir=self.tmp_dir)

trainer = build_trainer(
name='image-instance-segmentation', default_args=kwargs)
name=Trainers.image_instance_segmentation, default_args=kwargs)
trainer.train()
results_files = os.listdir(self.tmp_dir)
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
@@ -103,7 +104,7 @@ class TestImageInstanceSegmentationTrainer(unittest.TestCase):
work_dir=self.tmp_dir)

trainer = build_trainer(
name='image-instance-segmentation', default_args=kwargs)
name=Trainers.image_instance_segmentation, default_args=kwargs)
trainer.train()
results_files = os.listdir(self.tmp_dir)
self.assertIn(f'{trainer.timestamp}.log.json', results_files)


+5 -2  tests/trainers/test_image_portrait_enhancement_trainer.py

@@ -11,6 +11,7 @@ import torch
from torch.utils import data as data

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Trainers
from modelscope.models.cv.image_portrait_enhancement import \
ImagePortraitEnhancement
from modelscope.trainers import build_trainer
@@ -91,7 +92,8 @@ class TestImagePortraitEnhancementTrainer(unittest.TestCase):
device='gpu',
work_dir=self.tmp_dir)

trainer = build_trainer(name='gpen', default_args=kwargs)
trainer = build_trainer(
name=Trainers.image_portrait_enhancement, default_args=kwargs)
trainer.train()

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
@@ -111,7 +113,8 @@ class TestImagePortraitEnhancementTrainer(unittest.TestCase):
max_epochs=2,
work_dir=self.tmp_dir)

trainer = build_trainer(name='gpen', default_args=kwargs)
trainer = build_trainer(
name=Trainers.image_portrait_enhancement, default_args=kwargs)
trainer.train()




+3 -2  tests/trainers/test_text_generation_trainer.py

@@ -5,6 +5,7 @@ import tempfile
import unittest

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Trainers
from modelscope.models.nlp.palm_v2 import PalmForTextGeneration
from modelscope.msdatasets import MsDataset
from modelscope.trainers import build_trainer
@@ -57,7 +58,7 @@ class TestTextGenerationTrainer(unittest.TestCase):
work_dir=self.tmp_dir)

trainer = build_trainer(
name='NlpEpochBasedTrainer', default_args=kwargs)
name=Trainers.nlp_base_trainer, default_args=kwargs)
trainer.train()
results_files = os.listdir(self.tmp_dir)
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
@@ -122,7 +123,7 @@ class TestTextGenerationTrainer(unittest.TestCase):
cfg_modify_fn=cfg_modify_fn,
model_revision='beta')
trainer = build_trainer(
name='NlpEpochBasedTrainer', default_args=kwargs)
name=Trainers.nlp_base_trainer, default_args=kwargs)
trainer.train()




+7 -6  tests/trainers/test_trainer.py

@@ -13,6 +13,7 @@ from torch import nn
from torch.optim import SGD
from torch.optim.lr_scheduler import StepLR

from modelscope.metainfo import Trainers
from modelscope.metrics.builder import MetricKeys
from modelscope.msdatasets import MsDataset
from modelscope.trainers import build_trainer
@@ -101,14 +102,14 @@ class TrainerTest(unittest.TestCase):
'workers_per_gpu': 1,
'shuffle': False
},
'metrics': ['seq_cls_metric']
'metrics': ['seq-cls-metric']
}
}
config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION)
with open(config_path, 'w') as f:
json.dump(json_cfg, f)

trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=DummyModel(),
@@ -155,7 +156,7 @@ class TrainerTest(unittest.TestCase):
'workers_per_gpu': 1,
'shuffle': False
},
'metrics': ['seq_cls_metric']
'metrics': ['seq-cls-metric']
}
}

@@ -166,7 +167,7 @@ class TrainerTest(unittest.TestCase):
model = DummyModel()
optimmizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = StepLR(optimmizer, 2)
trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=model,
@@ -205,7 +206,7 @@ class TrainerTest(unittest.TestCase):
'workers_per_gpu': 1,
'shuffle': False
},
'metrics': ['seq_cls_metric']
'metrics': ['seq-cls-metric']
}
}

@@ -216,7 +217,7 @@ class TrainerTest(unittest.TestCase):
model = DummyModel()
optimmizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = StepLR(optimmizer, 2)
trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=model,


+3 -2  tests/trainers/test_trainer_gpu.py

@@ -12,8 +12,9 @@ from torch import nn
from torch.optim import SGD
from torch.optim.lr_scheduler import StepLR

from modelscope.metainfo import Trainers
from modelscope.metrics.builder import MetricKeys
from modelscope.trainers import build_trainer
from modelscope.trainers import EpochBasedTrainer, build_trainer
from modelscope.utils.constant import LogKeys, ModeKeys, ModelFile
from modelscope.utils.test_utils import (DistributedTestCase,
create_dummy_test_dataset, test_level)
@@ -70,7 +71,7 @@ def train_func(work_dir, dist=False):
model = DummyModel()
optimmizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = StepLR(optimmizer, 2)
trainer_name = 'EpochBasedTrainer'
trainer_name = Trainers.default
kwargs = dict(
cfg_file=config_path,
model=model,

