
[to #43112534] finetune support and first case

Co-contributed with 夕陌 & 雨泓

 * add torch epoch-based trainer and dist utils
 * add hooks, including optimizer, lr scheduler, logging, checkpoint, evaluation and time profiling
 * add torch model base and test
 * add optimizer and lr scheduler modules
 * add sbert text classification example
 * add task_dataset for dataset-level processing

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9338412
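As a reading aid (not part of the diff), here is a minimal sketch of how the new finetune flow could be driven from user code. Only build_trainer and the config file come from this commit; the model id and the keyword arguments passed through default_args (model, cfg_file, datasets, work_dir) are assumptions about what EpochBasedTrainer accepts.

# Hedged sketch of the intended finetune entry point; the default_args keys
# and the model id below are assumptions, only build_trainer() and the config
# path come from this commit.
from modelscope.trainers import build_trainer

my_train_dataset = my_eval_dataset = None  # placeholders for real datasets

trainer = build_trainer(  # defaults to the new 'EpochBasedTrainer'
    default_args=dict(
        model='damo/nlp_structbert_sentence-similarity_chinese-base',  # hypothetical id
        cfg_file='configs/nlp/sbert_sentence_similarity.json',
        train_dataset=my_train_dataset,
        eval_dataset=my_eval_dataset,
        work_dir='/tmp'))
trainer.train()               # runs the hooks configured under "train.hooks"
metrics = trainer.evaluate()  # e.g. {'accuracy': ...} via SequenceClassificationMetric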
master · wenmeng.zwm · 3 years ago · commit 231f400133
91 changed files with 3936 additions and 68 deletions
1. configs/cv/configuration.json (+175, -0)
2. configs/nlp/sbert_sentence_similarity.json (+77, -0)
3. docs/source/api/modelscope.pipelines.rst (+2, -2)
4. modelscope/metainfo.py (+13, -0)
5. modelscope/metrics/__init__.py (+3, -0)
6. modelscope/metrics/base.py (+37, -0)
7. modelscope/metrics/builder.py (+35, -0)
8. modelscope/metrics/sequence_classification_metric.py (+40, -0)
9. modelscope/models/base_torch.py (+23, -0)
10. modelscope/models/multi_modal/clip/clip_model.py (+2, -2)
11. modelscope/models/multi_modal/image_captioning_model.py (+1, -1)
12. modelscope/models/nlp/sbert_for_sequence_classification.py (+12, -1)
13. modelscope/outputs.py (+1, -0)
14. modelscope/pipelines/audio/ans_pipeline.py (+1, -1)
15. modelscope/pipelines/audio/linear_aec_pipeline.py (+1, -1)
16. modelscope/pipelines/audio/text_to_speech_pipeline.py (+1, -1)
17. modelscope/pipelines/base.py (+1, -1)
18. modelscope/pipelines/cv/action_recognition_pipeline.py (+1, -1)
19. modelscope/pipelines/cv/animal_recog_pipeline.py (+1, -1)
20. modelscope/pipelines/cv/cmdssl_video_embedding_pipleline.py (+1, -1)
21. modelscope/pipelines/cv/image_cartoon_pipeline.py (+1, -1)
22. modelscope/pipelines/cv/image_matting_pipeline.py (+1, -1)
23. modelscope/pipelines/cv/ocr_detection_pipeline.py (+1, -1)
24. modelscope/pipelines/multi_modal/text_to_image_synthesis_pipeline.py (+1, -1)
25. modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py (+1, -1)
26. modelscope/pipelines/nlp/dialog_modeling_pipeline.py (+1, -1)
27. modelscope/pipelines/nlp/dialog_state_tracking_pipeline.py (+1, -1)
28. modelscope/pipelines/nlp/fill_mask_pipeline.py (+1, -1)
29. modelscope/pipelines/nlp/nli_pipeline.py (+1, -1)
30. modelscope/pipelines/nlp/sentence_similarity_pipeline.py (+1, -1)
31. modelscope/pipelines/nlp/sentiment_classification_pipeline.py (+1, -1)
32. modelscope/pipelines/nlp/sequence_classification_pipeline.py (+1, -1)
33. modelscope/pipelines/nlp/text_generation_pipeline.py (+1, -1)
34. modelscope/pipelines/nlp/translation_pipeline.py (+1, -1)
35. modelscope/pipelines/nlp/word_segmentation_pipeline.py (+1, -1)
36. modelscope/pipelines/nlp/zero_shot_classification_pipeline.py (+1, -1)
37. modelscope/preprocessors/nlp.py (+28, -2)
38. modelscope/task_datasets/__init__.py (+3, -0)
39. modelscope/task_datasets/base.py (+48, -0)
40. modelscope/task_datasets/builder.py (+21, -0)
41. modelscope/task_datasets/torch_base_dataset.py (+63, -0)
42. modelscope/trainers/__init__.py (+1, -0)
43. modelscope/trainers/base.py (+4, -0)
44. modelscope/trainers/builder.py (+2, -3)
45. modelscope/trainers/hooks/__init__.py (+16, -0)
46. modelscope/trainers/hooks/builder.py (+9, -0)
47. modelscope/trainers/hooks/checkpoint_hook.py (+92, -0)
48. modelscope/trainers/hooks/evaluation_hook.py (+74, -0)
49. modelscope/trainers/hooks/hook.py (+208, -0)
50. modelscope/trainers/hooks/iter_timer_hook.py (+22, -0)
51. modelscope/trainers/hooks/logger/__init__.py (+6, -0)
52. modelscope/trainers/hooks/logger/base.py (+115, -0)
53. modelscope/trainers/hooks/logger/text_logger_hook.py (+159, -0)
54. modelscope/trainers/hooks/lr_scheduler_hook.py (+71, -0)
55. modelscope/trainers/hooks/optimizer_hook.py (+37, -0)
56. modelscope/trainers/hooks/priority.py (+62, -0)
57. modelscope/trainers/lrscheduler/__init__.py (+8, -0)
58. modelscope/trainers/lrscheduler/builder.py (+47, -0)
59. modelscope/trainers/lrscheduler/warmup/__init__.py (+5, -0)
60. modelscope/trainers/lrscheduler/warmup/base.py (+75, -0)
61. modelscope/trainers/lrscheduler/warmup/warmup.py (+79, -0)
62. modelscope/trainers/optimizer/__init__.py (+4, -0)
63. modelscope/trainers/optimizer/builder.py (+39, -0)
64. modelscope/trainers/trainer.py (+703, -0)
65. modelscope/trainers/utils/inference.py (+208, -0)
66. modelscope/trainers/utils/log_buffer.py (+42, -0)
67. modelscope/utils/checkpoint.py (+74, -0)
68. modelscope/utils/constant.py (+49, -6)
69. modelscope/utils/hub.py (+13, -0)
70. modelscope/utils/import_utils.py (+32, -0)
71. modelscope/utils/tensor_utils.py (+77, -0)
72. modelscope/utils/torch_utils.py (+127, -0)
73. output.wav (+0, -3)
74. tests/models/__init__.py (+0, -0)
75. tests/models/test_base_torch.py (+60, -0)
76. tests/pipelines/test_base.py (+1, -1)
77. tests/pipelines/test_image_captioning.py (+1, -1)
78. tests/pipelines/test_image_matting.py (+1, -1)
79. tests/pipelines/test_person_image_cartoon.py (+1, -1)
80. tests/pipelines/test_text_to_image_synthesis.py (+1, -1)
81. tests/pipelines/test_text_to_speech.py (+1, -1)
82. tests/trainers/hooks/__init__.py (+0, -0)
83. tests/trainers/hooks/test_checkpoint_hook.py (+108, -0)
84. tests/trainers/hooks/test_lr_scheduler_hook.py (+188, -0)
85. tests/trainers/hooks/test_timer_hook.py (+128, -0)
86. tests/trainers/lrscheduler/__init__.py (+0, -0)
87. tests/trainers/lrscheduler/warmup/__init__.py (+0, -0)
88. tests/trainers/lrscheduler/warmup/test_warmup_base.py (+79, -0)
89. tests/trainers/test_trainer.py (+209, -0)
90. tests/trainers/test_trainer_base.py (+0, -19)
91. tests/trainers/test_trainer_with_nlp.py (+91, -0)

+ 175
- 0
configs/cv/configuration.json View File

@@ -0,0 +1,175 @@
{
"framework": "pytorch",

"task": "image_classification",
"work_dir": "./work_dir",

"model": {
"type": "classification",
"pretrained": null,
"backbone": {
"type": "ResNet",
"depth": 50,
"out_indices": [
4
],
"norm_cfg": {
"type": "BN"
}
},
"head": {
"type": "ClsHead",
"with_avg_pool": true,
"in_channels": 2048,
"loss_config": {
"type": "CrossEntropyLossWithLabelSmooth",
"label_smooth": 0
},
"num_classes": 1000
}
},

"dataset": {
"train": {
"type": "ClsDataset",
"data_source": {
"list_file": "data/imagenet_raw/meta/train_labeled.txt",
"root": "data/imagenet_raw/train/",
"type": "ClsSourceImageList"
}
},
"val": {
"type": "ClsDataset",
"data_source": {
"list_file": "data/imagenet_raw/meta/val_labeled.txt",
"root": "data/imagenet_raw/validation/",
"type": "ClsSourceImageList"
}
},
"test": {}
},


"preprocessor":{
"train": [
{
"type": "RandomResizedCrop",
"size": 224
},
{
"type": "RandomHorizontalFlip"
},
{
"type": "ToTensor"
},
{
"type": "Normalize",
"mean": [
0.485,
0.456,
0.406
],
"std": [
0.229,
0.224,
0.225
]
},
{
"type": "Collect",
"keys": [
"img",
"gt_labels"
]
}
],
"val": [
{
"type": "Resize",
"size": 256
},
{
"type": "CenterCrop",
"size": 224
},
{
"type": "ToTensor"
},
{
"type": "Normalize",
"mean": [
0.485,
0.456,
0.406
],
"std": [
0.229,
0.224,
0.225
]
},
{
"type": "Collect",
"keys": [
"img",
"gt_labels"
]
}
]
},

"train": {
"dataloader": {
"batch_size_per_gpu": 2,
"workers_per_gpu": 1
},
"optimizer": {
"type": "SGD",
"lr": 0.01,
"options": {
"grad_clip": {
"max_norm": 2.0
}
}
},
"lr_scheduler": {
"type": "StepLR",
"step_size": 2,
"options": {
"warmup": {
"type": "LinearWarmup",
"warmup_iters": 2

}
}
},
"hooks":
[
{
"type": "CheckpointHook",
"interval": 2
},
{
"type": "TextLoggerHook",
"interval": 1
},
{
"type": "IterTimerHook"
},
{
"type": "EvaluationHook",
"interval": 1
}
]
},

"evaluation": {
"dataloader": {
"batch_size_per_gpu": 2,
"workers_per_gpu": 1,
"shuffle": false
},
"metrics": ["accuracy", "precision", "recall"]
}

}
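For readers unfamiliar with the schema, the "optimizer" and "lr_scheduler" types above are assumed to resolve to the standard PyTorch classes of the same name through the new optimizer/lrscheduler builders, while the "options" sub-dicts (grad_clip, warmup) are consumed by the new hooks and warmup wrappers. A rough plain-PyTorch approximation, ignoring warmup and gradient clipping:

# Plain-PyTorch approximation of the "optimizer"/"lr_scheduler" sections above;
# the mapping of type names to torch classes is an assumption about the builders.
import torch

model = torch.nn.Linear(8, 2)  # stand-in for the configured ResNet classifier
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2)

for epoch in range(4):
    # ... one training epoch would go here ...
    optimizer.step()
    lr_scheduler.step()
    print(epoch, lr_scheduler.get_last_lr())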

+ 77
- 0
configs/nlp/sbert_sentence_similarity.json View File

@@ -0,0 +1,77 @@
{
"task": "sentence-similarity",
"preprocessor": {
"type": "bert-seq-cls-tokenizer-finetune",
"first_sequence": "sentence1",
"second_sequence": "sentence2"
},
"model": {
"type": "structbert",
"attention_probs_dropout_prob": 0.1,
"easynlp_version": "0.0.3",
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"num_attention_heads": 12,
"num_hidden_layers": 12,
"pad_token_id": 0,
"position_embedding_type": "absolute",
"transformers_version": "4.6.0.dev0",
"type_vocab_size": 2,
"use_cache": true,
"vocab_size": 30522
},
"pipeline": {
"type": "sentence-similarity"
},
"work_dir": "/tmp",
"train": {
"dataloader": {
"batch_size_per_gpu": 2,
"workers_per_gpu": 1
},
"optimizer": {
"type": "SGD",
"lr": 0.01,
"options": {
"grad_clip": {
"max_norm": 2.0
}
}
},
"lr_scheduler": {
"type": "StepLR",
"step_size": 2,
"options": {
"warmup": {
"type": "LinearWarmup",
"warmup_iters": 2
}
}
},
"hooks": [{
"type": "CheckpointHook",
"interval": 1
}, {
"type": "TextLoggerHook",
"interval": 1
}, {
"type": "IterTimerHook"
}, {
"type": "EvaluationHook",
"interval": 1
}]
},
"evaluation": {
"dataloader": {
"batch_size_per_gpu": 2,
"workers_per_gpu": 1,
"shuffle": false
}
}
}
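The "train.hooks" list above is registry configuration. As a sanity check, here is a hedged sketch of how those entries could be instantiated with the new build_hook helper; in practice this wiring is expected to live inside EpochBasedTrainer, and the sketch assumes build_from_cfg accepts plain dicts.

# Illustrative only: shows that each hook entry is a registry config dict.
import json

from modelscope.trainers.hooks import build_hook

with open('configs/nlp/sbert_sentence_similarity.json') as f:
    cfg = json.load(f)

hooks = [build_hook(hook_cfg) for hook_cfg in cfg['train']['hooks']]
for hook in hooks:
    print(type(hook).__name__, hook.PRIORITY)  # CheckpointHook, TextLoggerHook, ...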

+ 2
- 2
docs/source/api/modelscope.pipelines.rst View File

@@ -36,10 +36,10 @@ modelscope.pipelines.base module
    :undoc-members:
    :show-inheritance:

-modelscope.pipelines.outputs module
+modelscope.outputs module
 -----------------------------------

-.. automodule:: modelscope.pipelines.outputs
+.. automodule:: modelscope.outputs
    :members:
    :undoc-members:
    :show-inheritance:


+ 13
- 0
modelscope/metainfo.py View File

@@ -127,3 +127,16 @@ class Preprocessors(object):
# multi-modal
     ofa_image_caption = 'ofa-image-caption'
     mplug_visual_question_answering = 'mplug-visual-question-answering'
+
+
+class Metrics(object):
+    """ Names for different metrics.
+    """
+
+    # accuracy
+    accuracy = 'accuracy'
+
+    # metrics for sequence classification task
+    seq_cls_metric = 'seq_cls_metric'
+    # metrics for token-classification task
+    token_cls_metric = 'token-cls-metric'

+ 3
- 0
modelscope/metrics/__init__.py View File

@@ -0,0 +1,3 @@
from .base import Metric
from .builder import METRICS, build_metric, task_default_metrics
from .sequence_classification_metric import SequenceClassificationMetric

+ 37
- 0
modelscope/metrics/base.py View File

@@ -0,0 +1,37 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from abc import ABC, abstractmethod
from typing import Dict


class Metric(ABC):
"""The metric base class for computing metrics.

The subclasses can either compute a single metric like 'accuracy', or compute the
complex metrics for a specific task with or without other Metric subclasses.
"""

@abstractmethod
def add(self, outputs: Dict, inputs: Dict):
""" Append logits and labels within an eval loop.

Will be called after every batch finished to gather the model predictions and the labels.

Args:
outputs: The model prediction outputs.
inputs: The mini batch inputs from the dataloader.

Returns: None

"""
pass

@abstractmethod
def evaluate(self):
"""Evaluate the metrics after the eval finished.

Will be called after the whole validation finished.

Returns: The actual metric dict with standard names.

"""
pass

+ 35
- 0
modelscope/metrics/builder.py View File

@@ -0,0 +1,35 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from ..metainfo import Metrics
from ..utils.config import ConfigDict
from ..utils.constant import Tasks
from ..utils.registry import Registry, build_from_cfg, default_group

METRICS = Registry('metrics')


class MetricKeys(object):
ACCURACY = 'accuracy'
F1 = 'f1'
PRECISION = 'precision'
RECALL = 'recall'


task_default_metrics = {
Tasks.sentence_similarity: [Metrics.seq_cls_metric],
}


def build_metric(metric_name: str,
field: str = default_group,
default_args: dict = None):
""" Build metric given metric_name and field.

Args:
metric_name (:obj:`str`): The metric name.
field (str, optional): The field of this metric, default value: 'default' for all fields.
default_args (dict, optional): Default initialization arguments.
"""
cfg = ConfigDict({'type': metric_name})
return build_from_cfg(
cfg, METRICS, group_key=field, default_args=default_args)
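
To make the registry mechanics concrete, here is a hedged sketch of registering and building a custom metric; the metric name and its trivial body are invented for the example.

# Toy metric registered under the default group; 'my-dummy-metric' is made up.
from typing import Dict

from modelscope.metrics.base import Metric
from modelscope.metrics.builder import METRICS, MetricKeys, build_metric
from modelscope.utils.registry import default_group


@METRICS.register_module(group_key=default_group, module_name='my-dummy-metric')
class MyDummyMetric(Metric):

    def __init__(self):
        self.num_batches = 0

    def add(self, outputs: Dict, inputs: Dict):
        self.num_batches += 1  # a real metric would collect logits/labels here

    def evaluate(self):
        return {MetricKeys.ACCURACY: 0.0, 'num_batches': self.num_batches}


metric = build_metric('my-dummy-metric')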

+ 40
- 0
modelscope/metrics/sequence_classification_metric.py View File

@@ -0,0 +1,40 @@
from typing import Dict, List, Union

import numpy as np

from modelscope.outputs import OutputKeys
from ..metainfo import Metrics
from ..utils.registry import default_group
from ..utils.tensor_utils import torch_nested_detach, torch_nested_numpify
from .base import Metric
from .builder import METRICS, MetricKeys


@METRICS.register_module(
group_key=default_group, module_name=Metrics.seq_cls_metric)
class SequenceClassificationMetric(Metric):
"""The metric computation class for sequence classification classes.
"""

label_name = 'labels'

def __init__(self):
self.preds = []
self.labels = []

def add(self, outputs: Dict, inputs: Dict):
ground_truths = inputs[SequenceClassificationMetric.label_name]
eval_results = outputs[OutputKeys.LOGITS]
self.preds.append(
torch_nested_numpify(torch_nested_detach(eval_results)))
self.labels.append(
torch_nested_numpify(torch_nested_detach(ground_truths)))

def evaluate(self):
preds = np.concatenate(self.preds, axis=0)
labels = np.concatenate(self.labels, axis=0)
preds = np.argmax(preds, axis=1)
return {
MetricKeys.ACCURACY:
(preds == labels).astype(np.float32).mean().item()
}
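
A standalone usage sketch (outside any trainer), with fabricated tensors to show the expected layout: logits under OutputKeys.LOGITS in the outputs dict and ground truth under 'labels' in the inputs dict.

# Fabricated mini-batches: 2 samples x 3 classes per batch.
import torch

from modelscope.metrics import SequenceClassificationMetric
from modelscope.outputs import OutputKeys

metric = SequenceClassificationMetric()
for _ in range(2):
    outputs = {OutputKeys.LOGITS: torch.tensor([[0.1, 0.7, 0.2], [0.9, 0.05, 0.05]])}
    inputs = {'labels': torch.tensor([1, 0])}
    metric.add(outputs, inputs)

print(metric.evaluate())  # {'accuracy': 1.0} for these inputs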

+ 23
- 0
modelscope/models/base_torch.py View File

@@ -0,0 +1,23 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from typing import Dict

import torch

from .base import Model


class TorchModel(Model, torch.nn.Module):
""" Base model interface for pytorch

"""

def __init__(self, model_dir=None, *args, **kwargs):
# init reference: https://stackoverflow.com/questions\
# /9575409/calling-parent-class-init-with-multiple-inheritance-whats-the-right-way
super().__init__(model_dir)
super(Model, self).__init__()

def forward(self, inputs: Dict[str,
torch.Tensor]) -> Dict[str, torch.Tensor]:
raise NotImplementedError
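
A minimal subclass sketch, assuming Model.__init__ tolerates model_dir=None and that subclasses simply override forward; the linear layer and dict keys are invented for illustration.

# Illustrative TorchModel subclass; the layer and key names are made up.
from typing import Dict

import torch

from modelscope.models.base_torch import TorchModel


class TinyRegressor(TorchModel):

    def __init__(self, model_dir=None):
        super().__init__(model_dir)          # initializes both Model and nn.Module
        self.linear = torch.nn.Linear(4, 1)

    def forward(self, inputs: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        return {'predictions': self.linear(inputs['features'])}


model = TinyRegressor()
out = model.forward({'features': torch.randn(2, 4)})  # forward() called directly here
print(out['predictions'].shape)  # torch.Size([2, 1])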

+ 2
- 2
modelscope/models/multi_modal/clip/clip_model.py View File

@@ -108,7 +108,7 @@ class CLIPForMultiModalEmbedding(Model):
        return text_ids_tensor, text_mask_tensor

     def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
-        from modelscope.pipelines.outputs import OutputKeys
+        from modelscope.outputs import OutputKeys
         output = {
             OutputKeys.IMG_EMBEDDING: None,
             OutputKeys.TEXT_EMBEDDING: None
@@ -134,7 +134,7 @@ class CLIPForMultiModalEmbedding(Model):

         img_embedding = self.clip_model(
             input_data=img_tensor, input_type='img')
-        from modelscope.pipelines.outputs import OutputKeys
+        from modelscope.outputs import OutputKeys
         output[OutputKeys.IMG_EMBEDDING] = img_embedding.data.cpu().numpy()

         if 'text' in input and input['text'] is not None:


+ 1
- 1
modelscope/models/multi_modal/image_captioning_model.py View File

@@ -76,7 +76,7 @@ class OfaForImageCaptioning(Model):
        input = fairseq.utils.move_to_cuda(input, device=self._device)
         results, _ = self.eval_caption(self.task, self.generator, self.models,
                                        input)
-        from ...pipelines.outputs import OutputKeys
+        from modelscope.outputs import OutputKeys
         return {
             'image_id': results[0]['image_id'],
             OutputKeys.CAPTION: results[0][OutputKeys.CAPTION]


+ 12
- 1
modelscope/models/nlp/sbert_for_sequence_classification.py View File

@@ -7,7 +7,10 @@ import torch
from sofa.models.sbert.modeling_sbert import SbertModel, SbertPreTrainedModel
 from torch import nn

+from modelscope.metainfo import Models
+from modelscope.utils.constant import Tasks
 from ..base import Model
+from ..builder import MODELS


 class SbertTextClassfier(SbertPreTrainedModel):
@@ -20,7 +23,11 @@ class SbertTextClassfier(SbertPreTrainedModel):
         self.dropout = nn.Dropout(config.hidden_dropout_prob)
         self.classifier = nn.Linear(config.hidden_size, config.num_labels)

-    def forward(self, input_ids=None, token_type_ids=None):
+    def forward(self,
+                input_ids=None,
+                token_type_ids=None,
+                labels=None,
+                **kwargs):
         outputs = self.encoder(
             input_ids,
             token_type_ids=token_type_ids,
@@ -29,6 +36,10 @@ class SbertTextClassfier(SbertPreTrainedModel):
         pooled_output = outputs[1]
         pooled_output = self.dropout(pooled_output)
         logits = self.classifier(pooled_output)
+        if labels is not None:
+            loss_fct = nn.CrossEntropyLoss()
+            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
+            return {'logits': logits, 'loss': loss}
         return {'logits': logits}






modelscope/pipelines/outputs.py → modelscope/outputs.py View File

@@ -4,6 +4,7 @@ from modelscope.utils.constant import Tasks




class OutputKeys(object):
+    LOGITS = 'logits'
     SCORES = 'scores'
     LABEL = 'label'
     LABELS = 'labels'

+ 1
- 1
modelscope/pipelines/audio/ans_pipeline.py View File

@@ -7,10 +7,10 @@ import soundfile as sf
import torch import torch


from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.utils.constant import Tasks from modelscope.utils.constant import Tasks
from ..base import Input, Pipeline from ..base import Input, Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys




def audio_norm(x): def audio_norm(x):


+ 1
- 1
modelscope/pipelines/audio/linear_aec_pipeline.py View File

@@ -9,12 +9,12 @@ import yaml


from modelscope.fileio import File from modelscope.fileio import File
from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.preprocessors.audio import LinearAECAndFbank from modelscope.preprocessors.audio import LinearAECAndFbank
from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger from modelscope.utils.logger import get_logger
from ..base import Pipeline from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


logger = get_logger() logger = get_logger()




+ 1
- 1
modelscope/pipelines/audio/text_to_speech_pipeline.py View File

@@ -5,9 +5,9 @@ import numpy as np
from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
from modelscope.models import Model from modelscope.models import Model
from modelscope.models.audio.tts import SambertHifigan from modelscope.models.audio.tts import SambertHifigan
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input, InputModel, Pipeline from modelscope.pipelines.base import Input, InputModel, Pipeline
from modelscope.pipelines.builder import PIPELINES from modelscope.pipelines.builder import PIPELINES
from modelscope.pipelines.outputs import OutputKeys
from modelscope.utils.constant import Fields, Tasks from modelscope.utils.constant import Fields, Tasks


__all__ = ['TextToSpeechSambertHifiganPipeline'] __all__ = ['TextToSpeechSambertHifiganPipeline']


+ 1
- 1
modelscope/pipelines/base.py View File

@@ -7,10 +7,10 @@ from typing import Any, Dict, Generator, List, Union
from modelscope.hub.snapshot_download import snapshot_download
 from modelscope.models.base import Model
 from modelscope.msdatasets import MsDataset
+from modelscope.outputs import TASK_OUTPUTS
 from modelscope.preprocessors import Preprocessor
 from modelscope.utils.config import Config
 from modelscope.utils.logger import get_logger
-from .outputs import TASK_OUTPUTS
 from .util import is_model, is_official_hub_path

 Tensor = Union['torch.Tensor', 'tf.Tensor']


+ 1
- 1
modelscope/pipelines/cv/action_recognition_pipeline.py View File

@@ -6,6 +6,7 @@ import torch


from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
from modelscope.models.cv.action_recognition.models import BaseVideoModel from modelscope.models.cv.action_recognition.models import BaseVideoModel
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input from modelscope.pipelines.base import Input
from modelscope.preprocessors.video import ReadVideoData from modelscope.preprocessors.video import ReadVideoData
from modelscope.utils.config import Config from modelscope.utils.config import Config
@@ -13,7 +14,6 @@ from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger from modelscope.utils.logger import get_logger
from ..base import Pipeline from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


logger = get_logger() logger = get_logger()




+ 1
- 1
modelscope/pipelines/cv/animal_recog_pipeline.py View File

@@ -10,13 +10,13 @@ from torchvision import transforms
from modelscope.hub.snapshot_download import snapshot_download from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
from modelscope.models.cv.animal_recognition import resnet from modelscope.models.cv.animal_recognition import resnet
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input from modelscope.pipelines.base import Input
from modelscope.preprocessors import load_image from modelscope.preprocessors import load_image
from modelscope.utils.constant import Tasks from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger from modelscope.utils.logger import get_logger
from ..base import Pipeline from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


logger = get_logger() logger = get_logger()




+ 1
- 1
modelscope/pipelines/cv/cmdssl_video_embedding_pipleline.py View File

@@ -11,8 +11,8 @@ from PIL import Image
from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
from modelscope.models.cv.cmdssl_video_embedding.resnet2p1d import \ from modelscope.models.cv.cmdssl_video_embedding.resnet2p1d import \
resnet26_2p1d resnet26_2p1d
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input from modelscope.pipelines.base import Input
from modelscope.pipelines.outputs import OutputKeys
from modelscope.utils.config import Config from modelscope.utils.config import Config
from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger from modelscope.utils.logger import get_logger


+ 1
- 1
modelscope/pipelines/cv/image_cartoon_pipeline.py View File

@@ -11,13 +11,13 @@ from modelscope.models.cv.cartoon.facelib.facer import FaceAna
from modelscope.models.cv.cartoon.mtcnn_pytorch.src.align_trans import ( from modelscope.models.cv.cartoon.mtcnn_pytorch.src.align_trans import (
get_reference_facial_points, warp_and_crop_face) get_reference_facial_points, warp_and_crop_face)
from modelscope.models.cv.cartoon.utils import get_f5p, padTo16x, resize_size from modelscope.models.cv.cartoon.utils import get_f5p, padTo16x, resize_size
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input from modelscope.pipelines.base import Input
from modelscope.preprocessors import load_image from modelscope.preprocessors import load_image
from modelscope.utils.constant import Tasks from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger from modelscope.utils.logger import get_logger
from ..base import Pipeline from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


if tf.__version__ >= '2.0': if tf.__version__ >= '2.0':
tf = tf.compat.v1 tf = tf.compat.v1


+ 1
- 1
modelscope/pipelines/cv/image_matting_pipeline.py View File

@@ -6,13 +6,13 @@ import numpy as np
import PIL import PIL


from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input from modelscope.pipelines.base import Input
from modelscope.preprocessors import load_image from modelscope.preprocessors import load_image
from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger from modelscope.utils.logger import get_logger
from ..base import Pipeline from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


logger = get_logger() logger = get_logger()




+ 1
- 1
modelscope/pipelines/cv/ocr_detection_pipeline.py View File

@@ -7,13 +7,13 @@ import PIL
import tensorflow as tf import tensorflow as tf


from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input from modelscope.pipelines.base import Input
from modelscope.preprocessors import load_image from modelscope.preprocessors import load_image
from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger from modelscope.utils.logger import get_logger
from ..base import Pipeline from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys
from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils


if tf.__version__ >= '2.0': if tf.__version__ >= '2.0':


+ 1
- 1
modelscope/pipelines/multi_modal/text_to_image_synthesis_pipeline.py View File

@@ -3,12 +3,12 @@ from typing import Any, Dict
import torch import torch


from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input from modelscope.pipelines.base import Input
from modelscope.utils.constant import Tasks from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger from modelscope.utils.logger import get_logger
from ..base import Model, Pipeline from ..base import Model, Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


logger = get_logger() logger = get_logger()




+ 1
- 1
modelscope/pipelines/nlp/dialog_intent_prediction_pipeline.py View File

@@ -2,6 +2,7 @@


from typing import Any, Dict, Union from typing import Any, Dict, Union


from modelscope.outputs import OutputKeys
from ...metainfo import Pipelines from ...metainfo import Pipelines
from ...models import Model from ...models import Model
from ...models.nlp import SpaceForDialogIntent from ...models.nlp import SpaceForDialogIntent
@@ -9,7 +10,6 @@ from ...preprocessors import DialogIntentPredictionPreprocessor
from ...utils.constant import Tasks from ...utils.constant import Tasks
from ..base import Pipeline from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


__all__ = ['DialogIntentPredictionPipeline'] __all__ = ['DialogIntentPredictionPipeline']




+ 1
- 1
modelscope/pipelines/nlp/dialog_modeling_pipeline.py View File

@@ -2,6 +2,7 @@


from typing import Dict, Union from typing import Dict, Union


from modelscope.outputs import OutputKeys
from ...metainfo import Pipelines from ...metainfo import Pipelines
from ...models import Model from ...models import Model
from ...models.nlp import SpaceForDialogModeling from ...models.nlp import SpaceForDialogModeling
@@ -9,7 +10,6 @@ from ...preprocessors import DialogModelingPreprocessor
from ...utils.constant import Tasks from ...utils.constant import Tasks
from ..base import Pipeline, Tensor from ..base import Pipeline, Tensor
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


__all__ = ['DialogModelingPipeline'] __all__ = ['DialogModelingPipeline']




+ 1
- 1
modelscope/pipelines/nlp/dialog_state_tracking_pipeline.py View File

@@ -1,12 +1,12 @@
from typing import Any, Dict, Union from typing import Any, Dict, Union


from modelscope.outputs import OutputKeys
from ...metainfo import Pipelines from ...metainfo import Pipelines
from ...models import Model, SpaceForDialogStateTracking from ...models import Model, SpaceForDialogStateTracking
from ...preprocessors import DialogStateTrackingPreprocessor from ...preprocessors import DialogStateTrackingPreprocessor
from ...utils.constant import Tasks from ...utils.constant import Tasks
from ..base import Pipeline from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


__all__ = ['DialogStateTrackingPipeline'] __all__ = ['DialogStateTrackingPipeline']




+ 1
- 1
modelscope/pipelines/nlp/fill_mask_pipeline.py View File

@@ -3,6 +3,7 @@ from typing import Any, Dict, Optional, Union


import torch import torch


from modelscope.outputs import OutputKeys
from ...metainfo import Pipelines from ...metainfo import Pipelines
from ...models import Model from ...models import Model
from ...models.nlp.masked_language import MaskedLanguageModelBase from ...models.nlp.masked_language import MaskedLanguageModelBase
@@ -11,7 +12,6 @@ from ...utils.config import Config
from ...utils.constant import ModelFile, Tasks from ...utils.constant import ModelFile, Tasks
from ..base import Pipeline, Tensor from ..base import Pipeline, Tensor
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


__all__ = ['FillMaskPipeline'] __all__ = ['FillMaskPipeline']
_type_map = {'veco': 'roberta', 'sbert': 'bert'} _type_map = {'veco': 'roberta', 'sbert': 'bert'}


+ 1
- 1
modelscope/pipelines/nlp/nli_pipeline.py View File

@@ -4,6 +4,7 @@ from typing import Any, Dict, Union
import numpy as np import numpy as np
import torch import torch


from modelscope.outputs import OutputKeys
from ...metainfo import Pipelines from ...metainfo import Pipelines
from ...models import Model from ...models import Model
from ...models.nlp import SbertForNLI from ...models.nlp import SbertForNLI
@@ -11,7 +12,6 @@ from ...preprocessors import NLIPreprocessor
from ...utils.constant import Tasks from ...utils.constant import Tasks
from ..base import Pipeline from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


__all__ = ['NLIPipeline'] __all__ = ['NLIPipeline']




+ 1
- 1
modelscope/pipelines/nlp/sentence_similarity_pipeline.py View File

@@ -3,6 +3,7 @@ from typing import Any, Dict, Union
import numpy as np import numpy as np
import torch import torch


from modelscope.outputs import OutputKeys
from ...metainfo import Pipelines from ...metainfo import Pipelines
from ...models import Model from ...models import Model
from ...models.nlp import SbertForSentenceSimilarity from ...models.nlp import SbertForSentenceSimilarity
@@ -10,7 +11,6 @@ from ...preprocessors import SentenceSimilarityPreprocessor
from ...utils.constant import Tasks from ...utils.constant import Tasks
from ..base import Input, Pipeline from ..base import Input, Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


__all__ = ['SentenceSimilarityPipeline'] __all__ = ['SentenceSimilarityPipeline']




+ 1
- 1
modelscope/pipelines/nlp/sentiment_classification_pipeline.py View File

@@ -3,6 +3,7 @@ from typing import Any, Dict, Union
import numpy as np import numpy as np
import torch import torch


from modelscope.outputs import OutputKeys
from ...metainfo import Pipelines from ...metainfo import Pipelines
from ...models import Model from ...models import Model
from ...models.nlp import SbertForSentimentClassification from ...models.nlp import SbertForSentimentClassification
@@ -10,7 +11,6 @@ from ...preprocessors import SentimentClassificationPreprocessor
from ...utils.constant import Tasks from ...utils.constant import Tasks
from ..base import Pipeline from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


__all__ = ['SentimentClassificationPipeline'] __all__ = ['SentimentClassificationPipeline']




+ 1
- 1
modelscope/pipelines/nlp/sequence_classification_pipeline.py View File

@@ -4,12 +4,12 @@ import numpy as np


from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
from modelscope.models.nlp import BertForSequenceClassification from modelscope.models.nlp import BertForSequenceClassification
from modelscope.outputs import OutputKeys
from modelscope.preprocessors import SequenceClassificationPreprocessor from modelscope.preprocessors import SequenceClassificationPreprocessor
from modelscope.utils.constant import Tasks from modelscope.utils.constant import Tasks
from ...models import Model from ...models import Model
from ..base import Input, Pipeline from ..base import Input, Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


__all__ = ['SequenceClassificationPipeline'] __all__ = ['SequenceClassificationPipeline']




+ 1
- 1
modelscope/pipelines/nlp/text_generation_pipeline.py View File

@@ -2,6 +2,7 @@ from typing import Any, Dict, Optional, Union


import torch import torch


from modelscope.outputs import OutputKeys
from ...metainfo import Pipelines from ...metainfo import Pipelines
from ...models import Model from ...models import Model
from ...models.nlp import PalmForTextGeneration from ...models.nlp import PalmForTextGeneration
@@ -9,7 +10,6 @@ from ...preprocessors import TextGenerationPreprocessor
from ...utils.constant import Tasks from ...utils.constant import Tasks
from ..base import Pipeline, Tensor from ..base import Pipeline, Tensor
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


__all__ = ['TextGenerationPipeline'] __all__ = ['TextGenerationPipeline']




+ 1
- 1
modelscope/pipelines/nlp/translation_pipeline.py View File

@@ -4,6 +4,7 @@ from typing import Any, Dict
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf


from modelscope.outputs import OutputKeys
from ...hub.snapshot_download import snapshot_download from ...hub.snapshot_download import snapshot_download
from ...metainfo import Pipelines from ...metainfo import Pipelines
from ...models.nlp import CsanmtForTranslation from ...models.nlp import CsanmtForTranslation
@@ -11,7 +12,6 @@ from ...utils.constant import ModelFile, Tasks
from ...utils.logger import get_logger from ...utils.logger import get_logger
from ..base import Pipeline, Tensor from ..base import Pipeline, Tensor
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


if tf.__version__ >= '2.0': if tf.__version__ >= '2.0':
tf = tf.compat.v1 tf = tf.compat.v1


+ 1
- 1
modelscope/pipelines/nlp/word_segmentation_pipeline.py View File

@@ -2,6 +2,7 @@ from typing import Any, Dict, Optional, Union


import torch import torch


from modelscope.outputs import OutputKeys
from ...metainfo import Pipelines from ...metainfo import Pipelines
from ...models import Model from ...models import Model
from ...models.nlp import SbertForTokenClassification from ...models.nlp import SbertForTokenClassification
@@ -9,7 +10,6 @@ from ...preprocessors import TokenClassificationPreprocessor
from ...utils.constant import Tasks from ...utils.constant import Tasks
from ..base import Pipeline, Tensor from ..base import Pipeline, Tensor
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


__all__ = ['WordSegmentationPipeline'] __all__ = ['WordSegmentationPipeline']




+ 1
- 1
modelscope/pipelines/nlp/zero_shot_classification_pipeline.py View File

@@ -3,6 +3,7 @@ from typing import Any, Dict, Union
import torch import torch
from scipy.special import softmax from scipy.special import softmax


from modelscope.outputs import OutputKeys
from ...metainfo import Pipelines from ...metainfo import Pipelines
from ...models import Model from ...models import Model
from ...models.nlp import SbertForZeroShotClassification from ...models.nlp import SbertForZeroShotClassification
@@ -10,7 +11,6 @@ from ...preprocessors import ZeroShotClassificationPreprocessor
from ...utils.constant import Tasks from ...utils.constant import Tasks
from ..base import Pipeline from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from ..outputs import OutputKeys


__all__ = ['ZeroShotClassificationPipeline'] __all__ = ['ZeroShotClassificationPipeline']




+ 28
- 2
modelscope/preprocessors/nlp.py View File

@@ -8,6 +8,7 @@ from transformers import AutoTokenizer
from ..metainfo import Preprocessors
 from ..models import Model
 from ..utils.constant import Fields, InputFields
+from ..utils.hub import parse_label_mapping
 from ..utils.type_assert import type_assert
 from .base import Preprocessor
 from .builder import PREPROCESSORS
@@ -115,7 +116,8 @@ class SentenceSimilarityPreprocessor(NLPPreprocessorBase):

     def __init__(self, model_dir: str, *args, **kwargs):
         kwargs['truncation'] = True
-        kwargs['padding'] = False
+        kwargs['padding'] = False if 'padding' not in kwargs else kwargs[
+            'padding']
         kwargs['return_tensors'] = 'pt'
         kwargs['max_length'] = kwargs.pop('sequence_length', 128)
         super().__init__(model_dir, *args, **kwargs)
@@ -143,6 +145,7 @@ class SequenceClassificationPreprocessor(Preprocessor):

         self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir)
         print(f'this is the tokenzier {self.tokenizer}')
+        self.label2id = parse_label_mapping(self.model_dir)

     @type_assert(object, (str, tuple, Dict))
     def __call__(self, data: Union[str, tuple, Dict]) -> Dict[str, Any]:
@@ -164,7 +167,7 @@ class SequenceClassificationPreprocessor(Preprocessor):
             'id': [],
             'input_ids': [],
             'attention_mask': [],
-            'token_type_ids': []
+            'token_type_ids': [],
         }

         max_seq_length = self.sequence_length
@@ -186,6 +189,29 @@ class SequenceClassificationPreprocessor(Preprocessor):
         return rst


+@PREPROCESSORS.register_module(
+    Fields.nlp, module_name='bert-seq-cls-tokenizer-finetune')
+class SentenceSimilarityFinetunePreprocessor(SentenceSimilarityPreprocessor):
+    """Sentence similarity preprocessor in the finetune scenario
+
+    Mainly added the label mapping procedure.
+    """
+
+    def __init__(self, model_dir: str, *args, **kwargs):
+        kwargs['padding'] = 'max_length'
+        super().__init__(model_dir, *args, **kwargs)
+        self.label2id = parse_label_mapping(self.model_dir)
+
+    @type_assert(object, (str, tuple, Dict))
+    def __call__(self, data: Union[str, tuple, Dict]) -> Dict[str, Any]:
+        rst = super().__call__(data)
+        rst = {k: v.squeeze() for k, v in rst.items()}
+        if self.label2id is not None and 'label' in data:
+            rst['labels'] = []
+            rst['labels'].append(self.label2id[str(data['label'])])
+        return rst
+
+
 @PREPROCESSORS.register_module(
     Fields.nlp, module_name=Preprocessors.palm_text_gen_tokenizer)
 class TextGenerationPreprocessor(NLPPreprocessorBase):
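
A hedged usage sketch of the new finetune preprocessor; it assumes the base preprocessor accepts the first_sequence/second_sequence/sequence_length keyword arguments used in the sbert config above, and that parse_label_mapping finds a label mapping inside model_dir.

# Illustrative only; '/path/to/model_dir' must contain tokenizer files and a
# label mapping readable by parse_label_mapping().
from modelscope.preprocessors.nlp import SentenceSimilarityFinetunePreprocessor

preprocessor = SentenceSimilarityFinetunePreprocessor(
    '/path/to/model_dir',
    first_sequence='sentence1',
    second_sequence='sentence2',
    sequence_length=128)

features = preprocessor({
    'sentence1': 'A man is playing a guitar.',
    'sentence2': 'Someone plays an instrument.',
    'label': '1',
})
print(sorted(features.keys()))  # attention_mask, input_ids, labels, token_type_ids, ...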


+ 3
- 0
modelscope/task_datasets/__init__.py View File

@@ -0,0 +1,3 @@
from .base import TaskDataset
from .builder import build_task_dataset
from .torch_base_dataset import TorchTaskDataset

+ 48
- 0
modelscope/task_datasets/base.py View File

@@ -0,0 +1,48 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from abc import ABC, abstractmethod
from typing import Any, List, Tuple


class TaskDataset(ABC):
"""The task dataset base class for all the task specific dataset processors.
"""

def __init__(self,
datasets: Tuple[Any, List[Any]],
mode,
preprocessor=None,
**kwargs):
super().__init__()
self.mode = mode
self.preprocessor = preprocessor
self._inner_dataset = self.compose_dataset(datasets)

@abstractmethod
def compose_dataset(self, datasets: Tuple[Any, List[Any]]) -> Any:
"""Prepare a dataset.

User can process the input datasets in a whole dataset perspective.
This method also helps to merge several datasets to one.

Args:
datasets: The original dataset(s)

Returns: A single dataset, which may be created after merging.

"""
pass

@abstractmethod
def preprocess_dataset(self, data):
"""Preprocess the data fetched from the inner_dataset.

If the preprocessor is None, the original data will be returned, else the preprocessor will be called.
User can override this method to implement custom logics.

Args:
data: The data fetched from the dataset.

Returns: The processed data.

"""
pass

+ 21
- 0
modelscope/task_datasets/builder.py View File

@@ -0,0 +1,21 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from modelscope.utils.config import ConfigDict
from modelscope.utils.registry import Registry, build_from_cfg

TASK_DATASETS = Registry('task_datasets')


def build_task_dataset(cfg: ConfigDict,
task_name: str = None,
default_args: dict = None):
""" Build task specific dataset processor given model config dict and the task name.

Args:
cfg (:obj:`ConfigDict`): config dict for model object.
task_name (str, optional): task name, refer to
:obj:`Tasks` for more details
default_args (dict, optional): Default initialization arguments.
"""
return build_from_cfg(
cfg, TASK_DATASETS, group_key=task_name, default_args=default_args)

+ 63
- 0
modelscope/task_datasets/torch_base_dataset.py View File

@@ -0,0 +1,63 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, List, Tuple

from torch.utils.data import ConcatDataset, Dataset

from .base import TaskDataset


class TorchTaskDataset(TaskDataset, Dataset):
"""The task dataset base class for all the torch-based task processors.

This base class is enough for most cases, except there are procedures which can not be executed in
preprocessors and Datasets like dataset merging.
"""

def __init__(self,
datasets: Tuple[Any, List[Any]],
mode,
preprocessor=None,
**kwargs):
TaskDataset.__init__(self, datasets, mode, preprocessor, **kwargs)

def __getitem__(self, index) -> Any:
return self.preprocess_dataset(self._inner_dataset[index])

def __len__(self):
return len(self._inner_dataset)

def compose_dataset(self, datasets: Tuple[Any, List[Any]]) -> Any:
"""Prepare a dataset.

User can process the input datasets in a whole dataset perspective.
This method gives a default implementation of datasets merging, user can override this
method to write custom logics.

Args:
datasets: The original dataset(s)

Returns: A single dataset, which may be created after merging.

"""
if isinstance(datasets, List):
if len(datasets) == 1:
return datasets[0]
elif len(datasets) > 1:
return ConcatDataset(datasets)
else:
return datasets

def preprocess_dataset(self, data):
"""Preprocess the data fetched from the inner_dataset.

If the preprocessor is None, the original data will be returned, else the preprocessor will be called.
User can override this method to implement custom logics.

Args:
data: The data fetched from the dataset.

Returns: The processed data.

"""
return self.preprocessor(
data) if self.preprocessor is not None else data
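
A small usage sketch: two plain lists stand in for real datasets (anything indexable works with ConcatDataset), and the mode string and lambda preprocessor are arbitrary placeholders.

# Toy datasets; the 'mode' value is a placeholder, the base class only stores it.
from modelscope.task_datasets import TorchTaskDataset

part_a = [{'text': 'hello', 'label': 0}, {'text': 'world', 'label': 1}]
part_b = [{'text': 'foo', 'label': 1}]

dataset = TorchTaskDataset(
    [part_a, part_b],
    mode='train',
    preprocessor=lambda sample: {**sample, 'length': len(sample['text'])})

print(len(dataset))  # 3, via ConcatDataset over the two parts
print(dataset[2])    # {'text': 'foo', 'label': 1, 'length': 3}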

+ 1
- 0
modelscope/trainers/__init__.py View File

@@ -1,3 +1,4 @@
from .base import DummyTrainer
 from .builder import build_trainer
 from .nlp import SequenceClassificationTrainer
+from .trainer import EpochBasedTrainer

+ 4
- 0
modelscope/trainers/base.py View File

@@ -1,10 +1,12 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

+import time
 from abc import ABC, abstractmethod
 from typing import Callable, Dict, List, Optional, Tuple, Union

 from modelscope.trainers.builder import TRAINERS
 from modelscope.utils.config import Config
+from .utils.log_buffer import LogBuffer


 class BaseTrainer(ABC):
@@ -27,6 +29,8 @@ class BaseTrainer(ABC):
             self.args = self.cfg.to_args(arg_parse_fn)
         else:
             self.args = None
+        self.log_buffer = LogBuffer()
+        self.timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())

     @abstractmethod
     def train(self, *args, **kwargs):


+ 2
- 3
modelscope/trainers/builder.py View File

@@ -5,9 +5,10 @@ from modelscope.utils.constant import Tasks
from modelscope.utils.registry import Registry, build_from_cfg

 TRAINERS = Registry('trainers')
+HOOKS = Registry('hooks')


-def build_trainer(name: str = None, default_args: dict = None):
+def build_trainer(name: str = 'EpochBasedTrainer', default_args: dict = None):
     """ build trainer given a trainer name

     Args:
@@ -15,7 +16,5 @@ def build_trainer(name: str = None, default_args: dict = None):
             will be used.
         default_args (dict, optional): Default initialization arguments.
     """
-    if name is None:
-        name = 'Trainer'
     cfg = dict(type=name)
     return build_from_cfg(cfg, TRAINERS, default_args=default_args)

+ 16
- 0
modelscope/trainers/hooks/__init__.py View File

@@ -0,0 +1,16 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .builder import HOOKS, build_hook
from .checkpoint_hook import CheckpointHook
from .evaluation_hook import EvaluationHook
from .hook import Hook
from .iter_timer_hook import IterTimerHook
from .logger.text_logger_hook import TextLoggerHook
from .lr_scheduler_hook import LrSchedulerHook
from .optimizer_hook import OptimizerHook
from .priority import Priority

__all__ = [
'Hook', 'HOOKS', 'CheckpointHook', 'EvaluationHook', 'LrSchedulerHook',
'OptimizerHook', 'Priority', 'build_hook', 'TextLoggerHook',
'IterTimerHook'
]

+ 9
- 0
modelscope/trainers/hooks/builder.py View File

@@ -0,0 +1,9 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.utils.registry import Registry, build_from_cfg, default_group

HOOKS = Registry('hooks')


def build_hook(cfg, default_args=None):
return build_from_cfg(
cfg, HOOKS, group_key=default_group, default_args=default_args)

+ 92
- 0
modelscope/trainers/hooks/checkpoint_hook.py View File

@@ -0,0 +1,92 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os

from modelscope import __version__
from modelscope.utils.checkpoint import save_checkpoint
from modelscope.utils.logger import get_logger
from modelscope.utils.torch_utils import get_dist_info
from .builder import HOOKS
from .hook import Hook
from .priority import Priority


@HOOKS.register_module()
class CheckpointHook(Hook):
"""Save checkpoints periodically.

Args:
interval (int): The frequency to save model. If `by_epoch=True`,
it means the number of epochs, else means the number of iterations
by_epoch (bool): Saving checkpoints by epoch or by iteration.
save_optimizer (bool): Whether to save optimizer state dict. Default: True.
save_dir (str): The directory to save checkpoints. If is None, use `trainer.work_dir`
save_last (bool): Whether to save the last checkpoint. Default: True.
"""

PRIORITY = Priority.NORMAL

def __init__(self,
interval=0,
by_epoch=True,
save_optimizer=True,
save_dir=None,
save_last=True):
self.interval = interval
self.by_epoch = by_epoch
self.save_optimizer = save_optimizer
self.save_dir = save_dir
self.save_last = save_last

def before_run(self, trainer):
if not self.save_dir:
self.save_dir = trainer.work_dir

if not hasattr(trainer, 'logger'):
self.logger = get_logger(__name__)
else:
self.logger = trainer.logger

self.logger.info(f'Checkpoints will be saved to {self.save_dir}')

def after_train_epoch(self, trainer):
if not self.by_epoch:
return

if self._should_save(trainer):
self.logger.info(f'Saving checkpoint at {trainer.epoch + 1} epoch')
self._save_checkpoint(trainer)

def _save_checkpoint(self, trainer):
if self.by_epoch:
cur_save_name = os.path.join(self.save_dir,
f'epoch_{trainer.epoch + 1}.pth')
else:
cur_save_name = os.path.join(self.save_dir,
f'iter_{trainer.epoch + 1}.pth')

rank, _ = get_dist_info()
if rank == 0:
save_checkpoint(trainer.model, cur_save_name, trainer.optimizer)

def after_train_iter(self, trainer):
if self.by_epoch:
return

if self._should_save(trainer):
self.logger.info(
f'Saving checkpoint at {trainer.iter + 1} iterations')
self._save_checkpoint(trainer)

def _should_save(self, trainer):
if self.by_epoch:
check_last = self.is_last_epoch
check_frequency = self.every_n_epochs
else:
check_last = self.is_last_iter
check_frequency = self.every_n_iters

if check_frequency(trainer,
self.interval) or (self.save_last
and check_last(trainer)):
return True
return False

+ 74
- 0
modelscope/trainers/hooks/evaluation_hook.py View File

@@ -0,0 +1,74 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .builder import HOOKS
from .hook import Hook
from .priority import Priority


@HOOKS.register_module()
class EvaluationHook(Hook):
"""Evaluation hook.
Args:
interval (int): Evaluation interval.
by_epoch (bool): Evaluate by epoch or by iteration.
start_idx (int | None, optional): The epoch/iterations validation begins.
Default: None, validate every interval epochs/iterations from scratch.
"""

PRIORITY = Priority.NORMAL

def __init__(self, interval=1, by_epoch=True, start_idx=None):

assert interval > 0, 'interval must be a positive number'

self.interval = interval
self.start_idx = start_idx
self.by_epoch = by_epoch

def after_train_iter(self, trainer):
"""Called after every training iter to evaluate the results."""
if not self.by_epoch and self._should_evaluate(trainer):
self.do_evaluate(trainer)

def after_train_epoch(self, trainer):
"""Called after every training epoch to evaluate the results."""
if self.by_epoch and self._should_evaluate(trainer):
self.do_evaluate(trainer)

def do_evaluate(self, trainer):
"""Evaluate the results."""
eval_res = trainer.evaluate()
for name, val in eval_res.items():
trainer.log_buffer.output[name] = val

trainer.log_buffer.ready = True

def _should_evaluate(self, trainer):
"""Judge whether to perform evaluation.

Here is the rule to judge whether to perform evaluation:
1. It will not perform evaluation during the epoch/iteration interval,
which is determined by ``self.interval``.
2. It will not perform evaluation if the ``start_idx`` is larger than
current epochs/iters.
3. It will not perform evaluation when current epochs/iters is larger than
the ``start_idx`` but during epoch/iteration interval.

Returns:
bool: The flag indicating whether to perform evaluation.
"""
if self.by_epoch:
current = trainer.epoch
check_time = self.every_n_epochs
else:
current = trainer.iter
check_time = self.every_n_iters

if self.start_idx is None:
if not check_time(trainer, self.interval):
return False
elif (current + 1) < self.start_idx:
return False
else:
if (current + 1 - self.start_idx) % self.interval:
return False
return True

+ 208
- 0
modelscope/trainers/hooks/hook.py View File

@@ -0,0 +1,208 @@
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.utils.import_utils import is_method_overridden
from .priority import Priority


class Hook:
"""
The Hook base class of any modelscope trainer. You can build your own hook inherited from this class.
"""

# TODO @jiangnana.jnn use constant variable for stages
stages = ('before_run', 'before_train_epoch', 'before_train_iter',
'after_train_iter', 'after_train_epoch', 'before_val_epoch',
'before_val_iter', 'after_val_iter', 'after_val_epoch',
'after_run')
PRIORITY = Priority.NORMAL

def before_run(self, trainer):
"""
Will be called before any loop begins.
Args:
trainer: The trainer instance.

Returns: None

"""
pass

def after_run(self, trainer):
"""
Will be called after all loops end.
Args:
trainer: The trainer instance.

Returns: None

"""
pass

def before_epoch(self, trainer):
"""
Will be called before every epoch begins.
Args:
trainer: The trainer instance.

Returns: None

"""
pass

def after_epoch(self, trainer):
"""
Will be called after every epoch ends.
Args:
trainer: The trainer instance.

Returns: None

"""
pass

def before_iter(self, trainer):
"""
Will be called before every loop begins.
Args:
trainer: The trainer instance.

Returns: None
"""
pass

def after_iter(self, trainer):
"""
Will be called after every loop ends.
Args:
trainer: The trainer instance.

Returns: None
"""
pass

def before_train_epoch(self, trainer):
"""
Will be called before every train epoch begins. Default call ``self.before_epoch``
Args:
trainer: The trainer instance.

Returns: None

"""
self.before_epoch(trainer)

def before_val_epoch(self, trainer):
"""
Will be called before every validation epoch begins. Default call ``self.before_epoch``
Args:
trainer: The trainer instance.

Returns: None

"""
self.before_epoch(trainer)

def after_train_epoch(self, trainer):
"""
Will be called after every train epoch ends. Default call ``self.after_epoch``
Args:
trainer: The trainer instance.

Returns: None

"""
self.after_epoch(trainer)

def after_val_epoch(self, trainer):
"""
Will be called after every validation epoch ends. Default call ``self.after_epoch``
Args:
trainer: The trainer instance.

Returns: None

"""
self.after_epoch(trainer)

def before_train_iter(self, trainer):
"""
Will be called before every train loop begins. Default call ``self.before_iter``
Args:
trainer: The trainer instance.

Returns: None
"""
self.before_iter(trainer)

def before_val_iter(self, trainer):
"""
Will be called before every validation iteration begins. Calls ``self.before_iter`` by default.
Args:
trainer: The trainer instance.

Returns: None
"""
self.before_iter(trainer)

def after_train_iter(self, trainer):
"""
Will be called after every train iteration ends. Calls ``self.after_iter`` by default.
Args:
trainer: The trainer instance.

Returns: None
"""
self.after_iter(trainer)

def after_val_iter(self, trainer):
"""
Will be called after every validation iteration ends. Calls ``self.after_iter`` by default.
Args:
trainer: The trainer instance.

Returns: None
"""
self.after_iter(trainer)

def every_n_epochs(self, trainer, n):
"""
Whether the current epoch count hits every ``n`` epochs.
Returns: bool
"""
return (trainer.epoch + 1) % n == 0 if n > 0 else False

def every_n_iters(self, trainer, n):
"""
Whether the current iteration count hits every ``n`` iterations.
Returns: bool
"""
return (trainer.iter + 1) % n == 0 if n > 0 else False

def end_of_epoch(self, trainer):
"""
Whether the current inner iteration is the last one of the epoch.
Returns: bool
"""
return trainer.inner_iter + 1 == len(trainer.data_loader)

def is_last_epoch(self, trainer):
"""
Whether the current epoch is the last one of training.
Returns: bool
"""
return trainer.epoch + 1 == trainer._max_epochs

def is_last_iter(self, trainer):
"""
Whether the current iteration is the last one of the entire training process.
Returns: bool
"""
return trainer.iter + 1 == trainer._max_iters

def get_triggered_stages(self):
trigger_stages = set()
for stage in Hook.stages:
if is_method_overridden(stage, Hook, self):
trigger_stages.add(stage)

return [stage for stage in Hook.stages if stage in trigger_stages]
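As a quick reference, a minimal custom hook built on this base class might look like the sketch below (``LossPrinterHook`` is a hypothetical name; it relies only on the stage methods and ``PRIORITY`` attribute defined above):

from modelscope.trainers.hooks.builder import HOOKS
from modelscope.trainers.hooks.hook import Hook
from modelscope.trainers.hooks.priority import Priority


@HOOKS.register_module()
class LossPrinterHook(Hook):
    """Print the buffered loss at the end of every train epoch (illustrative only)."""

    PRIORITY = Priority.LOW

    def after_train_epoch(self, trainer):
        # log_buffer.output holds the values averaged during the epoch
        loss = trainer.log_buffer.output.get('loss')
        if loss is not None:
            trainer.logger.info(f'epoch {trainer.epoch + 1}: loss={loss:.4f}')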

+ 22
- 0
modelscope/trainers/hooks/iter_timer_hook.py View File

@@ -0,0 +1,22 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import time

from .builder import HOOKS
from .hook import Hook
from .priority import Priority


@HOOKS.register_module()
class IterTimerHook(Hook):
PRIORITY = Priority.LOW

def before_epoch(self, trainer):
self.start_time = time.time()

def before_iter(self, trainer):
trainer.log_buffer.update(
{'data_load_time': time.time() - self.start_time})

def after_iter(self, trainer):
trainer.log_buffer.update({'time': time.time() - self.start_time})
self.start_time = time.time()

+ 6
- 0
modelscope/trainers/hooks/logger/__init__.py View File

@@ -0,0 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.trainers.utils.log_buffer import LogBuffer
from .base import LoggerHook
from .text_logger_hook import TextLoggerHook

__all__ = ['TextLoggerHook', 'LoggerHook', 'LogBuffer']

+ 115
- 0
modelscope/trainers/hooks/logger/base.py View File

@@ -0,0 +1,115 @@
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) Alibaba, Inc. and its affiliates.
import numbers
from abc import ABCMeta, abstractmethod

import numpy as np
import torch

from modelscope.trainers.hooks.hook import Hook
from ..priority import Priority


class LoggerHook(Hook):
"""Base class for logger hooks.

Args:
interval (int): Logging interval (every k iterations).
ignore_last (bool): Ignore the log of last iterations in each epoch
if less than `interval`.
reset_flag (bool): Whether to clear the output buffer after logging.
by_epoch (bool): Whether ``EpochBasedTrainer`` is used.
"""

__metaclass__ = ABCMeta
PRIORITY = Priority.VERY_LOW

def __init__(self,
interval=10,
ignore_last=True,
reset_flag=False,
by_epoch=True):
self.interval = interval
self.ignore_last = ignore_last
self.reset_flag = reset_flag
self.by_epoch = by_epoch

@abstractmethod
def log(self, trainer):
pass

@staticmethod
def is_scalar(val, include_np=True, include_torch=True):
"""Tell the input variable is a scalar or not.

Args:
val: Input variable.
include_np (bool): Whether to treat 0-d np.ndarray as a scalar.
include_torch (bool): Whether to treat 0-d torch.Tensor as a scalar.

Returns:
bool: True or False.
"""
if isinstance(val, numbers.Number):
return True
elif include_np and isinstance(val, np.ndarray) and val.ndim == 0:
return True
elif include_torch and isinstance(val, torch.Tensor) and val.ndim == 0:
return True
else:
return False

def get_epoch(self, trainer):
if trainer.mode == 'train':
epoch = trainer.epoch + 1
elif trainer.mode == 'val':
# normal val mode
# trainer.epoch += 1 has been done before val workflow
epoch = trainer.epoch
else:
raise ValueError(f"trainer mode should be 'train' or 'val', "
f'but got {trainer.mode}')
return epoch

def get_iter(self, trainer, inner_iter=False):
"""Get the current training iteration step."""
if self.by_epoch and inner_iter:
current_iter = trainer.inner_iter + 1
else:
current_iter = trainer.iter + 1
return current_iter

def before_run(self, trainer):
for hook in trainer.hooks[::-1]:
if isinstance(hook, LoggerHook):
hook.reset_flag = True
break

def before_epoch(self, trainer):
trainer.log_buffer.clear() # clear logs of last epoch

def after_train_iter(self, trainer):
if self.by_epoch and (trainer.inner_iter + 1) % self.interval == 0:
    # log every `interval` iterations within the current epoch
    trainer.log_buffer.average(self.interval)
elif not self.by_epoch and self.every_n_iters(trainer, self.interval):
trainer.log_buffer.average(self.interval)
elif self.end_of_epoch(trainer) and not self.ignore_last:
# not precise but more stable
trainer.log_buffer.average(self.interval)

if trainer.log_buffer.ready:
self.log(trainer)
if self.reset_flag:
trainer.log_buffer.clear_output()

def after_train_epoch(self, trainer):
if trainer.log_buffer.ready:
self.log(trainer)
if self.reset_flag:
trainer.log_buffer.clear_output()

def after_val_epoch(self, trainer):
trainer.log_buffer.average()
self.log(trainer)
if self.reset_flag:
trainer.log_buffer.clear_output()

+ 159
- 0
modelscope/trainers/hooks/logger/text_logger_hook.py View File

@@ -0,0 +1,159 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import datetime
import os
import os.path as osp
from collections import OrderedDict

import json
import torch
from torch import distributed as dist

from modelscope.utils.torch_utils import get_dist_info
from ..builder import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class TextLoggerHook(LoggerHook):
"""Logger hook in text, Output log to both console and local json file.

Args:
by_epoch (bool, optional): Whether ``EpochBasedTrainer`` is used.
Default: True.
interval (int, optional): Logging interval (every k iterations).
Default: 10.
ignore_last (bool, optional): Ignore the log of last iterations in each
epoch if less than :attr:`interval`. Default: True.
reset_flag (bool, optional): Whether to clear the output buffer after
logging. Default: False.
out_dir (str): The directory to save logs. If None, ``trainer.work_dir`` will be used.
"""

def __init__(self,
by_epoch=True,
interval=10,
ignore_last=True,
reset_flag=False,
out_dir=None):
super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag,
by_epoch)
self.by_epoch = by_epoch
self.time_sec_tot = 0
self.out_dir = out_dir
self._logged_keys = []  # store the keys that have been logged

def before_run(self, trainer):
super(TextLoggerHook, self).before_run(trainer)

if self.out_dir is None:
self.out_dir = trainer.work_dir

if not osp.exists(self.out_dir):
os.makedirs(self.out_dir)

trainer.logger.info('Text logs will be saved to {}'.format(
self.out_dir))

self.start_iter = trainer.iter
self.json_log_path = osp.join(self.out_dir,
'{}.log.json'.format(trainer.timestamp))
if hasattr(trainer, 'meta') and trainer.meta is not None:
self._dump_log(trainer.meta)

def _get_max_memory(self, trainer):
device = getattr(trainer.model, 'output_device', None)
mem = torch.cuda.max_memory_allocated(device=device)
mem_mb = torch.tensor([mem / (1024 * 1024)],
dtype=torch.int,
device=device)
_, world_size = get_dist_info()
if world_size > 1:
dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX)
return mem_mb.item()

def _log_info(self, log_dict, trainer):
if log_dict['mode'] == 'train':
if isinstance(log_dict['lr'], dict):
lr_str = []
for k, val in log_dict['lr'].items():
lr_str.append(f'lr_{k}: {val:.3e}')
lr_str = ' '.join(lr_str)
else:
lr_str = f'lr: {log_dict["lr"]:.3e}'

if self.by_epoch:
log_str = f'Epoch [{log_dict["epoch"]}][{log_dict["iter"]}/{len(trainer.data_loader)}]\t'
else:
log_str = f'Iter [{log_dict["iter"]}/{trainer.max_iters}]\t'
log_str += f'{lr_str}, '
self._logged_keys.extend(['lr', 'mode', 'iter', 'epoch'])

if 'time' in log_dict.keys():
self.time_sec_tot += (log_dict['time'] * self.interval)
time_sec_avg = self.time_sec_tot / (
trainer.iter - self.start_iter + 1)
eta_sec = time_sec_avg * (trainer.max_iters - trainer.iter - 1)
eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
log_str += f'eta: {eta_str}, '
log_str += f'time: {log_dict["time"]:.3f}, data_load_time: {log_dict["data_load_time"]:.3f}, '
self._logged_keys.extend([
'time',
'data_load_time',
])
else:
# val/test time
# here 1000 is the length of the val dataloader
# by epoch: Epoch[val] [4][1000]
# by iter: Iter[val] [1000]
if self.by_epoch:
log_str = f'Epoch({log_dict["mode"]}) [{log_dict["epoch"]}][{log_dict["iter"]}]\t'
else:
log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t'
self._logged_keys.extend(['mode', 'iter', 'epoch'])

log_items = []
for name, val in log_dict.items():
if name in self._logged_keys:
continue
if isinstance(val, float):
val = f'{val:.4f}'
log_items.append(f'{name}: {val}')
log_str += ', '.join(log_items)

trainer.logger.info(log_str)

def _dump_log(self, log_dict):
# dump log in json format
json_log = OrderedDict()
for k, v in log_dict.items():
json_log[k] = self._round_float(v)

rank, _ = get_dist_info()
if rank == 0:
with open(self.json_log_path, 'a+') as f:
json.dump(json_log, f)
f.write('\n')

def _round_float(self, items, ndigits=5):
if isinstance(items, list):
return [self._round_float(item) for item in items]
elif isinstance(items, float):
return round(items, ndigits)
else:
return items

def log(self, trainer):
cur_iter = self.get_iter(trainer, inner_iter=True)

log_dict = OrderedDict(
mode=trainer.mode, epoch=self.get_epoch(trainer), iter=cur_iter)

# statistic memory
if torch.cuda.is_available():
log_dict['memory'] = self._get_max_memory(trainer)

log_dict = dict(log_dict, **trainer.log_buffer.output)

self._log_info(log_dict, trainer)
self._dump_log(log_dict)
return log_dict

+ 71
- 0
modelscope/trainers/hooks/lr_scheduler_hook.py View File

@@ -0,0 +1,71 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.trainers.lrscheduler.builder import build_lr_scheduler
from .builder import HOOKS
from .hook import Hook
from .priority import Priority


@HOOKS.register_module()
class LrSchedulerHook(Hook):
"""Lr scheduler.

Args:
by_epoch (bool): Whether lr changes by epoch
warmup (dict): warm up config
"""
PRIORITY = Priority.VERY_HIGH

def __init__(self, by_epoch=True, warmup=None) -> None:
super().__init__()
self.by_epoch = by_epoch
if not self.by_epoch:
raise ValueError('We only support ``by_epoch=True`` now!')

self.warmup = warmup
self.warmup_lr_scheduler = None

def before_run(self, trainer):
if self.warmup is not None:
assert isinstance(self.warmup, dict) and 'type' in self.warmup
self.warmup_lr_scheduler = build_lr_scheduler(
cfg=self.warmup,
default_args={'base_scheduler': trainer.lr_scheduler})

def get_current_lr(self, trainer):
import torch

if isinstance(trainer.optimizer, torch.optim.Optimizer):
lr = [group['lr'] for group in trainer.optimizer.param_groups]
elif isinstance(trainer.optimizer, dict):
lr = dict()
for name, optim in trainer.optimizer.items():
lr[name] = [group['lr'] for group in optim.param_groups]
else:
raise RuntimeError(
'lr is not applicable because optimizer does not exist.')
return lr

def before_train_iter(self, trainer):
trainer.log_buffer.output['lr'] = self._get_log_lr(trainer)

def before_train_epoch(self, trainer):
if self.by_epoch:
if self.warmup_lr_scheduler is not None:
self.warmup_lr_scheduler.step()
else:
trainer.lr_scheduler.step()
trainer.log_buffer.output['lr'] = self._get_log_lr(trainer)

def _get_log_lr(self, trainer):
cur_lr = self.get_current_lr(trainer)
# only record lr of the first param group
if isinstance(cur_lr, list):
lr = cur_lr[0]
else:
assert isinstance(cur_lr, dict)
lr = {}
for k, lr_ in cur_lr.items():
assert isinstance(lr_, list)
lr.update({k: lr_[0]})

return lr

+ 37
- 0
modelscope/trainers/hooks/optimizer_hook.py View File

@@ -0,0 +1,37 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from torch.nn.utils import clip_grad

from .builder import HOOKS
from .hook import Hook
from .priority import Priority


@HOOKS.register_module()
class OptimizerHook(Hook):

PRIORITY = Priority.ABOVE_NORMAL

def __init__(self, grad_clip=None, loss_keys='loss') -> None:
if isinstance(loss_keys, str):
loss_keys = [loss_keys]
assert isinstance(loss_keys, (tuple, list))
self.loss_keys = loss_keys
self.grad_clip = grad_clip

def clip_grads(self, params, **clip_args):
params = list(
filter(lambda p: p.requires_grad and p.grad is not None, params))
if len(params) > 0:
return clip_grad.clip_grad_norm_(params, **clip_args)

def after_train_iter(self, trainer):
trainer.optimizer.zero_grad()

for k in self.loss_keys:
trainer.train_outputs[k].backward()

clip_args = self.grad_clip
if clip_args is not None:
self.clip_grads(trainer.model.parameters(), **clip_args)

trainer.optimizer.step()
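In a training config, ``grad_clip`` is forwarded verbatim to ``torch.nn.utils.clip_grad.clip_grad_norm_``, so a typical hook entry might look like the following sketch (values are illustrative):

optimizer_hook_cfg = dict(
    type='OptimizerHook',
    loss_keys='loss',
    # forwarded as clip_grad_norm_(params, max_norm=1.0, norm_type=2)
    grad_clip=dict(max_norm=1.0, norm_type=2),
)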

+ 62
- 0
modelscope/trainers/hooks/priority.py View File

@@ -0,0 +1,62 @@
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) Alibaba, Inc. and its affiliates.
from enum import Enum
from typing import Union


class Priority(Enum):
"""Hook priority levels.

+--------------+------------+
| Level | Value |
+==============+============+
| HIGHEST | 0 |
+--------------+------------+
| VERY_HIGH | 10 |
+--------------+------------+
| HIGH | 30 |
+--------------+------------+
| ABOVE_NORMAL | 40 |
+--------------+------------+
| NORMAL | 50 |
+--------------+------------+
| BELOW_NORMAL | 60 |
+--------------+------------+
| LOW | 70 |
+--------------+------------+
| VERY_LOW | 90 |
+--------------+------------+
| LOWEST | 100 |
+--------------+------------+
"""

HIGHEST = 0
VERY_HIGH = 10
HIGH = 30
ABOVE_NORMAL = 40
NORMAL = 50
BELOW_NORMAL = 60
LOW = 70
VERY_LOW = 90
LOWEST = 100


def get_priority(priority: Union[int, str, Priority]) -> int:
"""Get priority value.

Args:
priority (int or str or :obj:`Priority`): Priority.

Returns:
int: The priority value.
"""
if isinstance(priority, int):
if priority < 0 or priority > 100:
raise ValueError('priority must be between 0 and 100')
return priority
elif isinstance(priority, Priority):
return priority.value
elif isinstance(priority, str):
return Priority[priority.upper()].value
else:
raise TypeError('priority must be an integer or Priority enum value')
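``get_priority`` accepts any of the three representations and always returns the integer value, e.g. (assuming the module above is imported):

assert get_priority(30) == 30
assert get_priority('above_normal') == 40      # case-insensitive name lookup
assert get_priority(Priority.VERY_LOW) == 90   # enum member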

+ 8
- 0
modelscope/trainers/lrscheduler/__init__.py View File

@@ -0,0 +1,8 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .builder import LR_SCHEDULER, build_lr_scheduler
from .warmup import BaseWarmup, ConstantWarmup, ExponentialWarmup, LinearWarmup

__all__ = [
'LR_SCHEDULER', 'build_lr_scheduler', 'BaseWarmup', 'ConstantWarmup',
'LinearWarmup', 'ExponentialWarmup'
]

+ 47
- 0
modelscope/trainers/lrscheduler/builder.py View File

@@ -0,0 +1,47 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import inspect

from modelscope.utils.config import ConfigDict
from modelscope.utils.registry import Registry, build_from_cfg, default_group

LR_SCHEDULER = Registry('lr scheduler')


def build_lr_scheduler(cfg: ConfigDict, default_args: dict = None):
""" build lr scheduler from given lr scheduler config dict

Args:
cfg (:obj:`ConfigDict`): config dict for lr scheduler object.
default_args (dict, optional): Default initialization arguments.
"""
if cfg['type'].lower().endswith('warmup'):
# build warmup lr scheduler
if not hasattr(cfg, 'base_scheduler'):
if default_args is None or ('base_scheduler' not in default_args):
raise ValueError(
    'Must provide ``base_scheduler``, an instance of ``torch.optim.lr_scheduler._LRScheduler``, '
    'to build a warmup lr scheduler.')
else:
# build lr scheduler without warmup
if not hasattr(cfg, 'optimizer'):
if default_args is None or ('optimizer' not in default_args):
raise ValueError(
    'Must provide ``optimizer``, an instance of ``torch.optim.Optimizer``, '
    'to build an lr scheduler.')

return build_from_cfg(
cfg, LR_SCHEDULER, group_key=default_group, default_args=default_args)


def register_torch_lr_scheduler():
from torch.optim import lr_scheduler
from torch.optim.lr_scheduler import _LRScheduler

members = inspect.getmembers(lr_scheduler)

for name, obj in members:
if inspect.isclass(obj) and issubclass(obj, _LRScheduler):
LR_SCHEDULER.register_module(module_name=name, module_cls=obj)


register_torch_lr_scheduler()
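Because ``register_torch_lr_scheduler`` registers every ``_LRScheduler`` subclass in ``torch.optim.lr_scheduler`` under its class name, a plain scheduler can be built from a config dict. A small sketch, assuming the import paths added in this change:

import torch

from modelscope.trainers.lrscheduler.builder import build_lr_scheduler
from modelscope.utils.config import ConfigDict

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# 'StepLR' resolves to torch.optim.lr_scheduler.StepLR through the registry
cfg = ConfigDict(type='StepLR', step_size=10, gamma=0.5)
lr_scheduler = build_lr_scheduler(cfg, default_args={'optimizer': optimizer})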

+ 5
- 0
modelscope/trainers/lrscheduler/warmup/__init__.py View File

@@ -0,0 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .base import BaseWarmup
from .warmup import ConstantWarmup, ExponentialWarmup, LinearWarmup

__all__ = ['BaseWarmup', 'ConstantWarmup', 'LinearWarmup', 'ExponentialWarmup']

+ 75
- 0
modelscope/trainers/lrscheduler/warmup/base.py View File

@@ -0,0 +1,75 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from torch.optim.lr_scheduler import _LRScheduler


class BaseWarmup(_LRScheduler):
"""Base warmup scheduler

Args:
base_scheduler (torch.optim.lr_scheduler._LRScheduler): the scheduler to wrap with warmup
warmup_iters (int | list): Warmup iterations
last_epoch (int): The index of last epoch.
"""

def __init__(self,
base_scheduler,
warmup_iters,
last_epoch=-1,
verbose=False):
self.base_scheduler = base_scheduler
self.warmup_iters = warmup_iters
optimizer = self.base_scheduler.optimizer
self._is_init_step = True

super(BaseWarmup, self).__init__(
optimizer, last_epoch=last_epoch, verbose=verbose)

def get_lr(self):
return self.base_scheduler.get_lr()

def state_dict(self):
    return self.base_scheduler.state_dict()

def load_state_dict(self, state_dict):
self.base_scheduler.load_state_dict(state_dict)

def scale(self):
"""Scale the learning rates.
"""
scale_value = self.get_warmup_scale(self.base_scheduler._step_count
- 1)
if isinstance(scale_value, (int, float)):
scale_value = [
scale_value for _ in range(len(self.optimizer.param_groups))
]
else:
assert isinstance(
scale_value, (list, tuple)), 'Only support list or tuple type!'
assert len(scale_value) == len(
self.optimizer.param_groups), ('Size mismatch {} != {}'.format(
len(scale_value), len(self.optimizer.param_groups)))

for i, group in enumerate(self.optimizer.param_groups):
group['lr'] *= scale_value[i]

def step(self, epoch=None):
"""
Scale the lr while ``self.base_scheduler._step_count`` is within ``self.warmup_iters``; afterwards delegate to the base scheduler.
"""
if self.base_scheduler._step_count > self.warmup_iters:
return self.base_scheduler.step(epoch=epoch)

for group, lr in zip(self.optimizer.param_groups, self.base_lrs):
group['lr'] = lr

# `base_scheduler` has done step() at init when build
if self._is_init_step:
self._is_init_step = False
else:
self.base_scheduler.step(epoch=epoch)

self.scale()

def get_warmup_scale(self, cur_iter):
    raise NotImplementedError

+ 79
- 0
modelscope/trainers/lrscheduler/warmup/warmup.py View File

@@ -0,0 +1,79 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from modelscope.trainers.lrscheduler.builder import LR_SCHEDULER
from .base import BaseWarmup


@LR_SCHEDULER.register_module()
class ConstantWarmup(BaseWarmup):
"""Linear warmup scheduler.

Args:
base_scheduler (torch.optim._LRScheduler): an instance of torch.optim._LRScheduler type
warmup_ratio (float): Lr used at warmup stage equals to warmup_ratio * initial_lr
warmup_iters (int | list): Warmup iterations
last_epoch (int): The index of last epoch.
"""

def __init__(self,
base_scheduler,
warmup_iters,
warmup_ratio=0.1,
last_epoch=-1):
self.warmup_ratio = warmup_ratio
super(ConstantWarmup, self).__init__(
base_scheduler, warmup_iters=warmup_iters, last_epoch=last_epoch)

def get_warmup_scale(self, cur_iter):
if cur_iter >= self.warmup_iters:
return 1.0
return self.warmup_ratio


@LR_SCHEDULER.register_module()
class LinearWarmup(BaseWarmup):
"""Linear warmup scheduler.

Args:
base_scheduler (torch.optim.lr_scheduler._LRScheduler): the scheduler to wrap with warmup
warmup_iters (int | list): Warmup iterations
warmup_ratio (float): The lr used at the beginning of warmup equals warmup_ratio * initial_lr
last_epoch (int): The index of last epoch.
"""

def __init__(self,
base_scheduler,
warmup_iters,
warmup_ratio=0.1,
last_epoch=-1):
self.warmup_ratio = warmup_ratio
super(LinearWarmup, self).__init__(
base_scheduler, warmup_iters=warmup_iters, last_epoch=last_epoch)

def get_warmup_scale(self, cur_iter):
k = (1 - cur_iter / self.warmup_iters) * (1 - self.warmup_ratio)
return 1 - k


@LR_SCHEDULER.register_module()
class ExponentialWarmup(BaseWarmup):
"""Exponential warmup scheduler.

Args:
base_scheduler (torch.optim.lr_scheduler._LRScheduler): the scheduler to wrap with warmup
warmup_iters (int | list): Warmup iterations
warmup_ratio (float): The lr used at the beginning of warmup equals warmup_ratio * initial_lr
last_epoch (int): The index of last epoch.
"""

def __init__(self,
base_scheduler,
warmup_iters,
warmup_ratio=0.1,
last_epoch=-1):
self.warmup_ratio = warmup_ratio
super(ExponentialWarmup, self).__init__(
base_scheduler, warmup_iters=warmup_iters, last_epoch=last_epoch)

def get_warmup_scale(self, cur_iter):
k = self.warmup_ratio**(1 - cur_iter / self.warmup_iters)
return k
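The warmup classes wrap an already-built scheduler, so they can also be composed directly without the registry. A hedged sketch based on the constructors above:

import torch

from modelscope.trainers.lrscheduler.warmup import LinearWarmup

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
base_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)

# linearly scale the lr from 0.1 * warmup_ratio up to 0.1 over the first 5 steps,
# then hand control back to StepLR
lr_scheduler = LinearWarmup(base_scheduler, warmup_iters=5, warmup_ratio=0.1)

for _ in range(20):
    optimizer.step()
    lr_scheduler.step()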

+ 4
- 0
modelscope/trainers/optimizer/__init__.py View File

@@ -0,0 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .builder import OPTIMIZERS, build_optimizer

__all__ = ['OPTIMIZERS', 'build_optimizer']

+ 39
- 0
modelscope/trainers/optimizer/builder.py View File

@@ -0,0 +1,39 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import inspect

import torch

from modelscope.utils.config import ConfigDict
from modelscope.utils.registry import Registry, build_from_cfg, default_group

OPTIMIZERS = Registry('optimizer')


def build_optimizer(model: torch.nn.Module,
cfg: ConfigDict,
default_args: dict = None):
""" build optimizer from optimizer config dict

Args:
cfg (:obj:`ConfigDict`): config dict for optimizer object.
default_args (dict, optional): Default initialization arguments.
"""
if hasattr(model, 'module'):
model = model.module
cfg.params = model.parameters()

return build_from_cfg(
cfg, OPTIMIZERS, group_key=default_group, default_args=default_args)


def register_torch_optimizers():
for name, module in inspect.getmembers(torch.optim):
if name.startswith('__'):
continue
if inspect.isclass(module) and issubclass(module,
torch.optim.Optimizer):
OPTIMIZERS.register_module(
default_group, module_name=name, module_cls=module)


register_torch_optimizers()
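Since ``register_torch_optimizers`` registers every optimizer class in ``torch.optim``, building one only needs the class name and its keyword arguments; ``params`` is filled from the model inside ``build_optimizer``. A sketch:

import torch

from modelscope.trainers.optimizer.builder import build_optimizer
from modelscope.utils.config import ConfigDict

model = torch.nn.Linear(4, 2)

# 'AdamW' resolves to torch.optim.AdamW through the registry
cfg = ConfigDict(type='AdamW', lr=5e-5, weight_decay=0.01)
optimizer = build_optimizer(model, cfg=cfg)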

+ 703
- 0
modelscope/trainers/trainer.py View File

@@ -0,0 +1,703 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path
import random
import time
from distutils.version import LooseVersion
from functools import partial
from typing import Callable, List, Optional, Tuple, Union

import numpy as np
import torch
from addict import Dict
from torch import distributed as dist
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.distributed import DistributedSampler

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.metrics import build_metric, task_default_metrics
from modelscope.models.base import Model
from modelscope.models.base_torch import TorchModel
from modelscope.msdatasets.ms_dataset import MsDataset
from modelscope.preprocessors import build_preprocessor
from modelscope.preprocessors.base import Preprocessor
from modelscope.task_datasets import TorchTaskDataset, build_task_dataset
from modelscope.trainers.hooks.builder import HOOKS
from modelscope.trainers.hooks.priority import Priority, get_priority
from modelscope.trainers.lrscheduler.builder import build_lr_scheduler
from modelscope.trainers.optimizer.builder import build_optimizer
from modelscope.utils.config import ConfigDict
from modelscope.utils.constant import Hubs, ModelFile, Tasks
from modelscope.utils.logger import get_logger
from modelscope.utils.registry import build_from_cfg
from modelscope.utils.tensor_utils import torch_default_data_collator
from modelscope.utils.torch_utils import get_dist_info
from .base import BaseTrainer
from .builder import TRAINERS
from .hooks.hook import Hook


@TRAINERS.register_module()
class EpochBasedTrainer(BaseTrainer):
"""Epoch based Trainer, a training helper for PyTorch.

Args:
cfg_file(str): The local config file.
model (:obj:`torch.nn.Module` or :obj:`TorchModel` or `str`): The model to be run, or a valid model dir
or a model id. If model is None, build_model method will be called.
data_collator (`Callable`, *optional*):
The function to use to form a batch from a list of elements of `train_dataset` or `eval_dataset`.
train_dataset (`torch.utils.data.Dataset` or `torch.utils.data.IterableDataset`, *optional*):
The dataset to use for training.

Note that if it's a `torch.utils.data.IterableDataset` with some randomization and you are training in a
distributed fashion, your iterable dataset should either use an internal attribute
`torch.Generator` for the randomization that must be identical on all processes (and the Trainer will
manually set the seed of this `generator` at each epoch) or have a `set_epoch()` method that internally
sets the seed of the RNGs used.
eval_dataset (`torch.utils.data.Dataset`, *optional*): The dataset to use for evaluation.
preprocessor (:obj:`Preprocessor`, *optional*): The optional preprocessor.
NOTE: If the dataset has already been preprocessed by the user's custom code before being fed into
this trainer, this parameter should be None and the 'preprocessor' key should be removed from the cfg_file.
Otherwise the preprocessor will be instantiated from the cfg_file or taken from this parameter, and
the preprocessing will be executed every time the dataset's __getitem__ is called.
optimizers (`Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler._LRScheduler]`, *optional*): A tuple
containing the optimizer and the scheduler to use.
max_epochs (int, optional): Total training epochs.
"""

def __init__(
self,
model: Optional[Union[TorchModel, nn.Module, str]] = None,
cfg_file: Optional[str] = None,
arg_parse_fn: Optional[Callable] = None,
data_collator: Optional[Callable] = None,
train_dataset: Optional[Dataset] = None,
eval_dataset: Optional[Dataset] = None,
preprocessor: Optional[Preprocessor] = None,
optimizers: Tuple[torch.optim.Optimizer,
torch.optim.lr_scheduler._LRScheduler] = (None,
None),
**kwargs):
if isinstance(model, str):
if os.path.exists(model):
self.model_dir = model if os.path.isdir(
model) else os.path.dirname(model)
else:
self.model_dir = snapshot_download(model)
cfg_file = os.path.join(self.model_dir, ModelFile.CONFIGURATION)
self.model = self.build_model()
else:
assert cfg_file is not None, 'Config file should not be None if model is an nn.Module class'
assert isinstance(
model,
(TorchModel, nn.Module
)), 'model should be either a str, TorchModel or nn.Module.'
self.model_dir = os.path.dirname(cfg_file)
self.model = model

super().__init__(cfg_file, arg_parse_fn)
if 'work_dir' in kwargs:
self.work_dir = kwargs['work_dir']
else:
self.work_dir = self.cfg.train.get('work_dir', './work_dir')

self.preprocessor = None
if isinstance(preprocessor, Preprocessor):
self.preprocessor = preprocessor
elif hasattr(self.cfg, 'preprocessor'):
self.preprocessor = self.build_preprocessor()
# TODO @wenmeng.zwm add data collator option
# TODO how to fill device option?
self.device = int(
os.environ['LOCAL_RANK']) if 'LOCAL_RANK' in os.environ else None
self.train_dataset = self.to_task_dataset(
train_dataset, mode='train', preprocessor=self.preprocessor)
self.eval_dataset = self.to_task_dataset(
eval_dataset, mode='eval', preprocessor=self.preprocessor)
self.data_collator = data_collator if data_collator is not None else torch_default_data_collator
self.metrics = self.get_metrics()
self.optimizers = optimizers
self.logger = get_logger(log_level=self.cfg.get('log_level', 'INFO'))
self._mode = 'train'
self._hooks: List[Hook] = []
self._epoch = 0
self._iter = 0
self._inner_iter = 0
if 'max_epochs' not in kwargs:
assert hasattr(
self.cfg.train,
'max_epochs'), 'max_epochs is missing in configuration file'
self._max_epochs = self.cfg.train.max_epochs
else:
self._max_epochs = kwargs['max_epochs']

# TODO @wenmeng.zwm add seed init fn
self._seed = 0

self._dist = get_dist_info()[1] > 1

@property
def mode(self):
return self._mode

@property
def hooks(self) -> List[Hook]:
"""list[:obj:`Hook`]: A list of registered hooks."""
return self._hooks

@property
def epoch(self) -> int:
"""int: Current epoch."""
return self._epoch

@property
def iter(self) -> int:
"""int: Current iteration."""
return self._iter

@property
def inner_iter(self) -> int:
"""int: Iteration in an epoch."""
return self._inner_iter

@property
def max_epochs(self):
"""int: Maximum training epochs."""
return self._max_epochs

@property
def max_iters(self):
"""int: Maximum training iterations."""
return self._max_epochs * len(self.data_loader)

def to_task_dataset(self,
datasets: Tuple[Dataset, List[Dataset]],
mode: str,
preprocessor: Optional[Preprocessor] = None):
"""Build the task specific dataset processor for this trainer.

Returns: The task dataset processor for the task. If no processor is registered for the given
model type and task, the default TaskDataset will be returned.
"""
try:
if not datasets:
return datasets
if isinstance(datasets, TorchTaskDataset):
return datasets
task_dataset = build_task_dataset(
ConfigDict({
**self.cfg.model,
'mode': mode,
'preprocessor': preprocessor,
'datasets': datasets,
}), getattr(self.cfg, 'task', None))
return task_dataset
except Exception:
if isinstance(datasets, (List, Tuple)) or preprocessor is not None:
return TorchTaskDataset(
datasets,
mode=mode,
preprocessor=preprocessor,
**(self.cfg.model if hasattr(self.cfg, 'model') else {}))
else:
return datasets

def build_preprocessor(self) -> Preprocessor:
"""Build the preprocessor.

Users can override this method to implement custom logic.

Returns: The preprocessor instance.

"""
# TODO @wenmeng.zwm @jiangnana.jnn add support for different preprocessor
# when they are different ones in training and evaluation
cfg = ConfigDict({
**getattr(self.cfg, 'preprocessor'), 'model_dir':
self.model_dir
})
return build_preprocessor(cfg, Tasks.find_field_by_task(self.cfg.task))

def get_metrics(self) -> List[str]:
"""Get the metric class types.

The first choice is the metrics configured in the config file; if none are found, the default
metrics for the task are used.
If no metrics are found and an eval dataset exists, this method will raise an error.

Returns: The metric types.

"""
metrics = self.cfg.evaluation.metrics if hasattr(
self.cfg, 'evaluation') and hasattr(self.cfg.evaluation,
'metrics') else None
metrics = metrics if metrics is not None else task_default_metrics.get(
self.cfg.task)
if metrics is None and self.eval_dataset is not None:
raise ValueError(
f'Metrics are needed in evaluation, please try to either '
f'add metrics in configuration.json or add the default metric for {self.cfg.task}.'
)
if isinstance(metrics, str):
metrics = [metrics]
return metrics

def train(self, *args, **kwargs):
self.model.train()
self._mode = 'train'

if self.train_dataset is None:
self.train_dataloader = self.get_train_dataloader()
else:
self.train_dataloader = self._build_dataloader_with_dataset(
self.train_dataset, **self.cfg.train.get('dataloader', {}))
self.data_loader = self.train_dataloader

self.register_optimizers_hook()
self.register_hook_from_cfg(self.cfg.train.hooks)

self.train_loop(self.train_dataloader)

def evaluate(self, checkpoint_path=None):
self.model.eval()
self._mode = 'val'

if self.eval_dataset is None:
self.eval_dataloader = self.get_eval_data_loader()
else:
self.eval_dataloader = self._build_dataloader_with_dataset(
self.eval_dataset, **self.cfg.evaluation.get('dataloader', {}))
self.data_loader = self.eval_dataloader
metric_classes = [build_metric(metric) for metric in self.metrics]
self.evaluation_loop(self.eval_dataloader, checkpoint_path,
metric_classes)

metric_values = {}
for metric_cls in metric_classes:
metric_values.update(metric_cls.evaluate())
return metric_values

def build_model(self) -> Union[nn.Module, TorchModel]:
""" Instantiate a pytorch model and return.

By default, we will create a model using config from configuration file. You can
subclass and override this method in a subclass.

"""
# TODO temp implementation, waiting for @zhangzhicheng
model = Model.from_pretrained(self.model_dir)
if not isinstance(model, nn.Module) and hasattr(model, 'model'):
    return model.model
return model

def collate_fn(self, data):
"""Prepare the input just before the forward function.
This method will move the tensors to the right device.
Usually this method does not need to be overridden.

Args:
data: The data out of the dataloader.

Returns: The processed data.

"""
if isinstance(data, dict):
return type(data)({k: self.collate_fn(v) for k, v in data.items()})
elif isinstance(data, (tuple, np.ndarray, list)):
return type(data)(self.collate_fn(v) for v in data)
elif isinstance(data, torch.Tensor) and self.device is not None:
kwargs = dict(device=self.device)
return data.to(**kwargs)
return data

def train_step(self, model, inputs):
""" Perform a training step on a batch of inputs.

Subclass and override to inject custom behavior.

Args:
model (`TorchModel`): The model to train.
inputs (`Dict[str, Union[torch.Tensor, Any]]`):
The inputs and targets of the model.

The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
argument `labels`. Check your model's documentation for all accepted arguments.

Return:
    None. The raw outputs are stored in ``self.train_outputs`` and the scalar log values
    are pushed to ``self.log_buffer``.
"""
# EvaluationHook will do evaluate and change mode to val, return to train mode
# TODO: find more pretty way to change mode
model.train()
self._mode = 'train'
inputs = self.collate_fn(inputs)
if isinstance(inputs, dict):
train_outputs = model.forward(**inputs)
else:
train_outputs = model.forward(inputs)

if not isinstance(train_outputs, dict):
raise TypeError('The output of model.forward() must be a dict, '
                f'but got {type(train_outputs)}')

# add model output info to log
if 'log_vars' not in train_outputs:
default_keys_pattern = ['loss']
match_keys = set([])
for key_p in default_keys_pattern:
match_keys.update(
[key for key in train_outputs.keys() if key_p in key])

log_vars = {}
for key in match_keys:
value = train_outputs.get(key, None)
if value is not None:
if dist.is_available() and dist.is_initialized():
value = value.data.clone()
dist.all_reduce(value.div_(dist.get_world_size()))
log_vars.update({key: value.item()})
self.log_buffer.update(log_vars)
else:
self.log_buffer.update(train_outputs['log_vars'])

self.train_outputs = train_outputs

def prediction_step(self, model, inputs):
""" Perform forward step by `model` using `inputs`.

Args:
model (`TorchModel`): The model to evaluate.
inputs (`Dict[str, Union[torch.Tensor, Any]]`):
The inputs and targets of the model.

The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
argument `labels`. Check your model's documentation for all accepted arguments.
prediction_loss_only (`bool`):
Whether or not to return the loss only.
ignore_keys (`Lst[str]`, *optional*):
A list of keys in the output of your model (if it is a dictionary) that should be ignored when
gathering predictions.

Return:
Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss,
logits and labels (each being optional).
"""
raise NotImplementedError

def get_train_dataloader(self):
""" Builder torch dataloader for training.

We provide a reasonable default that works well. If you want to use something else, you can change
the config for data.train in configuration file, or subclass and override this method
(or `get_train_dataloader` in a subclass.
"""
train_data = self.cfg.dataset.train
if self.train_dataset is None:
self.train_dataset = self.build_dataset(train_data, mode='train')

data_loader = self._build_dataloader_with_dataset(
self.train_dataset, **self.cfg.train.get('dataloader', {}))
return data_loader

def get_eval_data_loader(self):
""" Builder torch dataloader for evaluation.

We provide a reasonable default that works well. If you want to use something else, you can change
the config for dataset.eval in configuration file, or subclass and override this method in a subclass.
pass
"""
val_data = self.cfg.dataset.val
if self.eval_dataset is None:
self.eval_dataset = self.build_dataset(val_data, mode='eval')

batch_size = self.cfg.evaluation.batch_size
workers = self.cfg.evaluation.workers
shuffle = self.cfg.evaluation.get('shuffle', False)
data_loader = self._build_dataloader_with_dataset(
self.eval_dataset,
batch_size_per_gpu=batch_size,
workers_per_gpu=workers,
shuffle=shuffle,
dist=self._dist,
seed=self._seed,
persistent_workers=True,
)
return data_loader

def build_dataset(self, data_cfg, mode):
""" Build torch dataset object using data config
"""
dataset = MsDataset.load(
dataset_name=data_cfg.name,
split=data_cfg.split,
subset_name=data_cfg.subset_name if hasattr(
data_cfg, 'subset_name') else None,
hub=data_cfg.hub if hasattr(data_cfg, 'hub') else Hubs.modelscope,
)
torch_dataset = dataset.to_torch_dataset(
preprocessors=self.preprocessor, )
dataset = self.to_task_dataset(
torch_dataset, mode, preprocessor=self.preprocessor)
return dataset

def create_optimizer_and_scheduler(self):
""" Create optimizer and lr scheduler

We provide a default implementation, if you want to customize your own optimizer
and lr scheduler, you can either pass a tuple through trainer init function or
subclass this class and override this method.


"""
optimizer, lr_scheduler = self.optimizers
if optimizer is None:
optimizer_cfg = self.cfg.train.get('optimizer', None)
else:
optimizer_cfg = None

optim_options = {}
if optimizer_cfg is not None:
optim_options = optimizer_cfg.pop('options', {})
optimizer = build_optimizer(self.model, cfg=optimizer_cfg)

if lr_scheduler is None:
lr_scheduler_cfg = self.cfg.train.get('lr_scheduler', None)
else:
lr_scheduler_cfg = None

lr_options = {}
if lr_scheduler_cfg is not None:
assert optimizer is not None
lr_options = lr_scheduler_cfg.pop('options', {})
lr_scheduler = build_lr_scheduler(
cfg=lr_scheduler_cfg, default_args={'optimizer': optimizer})

self.optimizer = optimizer
self.lr_scheduler = lr_scheduler
return self.optimizer, self.lr_scheduler, optim_options, lr_options

def register_optimizers_hook(self):
""" Register optimizer hook and lr scheduler hook.
"""
optimizer, lr_scheduler = self.optimizers
opti_error_msg = 'optimizers should be a tuple of `torch.optim.Optimizer`'\
' and `torch.optim.lr_scheduler._LRScheduler`'
if optimizer is not None:
assert isinstance(optimizer, torch.optim.Optimizer), opti_error_msg
if lr_scheduler is not None:
assert isinstance(
lr_scheduler,
torch.optim.lr_scheduler._LRScheduler), opti_error_msg

_, _, optim_options, lr_options = self.create_optimizer_and_scheduler()
lr_hook = dict(type='LrSchedulerHook', **lr_options)
optim_hook = dict(type='OptimizerHook', **optim_options)

self.register_hook_from_cfg([lr_hook, optim_hook])

def _build_dataloader_with_dataset(self,
dataset: Dataset,
batch_size_per_gpu: int,
workers_per_gpu: int,
dist: bool = False,
shuffle: bool = True,
seed: int = 0,
persistent_workers=False,
**kwargs) -> DataLoader:
"""Build dataloader using input dataset and cfg. Used by `EpochBasedTrainer.train()`
and `EpochBasedTrainer.evaluate()`.

In distributed training, each GPU/process has a dataloader.
In non-distributed training, there is only one dataloader for all GPUs.

Args:
dataset (Dataset): A PyTorch dataset.
batch_size_per_gpu (int): Number of training samples on each GPU, i.e.,
batch size of each GPU.
workers_per_gpu (int): How many subprocesses to use for data loading
for each GPU.
dist (bool): Distributed training/test or not. Default: False.
shuffle (bool): Whether to shuffle the data at every epoch.
    Default: True.
seed (int, Optional): Seed to be used. Default: 0.
persistent_workers (bool): If True, the data loader will not shutdown
the worker processes after a dataset has been consumed once.
This allows to maintain the workers `Dataset` instances alive.
This argument is only valid when PyTorch>=1.7.0. Default: False.
kwargs: any keyword argument to be used to initialize DataLoader

Returns:
DataLoader: A PyTorch dataloader.
"""
rank, world_size = get_dist_info()

# Whether distributed or not, `batch_size` is the number of samples on
# each GPU and `num_workers` the number of loader workers per GPU.
batch_size = batch_size_per_gpu
num_workers = workers_per_gpu

if dist:
sampler = DistributedSampler(
dataset, world_size, rank, shuffle=shuffle, seed=seed)
else:
sampler = None

batch_sampler = None

init_fn = partial(
worker_init_fn, num_workers=num_workers, rank=rank,
seed=seed) if seed is not None else None

if LooseVersion(torch.__version__) >= LooseVersion('1.7.0'):
kwargs['persistent_workers'] = persistent_workers
elif persistent_workers is True:
self.logger.warning(
'persistent_workers is invalid because your pytorch '
'version is lower than 1.7.0')

data_loader = DataLoader(
dataset,
batch_size=batch_size,
sampler=sampler,
num_workers=num_workers,
batch_sampler=batch_sampler,
collate_fn=self.data_collator,
pin_memory=kwargs.pop('pin_memory', False),
worker_init_fn=init_fn,
**kwargs)

return data_loader

def train_loop(self, data_loader):
""" Training loop used by `EpochBasedTrainer.train()`
"""
self.invoke_hook('before_run')
self._epoch = 0
kwargs = {}
for _ in range(self._epoch, self._max_epochs):
self.invoke_hook('before_train_epoch')
time.sleep(2) # Prevent possible deadlock during epoch transition
for i, data_batch in enumerate(data_loader):
self.data_batch = data_batch
self._inner_iter = i
self.invoke_hook('before_train_iter')
self.train_step(self.model, data_batch, **kwargs)
self.invoke_hook('after_train_iter')
del self.data_batch
self._iter += 1

self.invoke_hook('after_train_epoch')
self._epoch += 1

time.sleep(1) # wait for some hooks like loggers to finish
self.invoke_hook('after_run')

def evaluation_loop(self, data_loader, checkpoint_path, metric_classes):
""" Evaluation loop used by `EpochBasedTrainer.evaluate()`.

"""
if self._dist:
from modelscope.trainers.utils.inference import multi_gpu_test
multi_gpu_test(
self.model,
data_loader,
tmpdir=None,
gpu_collect=False,
data_collate_fn=self.collate_fn,
metric_classes=metric_classes)
else:
from modelscope.trainers.utils.inference import single_gpu_test
single_gpu_test(
self.model,
data_loader,
data_collate_fn=self.collate_fn,
metric_classes=metric_classes)

def register_hook(self, hook: Hook) -> None:
"""Register a hook into the hook list.

The hook will be inserted into a priority queue, with the specified
priority (See :class:`Priority` for details of priorities).
For hooks with the same priority, they will be triggered in the same
order as they are registered.

Args:
hook (:obj:`Hook`): The hook to be registered.
"""
assert isinstance(hook, Hook)
# insert the hook to a sorted list
inserted = False
for i in range(len(self._hooks) - 1, -1, -1):
if get_priority(hook.PRIORITY) > get_priority(
self._hooks[i].PRIORITY):
self._hooks.insert(i + 1, hook)
inserted = True
break
if not inserted:
self._hooks.insert(0, hook)

def register_hook_from_cfg(self, hook_cfg: Dict) -> None:
"""Register a hook from its cfg.

Args:
hook_cfg (dict): Hook config. It should have at least keys 'type'
and 'priority' indicating its type and priority.

Note:
The specific hook class to register should not use 'type' and
'priority' arguments during initialization.
"""
hook_cfg = hook_cfg.copy()
assert isinstance(hook_cfg, list)
for cfg_i in hook_cfg:
hook = build_from_cfg(cfg_i, HOOKS)
self.register_hook(hook)

def invoke_hook(self, fn_name: str) -> None:
"""Call all hooks.

Args:
fn_name (str): The function name in each hook to be called, such as
"before_train_epoch".
"""
for hook in self._hooks:
getattr(hook, fn_name)(self)

def get_hook_info(self) -> str:
# Get hooks info in each stage
stage_hook_map: Dict[str, list] = {stage: [] for stage in Hook.stages}
for hook in self.hooks:
try:
priority = Priority(hook.PRIORITY).name  # type: ignore
except ValueError:
priority = hook.PRIORITY  # type: ignore
classname = hook.__class__.__name__
hook_info = f'({priority:<12}) {classname:<35}'
for trigger_stage in hook.get_triggered_stages():
stage_hook_map[trigger_stage].append(hook_info)

stage_hook_infos = []
for stage in Hook.stages:
hook_infos = stage_hook_map[stage]
if len(hook_infos) > 0:
info = f'{stage}:\n'
info += '\n'.join(hook_infos)
info += '\n -------------------- '
stage_hook_infos.append(info)
return '\n'.join(stage_hook_infos)


def worker_init_fn(worker_id, num_workers, rank, seed):
# The seed of each worker equals to
# num_worker * rank + worker_id + user_seed
worker_seed = num_workers * rank + worker_id + seed
np.random.seed(worker_seed)
random.seed(worker_seed)
torch.manual_seed(worker_seed)
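Put together, a finetuning run is usually driven from a model id (or local model dir) whose configuration.json carries the ``train``/``evaluation`` sections read above. A hedged end-to-end sketch; the model id is a placeholder and ``ToyPairDataset`` stands in for a real MsDataset or torch dataset that matches the model's preprocessor:

from torch.utils.data import Dataset

from modelscope.trainers.trainer import EpochBasedTrainer


class ToyPairDataset(Dataset):
    """Tiny placeholder dataset; real runs would use MsDataset or custom data."""

    def __init__(self):
        self.items = [{'sentence1': 'a', 'sentence2': 'b', 'label': 0}] * 8

    def __len__(self):
        return len(self.items)

    def __getitem__(self, idx):
        return self.items[idx]


trainer = EpochBasedTrainer(
    model='your-namespace/your-model-id',   # placeholder model id
    train_dataset=ToyPairDataset(),
    eval_dataset=ToyPairDataset(),
    max_epochs=3,
    work_dir='./work_dir')
trainer.train()
print(trainer.evaluate())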

+ 208
- 0
modelscope/trainers/utils/inference.py View File

@@ -0,0 +1,208 @@
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import pickle
import shutil
import tempfile
import time

import torch
from torch import distributed as dist
from tqdm import tqdm

from modelscope.utils.torch_utils import get_dist_info


def single_gpu_test(model,
data_loader,
data_collate_fn=None,
metric_classes=None):
"""Test model with a single gpu.

Args:
    model (nn.Module): Model to be tested.
    data_loader (nn.Dataloader): Pytorch data loader.
    data_collate_fn: An optional collate function applied to data before it is fed into the model.
    metric_classes (List): List of Metric classes used to collect metrics.

Returns:
    None. Results are accumulated into the given ``metric_classes``.
"""
model.eval()
dataset = data_loader.dataset
with tqdm(total=len(dataset), desc='test samples') as pbar:
for data in data_loader:
if data_collate_fn is not None:
data = data_collate_fn(data)
with torch.no_grad():
result = model(**data)
if metric_classes is not None:
for metric_cls in metric_classes:
metric_cls.add(result, data)

batch_size = len(result)
for _ in range(batch_size):
pbar.update()


def multi_gpu_test(model,
data_loader,
tmpdir=None,
gpu_collect=False,
data_collate_fn=None,
metric_classes=None):
"""Test model with multiple gpus.

This method tests model with multiple gpus and collects the results
under two different modes: gpu and cpu modes. By setting
``gpu_collect=True``, it encodes results to gpu tensors and use gpu
communication for results collection. On cpu mode it saves the results on
different gpus to ``tmpdir`` and collects them by the rank 0 worker.

Args:
model (nn.Module): Model to be tested.
data_loader (nn.Dataloader): Pytorch data loader.
data_collate_fn: An optional collate function applied to data before it is fed into the model.
tmpdir (str): Path of directory to save the temporary results from
    different gpus under cpu mode.
gpu_collect (bool): Option to use either gpu or cpu to collect results.
metric_classes (List): List of Metric classes used to collect metrics.

Returns:
    None. Collected results are accumulated into the given ``metric_classes``.
"""
model.eval()
results = []
dataset = data_loader.dataset

time.sleep(2) # This line can prevent deadlock problem in some cases.

count = 0
with tqdm(total=len(dataset), desc='test samples with multi gpus') as pbar:
for _, data in enumerate(data_loader):
if data_collate_fn is not None:
data = data_collate_fn(data)
with torch.no_grad():
result = model(**data)
results.extend(result)

rank, world_size = get_dist_info()
if rank == 0:
batch_size = len(result)
batch_size_all = batch_size * world_size
count += batch_size_all
if count > len(dataset):
batch_size_all = len(dataset) - (count - batch_size_all)
for _ in range(batch_size_all):
pbar.update()

# collect results from all ranks
if gpu_collect:
results = collect_results_gpu(results, len(dataset))
else:
results = collect_results_cpu(results, len(dataset), tmpdir)
ground_truths = [dataset[i] for i in range(len(dataset))]
if metric_classes is not None:
for metric_cls in metric_classes:
metric_cls.add(results, ground_truths)


def collect_results_cpu(result_part, size, tmpdir=None):
"""Collect results under cpu mode.

On cpu mode, this function will save the results on different gpus to
``tmpdir`` and collect them by the rank 0 worker.

Args:
result_part (list): Result list containing result parts
to be collected.
size (int): Size of the results, commonly equal to length of
the results.
tmpdir (str | None): temporary directory where the collected results are
    stored. If set to None, the system temp directory will be used.

Returns:
list: The collected results.
"""
rank, world_size = get_dist_info()
# TODO create a random tmp dir if it is not specified
if tmpdir is None:
tmpdir = tempfile.gettempdir()
if not os.path.exists(tmpdir):
os.makedirs(tmpdir)
# dump the part result to the dir
with open(os.path.join(tmpdir, f'part_{rank}.pkl'), 'wb') as f:
    pickle.dump(result_part, f)
dist.barrier()
# collect all parts
if rank != 0:
return None
else:
# load results of all parts from tmp dir
part_list = []
for i in range(world_size):
part_file = os.path.join(tmpdir, f'part_{i}.pkl')
with open(part_file, 'rb') as f:
    part_result = pickle.load(f)
# When data is severely insufficient, an empty part_result
# on a certain gpu could make the overall outputs empty.
if part_result:
part_list.append(part_result)
# sort the results
ordered_results = []
for res in zip(*part_list):
ordered_results.extend(list(res))
# the dataloader may pad some samples
ordered_results = ordered_results[:size]
# remove tmp dir
shutil.rmtree(tmpdir)
return ordered_results


def collect_results_gpu(result_part, size):
"""Collect results under gpu mode.

On gpu mode, this function will encode results to gpu tensors and use gpu
communication for results collection.

Args:
result_part (list): Result list containing result parts
to be collected.
size (int): Size of the results, commonly equal to length of
the results.

Returns:
list: The collected results.
"""
rank, world_size = get_dist_info()
# dump result part to tensor with pickle
part_tensor = torch.tensor(
bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
# gather all result part tensor shape
shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
shape_list = [shape_tensor.clone() for _ in range(world_size)]
dist.all_gather(shape_list, shape_tensor)
# padding result part tensor to max length
shape_max = torch.tensor(shape_list).max()
part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
part_send[:shape_tensor[0]] = part_tensor
part_recv_list = [
part_tensor.new_zeros(shape_max) for _ in range(world_size)
]
# gather all result part
dist.all_gather(part_recv_list, part_send)

if rank == 0:
part_list = []
for recv, shape in zip(part_recv_list, shape_list):
part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())
# When data is severely insufficient, an empty part_result
# on a certain gpu could make the overall outputs empty.
if part_result:
part_list.append(part_result)
# sort the results
ordered_results = []
for res in zip(*part_list):
ordered_results.extend(list(res))
# the dataloader may pad some samples
ordered_results = ordered_results[:size]
return ordered_results

+ 42
- 0
modelscope/trainers/utils/log_buffer.py View File

@@ -0,0 +1,42 @@
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) Alibaba, Inc. and its affiliates.
from collections import OrderedDict

import numpy as np


class LogBuffer:

def __init__(self):
self.val_history = OrderedDict()
self.n_history = OrderedDict()
self.output = OrderedDict()
self.ready = False

def clear(self) -> None:
self.val_history.clear()
self.n_history.clear()
self.clear_output()

def clear_output(self) -> None:
self.output.clear()
self.ready = False

def update(self, vars: dict, count: int = 1) -> None:
assert isinstance(vars, dict)
for key, var in vars.items():
if key not in self.val_history:
self.val_history[key] = []
self.n_history[key] = []
self.val_history[key].append(var)
self.n_history[key].append(count)

def average(self, n: int = 0) -> None:
"""Average latest n values or all values."""
assert n >= 0
for key in self.val_history:
values = np.array(self.val_history[key][-n:])
nums = np.array(self.n_history[key][-n:])
avg = np.sum(values * nums) / np.sum(nums)
self.output[key] = avg
self.ready = True
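The buffer keeps a per-key history of values and counts; ``average(n)`` folds the latest ``n`` entries (or the whole history for ``n=0``) into ``output``. A small self-contained example, assuming the import path added by this change:

from modelscope.trainers.utils.log_buffer import LogBuffer

buf = LogBuffer()
buf.update({'loss': 1.0})
buf.update({'loss': 2.0})
buf.update({'loss': 4.0})

buf.average(n=2)          # average only the latest two values
assert buf.output['loss'] == 3.0 and buf.ready

buf.average()             # n=0 averages the whole history
assert buf.output['loss'] == 7.0 / 3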

+ 74
- 0
modelscope/utils/checkpoint.py View File

@@ -0,0 +1,74 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import io
import time
from collections import OrderedDict
from typing import Optional

import torch
from torch.optim import Optimizer

from modelscope import __version__
from modelscope.fileio import File


def weights_to_cpu(state_dict):
"""Copy a model state_dict to cpu.

Args:
state_dict (OrderedDict): Model weights on GPU.

Returns:
OrderedDict: Model weights on CPU.
"""
state_dict_cpu = OrderedDict()
for key, val in state_dict.items():
state_dict_cpu[key] = val.cpu()
# Keep metadata in state_dict
state_dict_cpu._metadata = getattr(state_dict, '_metadata', OrderedDict())
return state_dict_cpu


def save_checkpoint(model: torch.nn.Module,
filename: str,
optimizer: Optional[Optimizer] = None,
meta: Optional[dict] = None) -> None:
"""Save checkpoint to file.

The checkpoint will have 3 fields: ``meta``, ``state_dict`` and
``optimizer``. By default ``meta`` will contain version and time info.

Args:
model (Module): Module whose params are to be saved.
filename (str): Checkpoint filename.
optimizer (:obj:`Optimizer`, optional): Optimizer to be saved.
meta (dict, optional): Metadata to be saved in checkpoint.
"""
if meta is None:
meta = {}
elif not isinstance(meta, dict):
raise TypeError(f'meta must be a dict or None, but got {type(meta)}')
meta.update(modelscope=__version__, time=time.asctime())

if isinstance(model, torch.nn.parallel.DistributedDataParallel):
model = model.module

if hasattr(model, 'CLASSES') and model.CLASSES is not None:
# save class name to the meta
meta.update(CLASSES=model.CLASSES)

checkpoint = {
'meta': meta,
'state_dict': weights_to_cpu(model.state_dict())
}
# save optimizer state dict in the checkpoint
if isinstance(optimizer, Optimizer):
checkpoint['optimizer'] = optimizer.state_dict()
elif isinstance(optimizer, dict):
checkpoint['optimizer'] = {}
for name, optim in optimizer.items():
checkpoint['optimizer'][name] = optim.state_dict()

with io.BytesIO() as f:
torch.save(checkpoint, f)
File.write(f.getvalue(), filename)
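A short usage sketch, assuming ``modelscope.fileio.File`` handles plain local paths; the saved file can then be read back with ``torch.load``:

import torch

from modelscope.utils.checkpoint import save_checkpoint

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

save_checkpoint(model, 'epoch_1.pth', optimizer=optimizer,
                meta=dict(epoch=1, iter=100))

ckpt = torch.load('epoch_1.pth')
model.load_state_dict(ckpt['state_dict'])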

+ 49
- 6
modelscope/utils/constant.py View File

@@ -13,12 +13,7 @@ class Fields(object):
multi_modal = 'multi-modal'




class Tasks(object):
""" Names for tasks supported by modelscope.

Holds the standard task name to use for identifying different tasks.
This should be used to register models, pipelines, trainers.
"""
class CVTasks(object):
# vision tasks
image_to_text = 'image-to-text'
pose_estimation = 'pose-estimation'
@@ -33,6 +28,8 @@ class Tasks(object):
action_recognition = 'action-recognition'
video_embedding = 'video-embedding'



class NLPTasks(object):
# nlp tasks
word_segmentation = 'word-segmentation'
nli = 'nli'
@@ -56,11 +53,15 @@ class Tasks(object):
question_answering = 'question-answering'
zero_shot_classification = 'zero-shot-classification'



class AudioTasks(object):
# audio tasks
auto_speech_recognition = 'auto-speech-recognition'
text_to_speech = 'text-to-speech'
speech_signal_process = 'speech-signal-process'



class MultiModalTasks(object):
# multi-modal tasks
image_captioning = 'image-captioning'
visual_grounding = 'visual-grounding'
@@ -69,6 +70,47 @@ class Tasks(object):
visual_question_answering = 'visual-question-answering'




class Tasks(CVTasks, NLPTasks, AudioTasks, MultiModalTasks):
""" Names for tasks supported by modelscope.

Holds the standard task name to use for identifying different tasks.
This should be used to register models, pipelines, trainers.
"""

reverse_field_index = {}

@staticmethod
def find_field_by_task(task_name):
if len(Tasks.reverse_field_index) == 0:
# Lazy init, not thread safe
field_dict = {
Fields.cv: [
getattr(Tasks, attr) for attr in dir(CVTasks)
if not attr.startswith('__')
],
Fields.nlp: [
getattr(Tasks, attr) for attr in dir(NLPTasks)
if not attr.startswith('__')
],
Fields.audio: [
getattr(Tasks, attr) for attr in dir(AudioTasks)
if not attr.startswith('__')
],
Fields.multi_modal: [
getattr(Tasks, attr) for attr in dir(MultiModalTasks)
if not attr.startswith('__')
],
}

for field, tasks in field_dict.items():
for task in tasks:
if task in Tasks.reverse_field_index:
raise ValueError(f'Duplicate task: {task}')
Tasks.reverse_field_index[task] = field

return Tasks.reverse_field_index.get(task_name)

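A small usage sketch of the reverse lookup added above (assuming the task and field constants keep the values shown in this diff):

from modelscope.utils.constant import Fields, Tasks

assert Tasks.find_field_by_task(Tasks.word_segmentation) == Fields.nlp
assert Tasks.find_field_by_task(Tasks.image_captioning) == Fields.multi_modal
# Unknown task names return None instead of raising.
assert Tasks.find_field_by_task('no-such-task') is None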

class InputFields(object):
""" Names for input data fields in the input data for pipelines
"""
@@ -100,6 +142,7 @@ class ModelFile(object):
TF_CKPT_PREFIX = 'ckpt-'
TORCH_MODEL_FILE = 'pytorch_model.pt'
TORCH_MODEL_BIN_FILE = 'pytorch_model.bin'
LABEL_MAPPING = 'label_mapping.json'




class Requirements(object):


+ 13  - 0   modelscope/utils/hub.py

@@ -86,3 +86,16 @@ def get_model_type(model_dir):
return cfg.model_type if hasattr(cfg, 'model_type') else None
except Exception as e:
logger.error(f'parse config file failed with error: {e}')


def parse_label_mapping(model_dir):
import os
import json
label2id = None
label_path = os.path.join(model_dir, ModelFile.LABEL_MAPPING)
if os.path.exists(label_path):
with open(label_path) as f:
label_mapping = json.load(f)
label2id = {name: idx for name, idx in label_mapping.items()}

return label2id

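For reference, parse_label_mapping expects a flat name-to-id JSON file (label_mapping.json) inside the model directory; a minimal sketch with a temporary directory standing in for a real model dir:

import json
import os
import tempfile

from modelscope.utils.constant import ModelFile
from modelscope.utils.hub import parse_label_mapping

model_dir = tempfile.mkdtemp()
with open(os.path.join(model_dir, ModelFile.LABEL_MAPPING), 'w') as f:
    json.dump({'negative': 0, 'positive': 1}, f)

label2id = parse_label_mapping(model_dir)
print(label2id)  # {'negative': 0, 'positive': 1}; None when the file is absent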
+ 32  - 0   modelscope/utils/import_utils.py

@@ -54,6 +54,38 @@ def import_modules_from_file(py_file: str):
return module_name, mod




def is_method_overridden(method, base_class, derived_class):
"""Check if a method of base class is overridden in derived class.

Args:
method (str): the method name to check.
base_class (type): the class of the base class.
derived_class (type | Any): the class or instance of the derived class.
"""
assert isinstance(base_class, type), \
"base_class doesn't accept instance, Please pass class instead."

if not isinstance(derived_class, type):
derived_class = derived_class.__class__

base_method = getattr(base_class, method)
derived_method = getattr(derived_class, method)
return derived_method != base_method


def has_method(obj: object, method: str) -> bool:
"""Check whether the object has a method.

Args:
method (str): The method name to check.
obj (object): The object to check.

Returns:
bool: True if the object has the method else False.
"""
return hasattr(obj, method) and callable(getattr(obj, method))

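A short sketch of how the two helpers above behave (Base and Child are throwaway classes for illustration only):

from modelscope.utils.import_utils import has_method, is_method_overridden


class Base:

    def preprocess(self, x):
        return x


class Child(Base):

    def preprocess(self, x):
        return x * 2


assert is_method_overridden('preprocess', Base, Child)    # a class works
assert is_method_overridden('preprocess', Base, Child())  # so does an instance
assert not is_method_overridden('preprocess', Base, Base)
assert has_method(Child(), 'preprocess')
assert not has_method(Child(), 'postprocess')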

def import_modules(imports, allow_failed_imports=False):
"""Import modules from the given list of strings.




+ 77  - 0   modelscope/utils/tensor_utils.py

@@ -0,0 +1,77 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Part of the implementation is borrowed from huggingface/transformers.


def torch_nested_numpify(tensors):
import torch
"Numpify `tensors` (even if it's a nested list/tuple of tensors)."
if isinstance(tensors, (list, tuple)):
return type(tensors)(torch_nested_numpify(t) for t in tensors)
if isinstance(tensors, torch.Tensor):
t = tensors.cpu()
return t.numpy()
return tensors


def torch_nested_detach(tensors):
import torch
"Detach `tensors` (even if it's a nested list/tuple of tensors)."
if isinstance(tensors, (list, tuple)):
return type(tensors)(torch_nested_detach(t) for t in tensors)
if isinstance(tensors, torch.Tensor):
return tensors.detach()
return tensors


def torch_default_data_collator(features):
# TODO @jiangnana.jnn refine this default data collator
import torch

# if not isinstance(features[0], (dict, BatchEncoding)):
# features = [vars(f) for f in features]
first = features[0]

if isinstance(first, dict):
batch = {}
# Special handling for labels.
# Ensure that tensor is created with the correct type
# (it should be automatically the case, but let's make sure of it.)
if 'label' in first and first['label'] is not None:
label = first['label'].item() if isinstance(
first['label'], torch.Tensor) else first['label']
dtype = torch.long if isinstance(label, int) else torch.float
batch['labels'] = torch.tensor([f['label'] for f in features],
dtype=dtype)
elif 'label_ids' in first and first['label_ids'] is not None:
if isinstance(first['label_ids'], torch.Tensor):
batch['labels'] = torch.stack(
[f['label_ids'] for f in features])
else:
dtype = torch.long if type(
first['label_ids'][0]) is int else torch.float
batch['labels'] = torch.tensor(
[f['label_ids'] for f in features], dtype=dtype)

# Handling of all other possible keys.
# Again, we will use the first element to figure out which key/values are not None for this model.
for k, v in first.items():
if k not in ('label', 'label_ids'
) and v is not None and not isinstance(v, str):
if isinstance(v, torch.Tensor):
batch[k] = torch.stack([f[k] for f in features])
else:
batch[k] = torch.tensor([f[k] for f in features])
elif isinstance(first, tuple):
batch = []
for idx in range(len(first)):
if isinstance(first[idx], torch.Tensor):
batch.append(torch.stack([f[idx] for f in features]))
else:
batch.append(torch.tensor([f[idx] for f in features]))
else:
if isinstance(first, torch.Tensor):
batch = torch.stack(features)
else:
batch = torch.tensor(features)

return batch

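A quick sketch of what the default collator does with a toy batch of dict features (the key names here are arbitrary examples):

import torch

from modelscope.utils.tensor_utils import torch_default_data_collator

features = [
    {'input_ids': [1, 2, 3], 'label': 0},
    {'input_ids': [4, 5, 6], 'label': 1},
]
batch = torch_default_data_collator(features)
assert batch['labels'].dtype == torch.long  # int labels become a long tensor under 'labels'
assert batch['input_ids'].shape == (2, 3)   # remaining keys are stacked into batch tensors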
+ 127  - 0   modelscope/utils/torch_utils.py

@@ -0,0 +1,127 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Following code is partially borrowed from openmmlab/mmcv

import functools
import os
import socket
import subprocess
from collections import OrderedDict
from typing import Callable, List, Optional, Tuple

import torch
import torch.multiprocessing as mp
from torch import distributed as dist
from torch._utils import (_flatten_dense_tensors, _take_tensors,
_unflatten_dense_tensors)


def _find_free_port() -> int:
# Copied from https://github.com/facebookresearch/detectron2/blob/main/detectron2/engine/launch.py # noqa: E501
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Binding to port 0 will cause the OS to find an available port for us
sock.bind(('', 0))
port = sock.getsockname()[1]
sock.close()
# NOTE: there is still a chance the port could be taken by other processes.
return port


def _is_free_port(port: int) -> bool:
ips = socket.gethostbyname_ex(socket.gethostname())[-1]
ips.append('localhost')
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
return all(s.connect_ex((ip, port)) != 0 for ip in ips)


def init_dist(launcher: str, backend: str = 'nccl', **kwargs) -> None:
if mp.get_start_method(allow_none=True) is None:
mp.set_start_method('spawn')
if launcher == 'pytorch':
_init_dist_pytorch(backend, **kwargs)
elif launcher == 'mpi':
_init_dist_mpi(backend, **kwargs)
elif launcher == 'slurm':
_init_dist_slurm(backend, **kwargs)
else:
raise ValueError(f'Invalid launcher type: {launcher}')


def _init_dist_pytorch(backend: str, **kwargs) -> None:
# rank = int(os.environ['RANK'])
local_rank = int(os.environ['LOCAL_RANK'])

torch.cuda.set_device(local_rank)
dist.init_process_group(backend=backend, **kwargs)


def _init_dist_mpi(backend: str, **kwargs) -> None:
local_rank = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])
torch.cuda.set_device(local_rank)
if 'MASTER_PORT' not in os.environ:
# 29500 is torch.distributed default port
os.environ['MASTER_PORT'] = '29500'
if 'MASTER_ADDR' not in os.environ:
raise KeyError('The environment variable MASTER_ADDR is not set')
os.environ['WORLD_SIZE'] = os.environ['OMPI_COMM_WORLD_SIZE']
os.environ['RANK'] = os.environ['OMPI_COMM_WORLD_RANK']
dist.init_process_group(backend=backend, **kwargs)


def _init_dist_slurm(backend: str, port: Optional[int] = None) -> None:
"""Initialize slurm distributed training environment.

If argument ``port`` is not specified, then the master port will be system
environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system
environment variable, then a default port ``29500`` will be used.

Args:
backend (str): Backend of torch.distributed.
port (int, optional): Master port. Defaults to None.
"""
proc_id = int(os.environ['SLURM_PROCID'])
ntasks = int(os.environ['SLURM_NTASKS'])
node_list = os.environ['SLURM_NODELIST']
num_gpus = torch.cuda.device_count()
torch.cuda.set_device(proc_id % num_gpus)
addr = subprocess.getoutput(
f'scontrol show hostname {node_list} | head -n1')
# specify master port
if port is not None:
os.environ['MASTER_PORT'] = str(port)
elif 'MASTER_PORT' in os.environ:
pass # use MASTER_PORT in the environment variable
else:
# if torch.distributed default port(29500) is available
# then use it, else find a free port
if _is_free_port(29500):
os.environ['MASTER_PORT'] = '29500'
else:
os.environ['MASTER_PORT'] = str(_find_free_port())
# use MASTER_ADDR in the environment variable if it already exists
if 'MASTER_ADDR' not in os.environ:
os.environ['MASTER_ADDR'] = addr
os.environ['WORLD_SIZE'] = str(ntasks)
os.environ['LOCAL_RANK'] = str(proc_id % num_gpus)
os.environ['RANK'] = str(proc_id)
dist.init_process_group(backend=backend)


def get_dist_info() -> Tuple[int, int]:
if dist.is_available() and dist.is_initialized():
rank = dist.get_rank()
world_size = dist.get_world_size()
else:
rank = 0
world_size = 1
return rank, world_size


def master_only(func: Callable) -> Callable:

@functools.wraps(func)
def wrapper(*args, **kwargs):
rank, _ = get_dist_info()
if rank == 0:
return func(*args, **kwargs)

return wrapper

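As a usage sketch, get_dist_info degrades gracefully outside a distributed run, and master_only restricts a function to rank 0 (the output file name below is a placeholder):

from modelscope.utils.torch_utils import get_dist_info, master_only

rank, world_size = get_dist_info()  # (0, 1) when torch.distributed is not initialized


@master_only
def write_summary(path):
    # Executed on rank 0 only; other ranks receive None from the wrapper.
    with open(path, 'w') as f:
        f.write(f'rank {rank} of {world_size}\n')


write_summary('train_summary.txt')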
+ 0  - 3   output.wav

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b4153d9ffc0b72eeaf162b5c9f4426f95dcea2bb0da9e7b5e1b72fd2643b1915
size 50444

+ 0  - 0   tests/models/__init__.py


+ 60  - 0   tests/models/test_base_torch.py

@@ -0,0 +1,60 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import unittest

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from modelscope.models.base_torch import TorchModel


class TorchBaseTest(unittest.TestCase):

def test_custom_model(self):

class MyTorchModel(TorchModel):

def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 20, 5)
self.conv2 = nn.Conv2d(20, 20, 5)

def forward(self, x):
x = F.relu(self.conv1(x))
return F.relu(self.conv2(x))

model = MyTorchModel()
model.train()
model.eval()
out = model.forward(torch.rand(1, 1, 10, 10))
self.assertEqual((1, 20, 2, 2), out.shape)

def test_custom_model_with_postprocess(self):
add_bias = 200

class MyTorchModel(TorchModel):

def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 20, 5)
self.conv2 = nn.Conv2d(20, 20, 5)

def forward(self, x):
x = F.relu(self.conv1(x))
return F.relu(self.conv2(x))

def postprocess(self, x):
return x + add_bias

model = MyTorchModel()
model.train()
model.eval()
out = model(torch.rand(1, 1, 10, 10))
self.assertEqual((1, 20, 2, 2), out.shape)
self.assertTrue(np.all(out.detach().numpy() > (add_bias - 10)))


if __name__ == '__main__':
unittest.main()

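The second test only passes if calling the model routes through both forward and postprocess; the sketch below illustrates that contract. It is an assumption about base_torch.py, which is not shown in this hunk, not its actual implementation:

import torch
from torch import nn


class TorchModelSketch(nn.Module):
    """Illustrative stand-in for modelscope.models.base_torch.TorchModel."""

    def postprocess(self, outputs):
        # Identity by default; subclasses such as the test's MyTorchModel override it.
        return outputs

    def __call__(self, *args, **kwargs):
        # nn.Module.__call__ dispatches to forward(); the result is then post-processed,
        # which is why the +200 bias only appears when the model is invoked via __call__.
        outputs = super().__call__(*args, **kwargs)
        return self.postprocess(outputs)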
+ 1  - 1   tests/pipelines/test_base.py

@@ -6,9 +6,9 @@ from typing import Any, Dict, List, Tuple, Union
import numpy as np
import PIL

from modelscope.outputs import OutputKeys
from modelscope.pipelines import Pipeline, pipeline
from modelscope.pipelines.builder import PIPELINES, add_default_pipeline_info
from modelscope.pipelines.outputs import OutputKeys
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger
from modelscope.utils.registry import default_group


+ 1  - 1   tests/pipelines/test_image_captioning.py

@@ -2,8 +2,8 @@


import unittest

from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.outputs import OutputKeys
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level




+ 1  - 1   tests/pipelines/test_image_matting.py

@@ -7,8 +7,8 @@ import cv2


from modelscope.fileio import File
from modelscope.msdatasets import MsDataset
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.outputs import OutputKeys
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.test_utils import test_level




+ 1  - 1   tests/pipelines/test_person_image_cartoon.py

@@ -5,9 +5,9 @@ import unittest


import cv2

from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.base import Pipeline
from modelscope.pipelines.outputs import OutputKeys
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level




+ 1  - 1   tests/pipelines/test_text_to_image_synthesis.py

@@ -5,8 +5,8 @@ import unittest
import numpy as np

from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.outputs import OutputKeys
from modelscope.utils.constant import Tasks
from modelscope.utils.test_utils import test_level




+ 1  - 1   tests/pipelines/test_text_to_speech.py

@@ -9,8 +9,8 @@ from scipy.io.wavfile import write


from modelscope.metainfo import Pipelines
from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.outputs import OutputKeys
from modelscope.utils.constant import Fields, Tasks
from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level


+ 0  - 0   tests/trainers/hooks/__init__.py


+ 108  - 0   tests/trainers/hooks/test_checkpoint_hook.py

@@ -0,0 +1,108 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import unittest
from abc import ABCMeta

import json
import torch
from torch import nn
from torch.utils.data import Dataset

from modelscope.trainers import build_trainer
from modelscope.utils.constant import ModelFile


class DummyDataset(Dataset, metaclass=ABCMeta):

def __len__(self):
return 20

def __getitem__(self, idx):
return dict(feat=torch.rand((5, )), label=torch.randint(0, 4, (1, )))


class DummyModel(nn.Module):

def __init__(self):
super().__init__()
self.linear = nn.Linear(5, 4)
self.bn = nn.BatchNorm1d(4)

def forward(self, feat, labels):
x = self.linear(feat)

x = self.bn(x)
loss = torch.sum(x)
return dict(logits=x, loss=loss)


class CheckpointHookTest(unittest.TestCase):

def setUp(self):
print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
self.tmp_dir = tempfile.TemporaryDirectory().name
if not os.path.exists(self.tmp_dir):
os.makedirs(self.tmp_dir)

def tearDown(self):
super().tearDown()
shutil.rmtree(self.tmp_dir)

def test_checkpoint_hook(self):
json_cfg = {
'task': 'image_classification',
'train': {
'work_dir': self.tmp_dir,
'dataloader': {
'batch_size_per_gpu': 2,
'workers_per_gpu': 1
},
'optimizer': {
'type': 'SGD',
'lr': 0.01,
'options': {
'grad_clip': {
'max_norm': 2.0
}
}
},
'lr_scheduler': {
'type': 'StepLR',
'step_size': 2,
'options': {
'warmup': {
'type': 'LinearWarmup',
'warmup_iters': 2
}
}
},
'hooks': [{
'type': 'CheckpointHook',
'interval': 1
}]
}
}

config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION)
with open(config_path, 'w') as f:
json.dump(json_cfg, f)

trainer_name = 'EpochBasedTrainer'
kwargs = dict(
cfg_file=config_path,
model=DummyModel(),
data_collator=None,
train_dataset=DummyDataset(),
max_epochs=2)

trainer = build_trainer(trainer_name, kwargs)
trainer.train()
results_files = os.listdir(self.tmp_dir)
self.assertIn('epoch_1.pth', results_files)
self.assertIn('epoch_2.pth', results_files)


if __name__ == '__main__':
unittest.main()

+ 188  - 0   tests/trainers/hooks/test_lr_scheduler_hook.py

@@ -0,0 +1,188 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import unittest
from abc import ABCMeta

import json
import torch
from torch import nn
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import Dataset

from modelscope.trainers import build_trainer
from modelscope.utils.constant import ModelFile


class DummyDataset(Dataset, metaclass=ABCMeta):
"""Base Dataset
"""

def __len__(self):
return 10

def __getitem__(self, idx):
return dict(feat=torch.rand((5, )), label=torch.randint(0, 4, (1, )))


class DummyModel(nn.Module):

def __init__(self):
super().__init__()
self.linear = nn.Linear(5, 4)
self.bn = nn.BatchNorm1d(4)

def forward(self, feat, labels):
x = self.linear(feat)

x = self.bn(x)
loss = torch.sum(x)
return dict(logits=x, loss=loss)


class LrSchedulerHookTest(unittest.TestCase):

def setUp(self):
print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
self.tmp_dir = tempfile.TemporaryDirectory().name
if not os.path.exists(self.tmp_dir):
os.makedirs(self.tmp_dir)

def tearDown(self):
super().tearDown()
shutil.rmtree(self.tmp_dir)

def test_lr_scheduler_hook(self):
json_cfg = {
'task': 'image_classification',
'train': {
'work_dir': self.tmp_dir,
'dataloader': {
'batch_size_per_gpu': 2,
'workers_per_gpu': 1
}
}
}

config_path = os.path.join(self.tmp_dir, 'config.json')
with open(config_path, 'w') as f:
json.dump(json_cfg, f)

model = DummyModel()
optimizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4])
trainer_name = 'EpochBasedTrainer'
kwargs = dict(
cfg_file=config_path,
model=model,
train_dataset=DummyDataset(),
optimizers=(optimizer, lr_scheduler),
max_epochs=5)

trainer = build_trainer(trainer_name, kwargs)
train_dataloader = trainer._build_dataloader_with_dataset(
trainer.train_dataset, **trainer.cfg.train.get('dataloader', {}))
trainer.register_optimizers_hook()

trainer.invoke_hook('before_run')
log_lrs = []
optim_lrs = []
for _ in range(trainer._epoch, trainer._max_epochs):
trainer.invoke_hook('before_train_epoch')
for _, data_batch in enumerate(train_dataloader):
trainer.invoke_hook('before_train_iter')

log_lrs.append(trainer.log_buffer.output['lr'])
optim_lrs.append(optimizer.param_groups[0]['lr'])

trainer.train_step(trainer.model, data_batch)
trainer.invoke_hook('after_train_iter')

trainer.invoke_hook('after_train_epoch')
trainer._epoch += 1
trainer.invoke_hook('after_run')

iters = 5
target_lrs = [0.01] * iters * 1 + [0.001] * iters * 2 + [0.0001] * iters * 2

self.assertListEqual(log_lrs, target_lrs)
self.assertListEqual(optim_lrs, target_lrs)

def test_warmup_lr_scheduler_hook(self):
json_cfg = {
'task': 'image_classification',
'train': {
'work_dir': self.tmp_dir,
'dataloader': {
'batch_size_per_gpu': 2,
'workers_per_gpu': 1
},
'optimizer': {
'type': 'SGD',
'lr': 0.01
},
'lr_scheduler': {
'type': 'MultiStepLR',
'milestones': [4, 6],
'options': {
'warmup': {
'type': 'LinearWarmup',
'warmup_iters': 3
}
}
}
}
}

config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION)
with open(config_path, 'w') as f:
json.dump(json_cfg, f)

model = DummyModel()
# optimizer = SGD(model.parameters(), lr=0.01)
# lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4])
trainer_name = 'EpochBasedTrainer'
kwargs = dict(
cfg_file=config_path,
model=model,
train_dataset=DummyDataset(),
# optimizers=(optimizer, lr_scheduler),
max_epochs=7)

trainer = build_trainer(trainer_name, kwargs)
train_dataloader = trainer._build_dataloader_with_dataset(
trainer.train_dataset, **trainer.cfg.train.get('dataloader', {}))
trainer.register_optimizers_hook()

trainer.invoke_hook('before_run')
log_lrs = []
optim_lrs = []
for _ in range(trainer._epoch, trainer._max_epochs):
trainer.invoke_hook('before_train_epoch')
for _, data_batch in enumerate(train_dataloader):
trainer.invoke_hook('before_train_iter')

log_lrs.append(round(trainer.log_buffer.output['lr'], 5))
optim_lrs.append(
round(trainer.optimizer.param_groups[0]['lr'], 5))

trainer.train_step(trainer.model, data_batch)
trainer.invoke_hook('after_train_iter')

trainer.invoke_hook('after_train_epoch')
trainer.invoke_hook('after_run')

iters = 5
target_lrs = [0.004] * iters * 1 + [0.007] * iters * 1 + [0.01] * iters * 1 + [0.001] * iters * 2 + [0.0001] * iters * 2

self.assertListEqual(log_lrs, target_lrs)
self.assertListEqual(optim_lrs, target_lrs)


if __name__ == '__main__':
unittest.main()

+ 128  - 0   tests/trainers/hooks/test_timer_hook.py

@@ -0,0 +1,128 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import unittest
from abc import ABCMeta

import json
import torch
from torch import nn
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import Dataset

from modelscope.trainers import build_trainer
from modelscope.utils.constant import ModelFile


class DummyDataset(Dataset, metaclass=ABCMeta):
"""Base Dataset
"""

def __len__(self):
return 10

def __getitem__(self, idx):
return dict(feat=torch.rand((5, )), label=torch.randint(0, 4, (1, )))


class DummyModel(nn.Module):

def __init__(self):
super().__init__()
self.linear = nn.Linear(5, 4)
self.bn = nn.BatchNorm1d(4)

def forward(self, feat, labels):
x = self.linear(feat)

x = self.bn(x)
loss = torch.sum(x)
return dict(logits=x, loss=loss)


class IterTimerHookTest(unittest.TestCase):

def setUp(self):
print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
self.tmp_dir = tempfile.TemporaryDirectory().name
if not os.path.exists(self.tmp_dir):
os.makedirs(self.tmp_dir)

def tearDown(self):
super().tearDown()
shutil.rmtree(self.tmp_dir)

def test_iter_time_hook(self):
json_cfg = {
'task': 'image_classification',
'train': {
'work_dir': self.tmp_dir,
'dataloader': {
'batch_size_per_gpu': 2,
'workers_per_gpu': 1
},
'hooks': [{
'type': 'IterTimerHook',
}]
}
}

config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION)
with open(config_path, 'w') as f:
json.dump(json_cfg, f)

model = DummyModel()
optimizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = MultiStepLR(optimizer, milestones=[2, 4])
trainer_name = 'EpochBasedTrainer'
kwargs = dict(
cfg_file=config_path,
model=model,
train_dataset=DummyDataset(),
optimizers=(optimizer, lr_scheduler),
max_epochs=5)

trainer = build_trainer(trainer_name, kwargs)
train_dataloader = trainer._build_dataloader_with_dataset(
trainer.train_dataset, **trainer.cfg.train.get('dataloader', {}))
trainer.register_optimizers_hook()
trainer.register_hook_from_cfg(trainer.cfg.train.hooks)

trainer.invoke_hook('before_run')
for i in range(trainer._epoch, trainer._max_epochs):
trainer.invoke_hook('before_train_epoch')
for _, data_batch in enumerate(train_dataloader):
trainer.invoke_hook('before_train_iter')
trainer.train_step(trainer.model, data_batch)
trainer.invoke_hook('after_train_iter')

self.assertIn('data_load_time', trainer.log_buffer.val_history)
self.assertIn('time', trainer.log_buffer.val_history)
self.assertIn('loss', trainer.log_buffer.val_history)

trainer.invoke_hook('after_train_epoch')

target_len = 5 * (i + 1)
self.assertEqual(
len(trainer.log_buffer.val_history['data_load_time']),
target_len)
self.assertEqual(
len(trainer.log_buffer.val_history['time']), target_len)
self.assertEqual(
len(trainer.log_buffer.val_history['loss']), target_len)

self.assertEqual(
len(trainer.log_buffer.n_history['data_load_time']),
target_len)
self.assertEqual(
len(trainer.log_buffer.n_history['time']), target_len)
self.assertEqual(
len(trainer.log_buffer.n_history['loss']), target_len)

trainer.invoke_hook('after_run')


if __name__ == '__main__':
unittest.main()

+ 0  - 0   tests/trainers/lrscheduler/__init__.py


+ 0  - 0   tests/trainers/lrscheduler/warmup/__init__.py


+ 79  - 0   tests/trainers/lrscheduler/warmup/test_warmup_base.py

@@ -0,0 +1,79 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest

import torch
from torch import nn
from torch.optim.lr_scheduler import MultiStepLR


class WarmupTest(unittest.TestCase):

def setUp(self):
print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))

def test_constant_warmup(self):
from modelscope.trainers.lrscheduler.warmup import ConstantWarmup

net = nn.Linear(2, 2)
base_lr = 0.02
warmup_iters = 3
warmup_ratio = 0.2
optimizer = torch.optim.SGD(net.parameters(), lr=base_lr, momentum=0.9)
lr_scheduler = MultiStepLR(optimizer, milestones=[7, 9])
lr_scheduler_with_warmup = ConstantWarmup(
lr_scheduler, warmup_iters=warmup_iters, warmup_ratio=warmup_ratio)

res = []
for _ in range(10):
lr_scheduler_with_warmup.step()
for _, group in enumerate(optimizer.param_groups):
res.append(group['lr'])

base_lrs = [0.02, 0.02, 0.02, 0.002, 0.002, 0.0002, 0.0002]
self.assertListEqual(res, [0.004, 0.004, 0.02] + base_lrs)

def test_linear_warmup(self):
from modelscope.trainers.lrscheduler.warmup import LinearWarmup

net = nn.Linear(2, 2)
base_lr = 0.02
warmup_iters = 3
warmup_ratio = 0.1
optimizer = torch.optim.SGD(net.parameters(), lr=base_lr, momentum=0.9)
lr_scheduler = MultiStepLR(optimizer, milestones=[7, 9])
lr_scheduler_with_warmup = LinearWarmup(
lr_scheduler, warmup_iters=warmup_iters, warmup_ratio=warmup_ratio)

res = []
for _ in range(10):
lr_scheduler_with_warmup.step()
for _, group in enumerate(optimizer.param_groups):
res.append(round(group['lr'], 5))

base_lrs = [0.02, 0.02, 0.02, 0.002, 0.002, 0.0002, 0.0002]
self.assertListEqual(res, [0.0080, 0.0140, 0.02] + base_lrs)

def test_exp_warmup(self):
from modelscope.trainers.lrscheduler.warmup import ExponentialWarmup

net = nn.Linear(2, 2)
base_lr = 0.02
warmup_iters = 3
warmup_ratio = 0.1
optimizer = torch.optim.SGD(net.parameters(), lr=base_lr, momentum=0.9)
lr_scheduler = MultiStepLR(optimizer, milestones=[7, 9])
lr_scheduler_with_warmup = ExponentialWarmup(
lr_scheduler, warmup_iters=warmup_iters, warmup_ratio=warmup_ratio)

res = []
for _ in range(10):
lr_scheduler_with_warmup.step()
for _, group in enumerate(optimizer.param_groups):
res.append(round(group['lr'], 5))

base_lrs = [0.02, 0.02, 0.02, 0.002, 0.002, 0.0002, 0.0002]
self.assertListEqual(res, [0.00431, 0.00928, 0.02] + base_lrs)


if __name__ == '__main__':
unittest.main()

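The expected learning rates in these tests are consistent with the usual warmup factors (scale the base lr during the first warmup_iters steps, then hand over to the wrapped scheduler). The arithmetic below reproduces them; it is inferred from the expected values, not taken from the warmup classes' code:

base_lr, warmup_ratio, warmup_iters = 0.02, 0.1, 3


def linear_factor(step):  # step is 1-based within the warmup phase
    return warmup_ratio + (1 - warmup_ratio) * step / warmup_iters


def exp_factor(step):
    return warmup_ratio ** (1 - step / warmup_iters)


print([round(base_lr * linear_factor(s), 5) for s in (1, 2, 3)])  # [0.008, 0.014, 0.02]
print([round(base_lr * exp_factor(s), 5) for s in (1, 2, 3)])     # [0.00431, 0.00928, 0.02]
print([round(base_lr * 0.2, 5)] * 2 + [base_lr])                  # constant (ratio 0.2): [0.004, 0.004, 0.02]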
+ 209  - 0   tests/trainers/test_trainer.py

@@ -0,0 +1,209 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import unittest
from abc import ABCMeta

import json
import torch
from torch import nn
from torch.optim import SGD
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Dataset

from modelscope.trainers import build_trainer
from modelscope.utils.constant import ModelFile
from modelscope.utils.test_utils import test_level


class DummyMetric:

def __call__(self, ground_truth, predict_results):
return {'accuracy': 0.5}


class DummyDataset(Dataset, metaclass=ABCMeta):
"""Base Dataset
"""

def __len__(self):
return 20

def __getitem__(self, idx):
return dict(feat=torch.rand((5, )), label=torch.randint(0, 4, (1, )))


class DummyModel(nn.Module):

def __init__(self):
super().__init__()
self.linear = nn.Linear(5, 4)
self.bn = nn.BatchNorm1d(4)

def forward(self, feat, labels):
x = self.linear(feat)

x = self.bn(x)
loss = torch.sum(x)
return dict(logits=x, loss=loss)


class TrainerTest(unittest.TestCase):

def setUp(self):
print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
self.tmp_dir = tempfile.TemporaryDirectory().name
if not os.path.exists(self.tmp_dir):
os.makedirs(self.tmp_dir)

def tearDown(self):
super().tearDown()
shutil.rmtree(self.tmp_dir)

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_train_0(self):
json_cfg = {
'train': {
'work_dir':
self.tmp_dir,
'dataloader': {
'batch_size_per_gpu': 2,
'workers_per_gpu': 1
},
'optimizer': {
'type': 'SGD',
'lr': 0.01,
'options': {
'grad_clip': {
'max_norm': 2.0
}
}
},
'lr_scheduler': {
'type': 'StepLR',
'step_size': 2,
'options': {
'warmup': {
'type': 'LinearWarmup',
'warmup_iters': 2
}
}
},
'hooks': [{
'type': 'CheckpointHook',
'interval': 1
}, {
'type': 'TextLoggerHook',
'interval': 1
}, {
'type': 'IterTimerHook'
}, {
'type': 'EvaluationHook',
'interval': 1
}]
},
'evaluation': {
'dataloader': {
'batch_size_per_gpu': 2,
'workers_per_gpu': 1,
'shuffle': False
},
'metrics': ['seq_cls_metric']
}
}
config_path = os.path.join(self.tmp_dir, ModelFile.CONFIGURATION)
with open(config_path, 'w') as f:
json.dump(json_cfg, f)

trainer_name = 'EpochBasedTrainer'
kwargs = dict(
cfg_file=config_path,
model=DummyModel(),
data_collator=None,
train_dataset=DummyDataset(),
eval_dataset=DummyDataset(),
max_epochs=3)

trainer = build_trainer(trainer_name, kwargs)
trainer.train()
results_files = os.listdir(self.tmp_dir)

self.assertIn(f'{trainer.timestamp}.log.json', results_files)
self.assertIn('epoch_1.pth', results_files)
self.assertIn('epoch_2.pth', results_files)
self.assertIn('epoch_3.pth', results_files)

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_train_1(self):
json_cfg = {
'train': {
'work_dir':
self.tmp_dir,
'dataloader': {
'batch_size_per_gpu': 2,
'workers_per_gpu': 1
},
'hooks': [{
'type': 'CheckpointHook',
'interval': 1
}, {
'type': 'TextLoggerHook',
'interval': 1
}, {
'type': 'IterTimerHook'
}, {
'type': 'EvaluationHook',
'interval': 1
}]
},
'evaluation': {
'dataloader': {
'batch_size_per_gpu': 2,
'workers_per_gpu': 1,
'shuffle': False
},
'metrics': ['seq_cls_metric']
}
}

config_path = os.path.join(self.tmp_dir, 'config.json')
with open(config_path, 'w') as f:
json.dump(json_cfg, f)

model = DummyModel()
optimizer = SGD(model.parameters(), lr=0.01)
lr_scheduler = StepLR(optimizer, 2)
trainer_name = 'EpochBasedTrainer'
kwargs = dict(
cfg_file=config_path,
model=model,
data_collator=None,
train_dataset=DummyDataset(),
eval_dataset=DummyDataset(),
optimizers=(optimizer, lr_scheduler),
max_epochs=3)

trainer = build_trainer(trainer_name, kwargs)
trainer.train()
results_files = os.listdir(self.tmp_dir)

self.assertIn(f'{trainer.timestamp}.log.json', results_files)
self.assertIn('epoch_1.pth', results_files)
self.assertIn('epoch_2.pth', results_files)
self.assertIn('epoch_3.pth', results_files)


class DummyTrainerTest(unittest.TestCase):

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_dummy(self):
default_args = dict(cfg_file='configs/examples/train.json')
trainer = build_trainer('dummy', default_args)

trainer.train()
trainer.evaluate()


if __name__ == '__main__':
unittest.main()

+ 0  - 19   tests/trainers/test_trainer_base.py

@@ -1,19 +0,0 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import unittest

from modelscope.trainers import build_trainer


class DummyTrainerTest(unittest.TestCase):

def test_dummy(self):
default_args = dict(cfg_file='configs/examples/train.json')
trainer = build_trainer('dummy', default_args)

trainer.train()
trainer.evaluate()


if __name__ == '__main__':
unittest.main()

+ 91  - 0   tests/trainers/test_trainer_with_nlp.py

@@ -0,0 +1,91 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import shutil
import tempfile
import unittest

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.nlp.sbert_for_sequence_classification import \
SbertTextClassfier
from modelscope.msdatasets import MsDataset
from modelscope.trainers import build_trainer
from modelscope.utils.constant import ModelFile
from modelscope.utils.test_utils import test_level


class TestTrainerWithNlp(unittest.TestCase):

def setUp(self):
print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
self.tmp_dir = tempfile.TemporaryDirectory().name
if not os.path.exists(self.tmp_dir):
os.makedirs(self.tmp_dir)

from datasets import Dataset
dataset_dict = {
'sentence1': [
'This is test sentence1-1', 'This is test sentence2-1',
'This is test sentence3-1'
],
'sentence2': [
'This is test sentence1-2', 'This is test sentence2-2',
'This is test sentence3-2'
],
'label': [0, 1, 1]
}
dataset = Dataset.from_dict(dataset_dict)

class MsDatasetDummy(MsDataset):

def __len__(self):
return len(self._hf_ds)

self.dataset = MsDatasetDummy(dataset)

def tearDown(self):
shutil.rmtree(self.tmp_dir)
super().tearDown()

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_trainer(self):
model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base'
kwargs = dict(
model=model_id,
train_dataset=self.dataset,
eval_dataset=self.dataset,
work_dir=self.tmp_dir)

trainer = build_trainer(default_args=kwargs)
trainer.train()
results_files = os.listdir(self.tmp_dir)
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
for i in range(10):
self.assertIn(f'epoch_{i+1}.pth', results_files)

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_trainer_with_model_and_args(self):
tmp_dir = tempfile.TemporaryDirectory().name
if not os.path.exists(tmp_dir):
os.makedirs(tmp_dir)

model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base'
cache_path = snapshot_download(model_id)
model = SbertTextClassfier.from_pretrained(cache_path)
kwargs = dict(
cfg_file=os.path.join(cache_path, ModelFile.CONFIGURATION),
model=model,
train_dataset=self.dataset,
eval_dataset=self.dataset,
max_epochs=2,
work_dir=self.tmp_dir)

trainer = build_trainer(default_args=kwargs)
trainer.train()
results_files = os.listdir(self.tmp_dir)
self.assertIn(f'{trainer.timestamp}.log.json', results_files)
for i in range(2):
self.assertIn(f'epoch_{i+1}.pth', results_files)


if __name__ == '__main__':
unittest.main()

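After a run like the ones above, the trainer's work_dir contains epoch_*.pth files in the save_checkpoint format. A hedged sketch of reloading one for evaluation; the work_dir path and the chosen epoch are placeholders, and loading this way is an assumption rather than an API documented by this patch:

import os

import torch

from modelscope.hub.snapshot_download import snapshot_download
from modelscope.models.nlp.sbert_for_sequence_classification import \
    SbertTextClassfier

work_dir = './work_dir'  # the trainer's work_dir from the tests above
cache_path = snapshot_download(
    'damo/nlp_structbert_sentence-similarity_chinese-base')
model = SbertTextClassfier.from_pretrained(cache_path)

ckpt = torch.load(os.path.join(work_dir, 'epoch_2.pth'), map_location='cpu')
model.load_state_dict(ckpt['state_dict'])
model.eval()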