Browse Source

[to #42322933] Lazy-load decord and tf.contrib, and clean up unused imports

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9409213
master
yingda.chen 3 years ago
parent
commit
a68e3e526a
10 changed files with 32 additions and 52 deletions
  1. +1
    -3
      modelscope/metrics/text_generation_metric.py
  2. +2
    -3
      modelscope/models/audio/tts/models/am_models.py
  3. +3
    -4
      modelscope/models/audio/tts/models/helpers.py
  4. +11
    -14
      modelscope/models/audio/tts/models/rnn_wrappers.py
  5. +9
    -9
      modelscope/models/audio/tts/models/robutrans.py
  6. +2
    -6
      modelscope/models/audio/tts/sambert_hifi.py
  7. +1
    -1
      modelscope/pipelines/base.py
  8. +2
    -3
      modelscope/pipelines/cv/cmdssl_video_embedding_pipleline.py
  9. +0
    -5
      modelscope/pipelines/cv/ocr_detection_pipeline.py
  10. +1
    -4
      modelscope/preprocessors/video.py

+ 1
- 3
modelscope/metrics/text_generation_metric.py View File

@@ -1,8 +1,5 @@
from typing import Dict from typing import Dict


import numpy as np
from rouge_score import rouge_scorer

from ..metainfo import Metrics from ..metainfo import Metrics
from ..utils.registry import default_group from ..utils.registry import default_group
from .base import Metric from .base import Metric
@@ -18,6 +15,7 @@ class TextGenerationMetric(Metric):
def __init__(self): def __init__(self):
self.preds = [] self.preds = []
self.tgts = [] self.tgts = []
from rouge_score import rouge_scorer
self.scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True) self.scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)


def add(self, outputs: Dict, inputs: Dict): def add(self, outputs: Dict, inputs: Dict):


+ 2
- 3
modelscope/models/audio/tts/models/am_models.py View File

@@ -1,7 +1,4 @@
import tensorflow as tf import tensorflow as tf
from tensorflow.contrib.cudnn_rnn import CudnnLSTM
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
from tensorflow.contrib.rnn import LSTMBlockCell




def encoder_prenet(inputs, def encoder_prenet(inputs,
@@ -207,6 +204,7 @@ def conv_and_lstm(inputs,
embedded_inputs_speaker, embedded_inputs_speaker,
mask=None, mask=None,
scope='conv_and_lstm'): scope='conv_and_lstm'):
from tensorflow.contrib.rnn import LSTMBlockCell
x = inputs x = inputs
with tf.variable_scope(scope): with tf.variable_scope(scope):
for i in range(n_conv_layers): for i in range(n_conv_layers):
@@ -244,6 +242,7 @@ def conv_and_lstm_dec(inputs,
mask=None, mask=None,
scope='conv_and_lstm'): scope='conv_and_lstm'):
x = inputs x = inputs
from tensorflow.contrib.rnn import LSTMBlockCell
with tf.variable_scope(scope): with tf.variable_scope(scope):
for i in range(n_conv_layers): for i in range(n_conv_layers):
x = conv1d( x = conv1d(


+ 3
- 4
modelscope/models/audio/tts/models/helpers.py View File

@@ -1,9 +1,8 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from tensorflow.contrib.seq2seq import Helper




class VarTestHelper(Helper):
class VarTestHelper(tf.contrib.seq2seq.Helper):


def __init__(self, batch_size, inputs, dim): def __init__(self, batch_size, inputs, dim):
with tf.name_scope('VarTestHelper'): with tf.name_scope('VarTestHelper'):
@@ -44,7 +43,7 @@ class VarTestHelper(Helper):
return (finished, next_inputs, state) return (finished, next_inputs, state)




class VarTrainingHelper(Helper):
class VarTrainingHelper(tf.contrib.seq2seq.Helper):


def __init__(self, targets, inputs, dim): def __init__(self, targets, inputs, dim):
with tf.name_scope('VarTrainingHelper'): with tf.name_scope('VarTrainingHelper'):
@@ -86,7 +85,7 @@ class VarTrainingHelper(Helper):
return (finished, next_inputs, state) return (finished, next_inputs, state)




class VarTrainingSSHelper(Helper):
class VarTrainingSSHelper(tf.contrib.seq2seq.Helper):


def __init__(self, targets, inputs, dim, global_step, schedule_begin, def __init__(self, targets, inputs, dim, global_step, schedule_begin,
alpha, decay_steps): alpha, decay_steps):


+ 11
- 14
modelscope/models/audio/tts/models/rnn_wrappers.py View File

@@ -1,14 +1,11 @@
import numpy as np
import tensorflow as tf import tensorflow as tf
from tensorflow.contrib.rnn import RNNCell
from tensorflow.contrib.seq2seq import AttentionWrapperState
from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import rnn_cell_impl


from .am_models import prenet from .am_models import prenet




class VarPredictorCell(RNNCell):
'''Wrapper wrapper knock knock.'''
class VarPredictorCell(tf.contrib.rnn.RNNCell):
"""Wrapper wrapper knock knock."""


def __init__(self, var_predictor_cell, is_training, dim, prenet_units): def __init__(self, var_predictor_cell, is_training, dim, prenet_units):
super(VarPredictorCell, self).__init__() super(VarPredictorCell, self).__init__()
@@ -33,7 +30,7 @@ class VarPredictorCell(RNNCell):
]) ])


def call(self, inputs, state): def call(self, inputs, state):
'''Run the Tacotron2 super decoder cell.'''
"""Run the Tacotron2 super decoder cell."""
super_cell_out, decoder_state = state super_cell_out, decoder_state = state


# split # split
@@ -61,8 +58,8 @@ class VarPredictorCell(RNNCell):
return new_super_cell_out, new_states return new_super_cell_out, new_states




class DurPredictorCell(RNNCell):
'''Wrapper wrapper knock knock.'''
class DurPredictorCell(tf.contrib.rnn.RNNCell):
"""Wrapper wrapper knock knock."""


def __init__(self, var_predictor_cell, is_training, dim, prenet_units): def __init__(self, var_predictor_cell, is_training, dim, prenet_units):
super(DurPredictorCell, self).__init__() super(DurPredictorCell, self).__init__()
@@ -87,7 +84,7 @@ class DurPredictorCell(RNNCell):
]) ])


def call(self, inputs, state): def call(self, inputs, state):
'''Run the Tacotron2 super decoder cell.'''
"""Run the Tacotron2 super decoder cell."""
super_cell_out, decoder_state = state super_cell_out, decoder_state = state


# split # split
@@ -117,8 +114,8 @@ class DurPredictorCell(RNNCell):
return new_super_cell_out, new_states return new_super_cell_out, new_states




class DurPredictorCECell(RNNCell):
'''Wrapper wrapper knock knock.'''
class DurPredictorCECell(tf.contrib.rnn.RNNCell):
"""Wrapper wrapper knock knock."""


def __init__(self, var_predictor_cell, is_training, dim, prenet_units, def __init__(self, var_predictor_cell, is_training, dim, prenet_units,
max_dur, dur_embedding_dim): max_dur, dur_embedding_dim):
@@ -146,7 +143,7 @@ class DurPredictorCECell(RNNCell):
]) ])


def call(self, inputs, state): def call(self, inputs, state):
'''Run the Tacotron2 super decoder cell.'''
"""Run the Tacotron2 super decoder cell."""
super_cell_out, decoder_state = state super_cell_out, decoder_state = state


# split # split
@@ -181,8 +178,8 @@ class DurPredictorCECell(RNNCell):
return new_super_cell_out, new_states return new_super_cell_out, new_states




class VarPredictorCell2(RNNCell):
'''Wrapper wrapper knock knock.'''
class VarPredictorCell2(tf.contrib.rnn.RNNCell):
"""Wrapper wrapper knock knock."""


def __init__(self, var_predictor_cell, is_training, dim, prenet_units): def __init__(self, var_predictor_cell, is_training, dim, prenet_units):
super(VarPredictorCell2, self).__init__() super(VarPredictorCell2, self).__init__()


+ 9
- 9
modelscope/models/audio/tts/models/robutrans.py View File

@@ -1,14 +1,8 @@
import tensorflow as tf import tensorflow as tf
from tensorflow.contrib.rnn import LSTMBlockCell, MultiRNNCell
from tensorflow.contrib.seq2seq import BasicDecoder
from tensorflow.python.ops.ragged.ragged_util import repeat from tensorflow.python.ops.ragged.ragged_util import repeat


from .am_models import conv_prenet, decoder_prenet, encoder_prenet
from .fsmn_encoder import FsmnEncoderV2 from .fsmn_encoder import FsmnEncoderV2
from .helpers import VarTestHelper, VarTrainingHelper
from .position import (BatchSinusodalPositionalEncoding,
SinusodalPositionalEncoding)
from .rnn_wrappers import DurPredictorCell, VarPredictorCell
from .position import BatchSinusodalPositionalEncoding
from .self_attention_decoder import SelfAttentionDecoder from .self_attention_decoder import SelfAttentionDecoder
from .self_attention_encoder import SelfAttentionEncoder from .self_attention_encoder import SelfAttentionEncoder


@@ -32,7 +26,7 @@ class RobuTrans():
duration_scales=None, duration_scales=None,
energy_contours=None, energy_contours=None,
energy_scales=None): energy_scales=None):
'''Initializes the model for inference.
"""Initializes the model for inference.


Sets "mel_outputs", "linear_outputs", "stop_token_outputs", and "alignments" fields. Sets "mel_outputs", "linear_outputs", "stop_token_outputs", and "alignments" fields.


@@ -46,7 +40,10 @@ class RobuTrans():
mel_targets: float32 Tensor with shape [N, T_out, M] where N is batch size, T_out is number mel_targets: float32 Tensor with shape [N, T_out, M] where N is batch size, T_out is number
of steps in the output time series, M is num_mels, and values are entries in the mel of steps in the output time series, M is num_mels, and values are entries in the mel
spectrogram. Only needed for training. spectrogram. Only needed for training.
'''
"""
from tensorflow.contrib.rnn import LSTMBlockCell, MultiRNNCell
from tensorflow.contrib.seq2seq import BasicDecoder

with tf.variable_scope('inference') as _: with tf.variable_scope('inference') as _:
is_training = mel_targets is not None is_training = mel_targets is not None
batch_size = tf.shape(inputs)[0] batch_size = tf.shape(inputs)[0]
@@ -229,17 +226,20 @@ class RobuTrans():
LSTMBlockCell(hp.predictor_lstm_units), LSTMBlockCell(hp.predictor_lstm_units),
LSTMBlockCell(hp.predictor_lstm_units) LSTMBlockCell(hp.predictor_lstm_units)
], state_is_tuple=True) # yapf:disable ], state_is_tuple=True) # yapf:disable
from .rnn_wrappers import DurPredictorCell
duration_output_cell = DurPredictorCell( duration_output_cell = DurPredictorCell(
duration_predictor_cell, is_training, 1, duration_predictor_cell, is_training, 1,
hp.predictor_prenet_units) hp.predictor_prenet_units)
duration_predictor_init_state = duration_output_cell.zero_state( duration_predictor_init_state = duration_output_cell.zero_state(
batch_size=batch_size, dtype=tf.float32) batch_size=batch_size, dtype=tf.float32)
if is_training: if is_training:
from .helpers import VarTrainingHelper
duration_helper = VarTrainingHelper( duration_helper = VarTrainingHelper(
tf.expand_dims( tf.expand_dims(
tf.log(tf.cast(durations, tf.float32) + 1), tf.log(tf.cast(durations, tf.float32) + 1),
axis=2), dur_inputs, 1) axis=2), dur_inputs, 1)
else: else:
from .helpers import VarTestHelper
duration_helper = VarTestHelper(batch_size, dur_inputs, 1) duration_helper = VarTestHelper(batch_size, dur_inputs, 1)
( (
duration_outputs, _ duration_outputs, _


+ 2
- 6
modelscope/models/audio/tts/sambert_hifi.py View File

@@ -1,14 +1,10 @@
from __future__ import (absolute_import, division, print_function, from __future__ import (absolute_import, division, print_function,
unicode_literals) unicode_literals)
import io
import os import os
import time
import zipfile import zipfile
from typing import Any, Dict, Optional, Union


import json import json
import numpy as np import numpy as np
import torch


from modelscope.metainfo import Models from modelscope.metainfo import Models
from modelscope.models.base import Model from modelscope.models.base import Model
@@ -16,8 +12,8 @@ from modelscope.models.builder import MODELS
from modelscope.utils.audio.tts_exceptions import ( from modelscope.utils.audio.tts_exceptions import (
TtsFrontendInitializeFailedException, TtsFrontendInitializeFailedException,
TtsFrontendLanguageTypeInvalidException, TtsModelConfigurationExcetion, TtsFrontendLanguageTypeInvalidException, TtsModelConfigurationExcetion,
TtsVocoderMelspecShapeMismatchException, TtsVoiceNotExistsException)
from modelscope.utils.constant import ModelFile, Tasks
TtsVoiceNotExistsException)
from modelscope.utils.constant import Tasks
from .voice import Voice from .voice import Voice


import tensorflow as tf # isort:skip import tensorflow as tf # isort:skip


+ 1
- 1
modelscope/pipelines/base.py View File

@@ -23,8 +23,8 @@ logger = get_logger()
class Pipeline(ABC): class Pipeline(ABC):


def initiate_single_model(self, model): def initiate_single_model(self, model):
logger.info(f'initiate model from {model}')
if isinstance(model, str) and is_official_hub_path(model): if isinstance(model, str) and is_official_hub_path(model):
logger.info(f'initiate model from location {model}.')
# expecting model has been prefetched to local cache beforehand # expecting model has been prefetched to local cache beforehand
return Model.from_pretrained( return Model.from_pretrained(
model, model_prefetched=True) if is_model(model) else model model, model_prefetched=True) if is_model(model) else model


+ 2
- 3
modelscope/pipelines/cv/cmdssl_video_embedding_pipleline.py View File

@@ -1,11 +1,9 @@
import os.path as osp import os.path as osp
from typing import Any, Dict from typing import Any, Dict


import decord
import numpy as np import numpy as np
import torch import torch
import torchvision.transforms.functional as TF import torchvision.transforms.functional as TF
from decord import VideoReader, cpu
from PIL import Image from PIL import Image


from modelscope.metainfo import Pipelines from modelscope.metainfo import Pipelines
@@ -49,6 +47,7 @@ class CMDSSLVideoEmbeddingPipeline(Pipeline):
logger.info('load model done') logger.info('load model done')


def preprocess(self, input: Input) -> Dict[str, Any]: def preprocess(self, input: Input) -> Dict[str, Any]:
import decord
decord.bridge.set_bridge('native') decord.bridge.set_bridge('native')


transforms = VCompose([ transforms = VCompose([
@@ -60,7 +59,7 @@ class CMDSSLVideoEmbeddingPipeline(Pipeline):


clip_len = (self.cfg.DATA.video_frames clip_len = (self.cfg.DATA.video_frames
- 1) * self.cfg.DATA.video_stride + 1 - 1) * self.cfg.DATA.video_stride + 1
vr = VideoReader(input, ctx=cpu(0))
vr = decord.VideoReader(input, ctx=decord.cpu(0))
if len(vr) <= clip_len: if len(vr) <= clip_len:
init_frames = np.zeros(self.cfg.DATA.multi_crop, dtype=int) init_frames = np.zeros(self.cfg.DATA.multi_crop, dtype=int)
else: else:


+ 0
- 5
modelscope/pipelines/cv/ocr_detection_pipeline.py View File

@@ -16,11 +16,6 @@ from ..base import Pipeline
from ..builder import PIPELINES from ..builder import PIPELINES
from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils


if tf.__version__ >= '2.0':
import tf_slim as slim
else:
from tensorflow.contrib import slim

if tf.__version__ >= '2.0': if tf.__version__ >= '2.0':
tf = tf.compat.v1 tf = tf.compat.v1
tf.compat.v1.disable_eager_execution() tf.compat.v1.disable_eager_execution()


+ 1
- 4
modelscope/preprocessors/video.py View File

@@ -1,15 +1,11 @@
import math import math
import os
import random import random


import decord
import numpy as np import numpy as np
import torch import torch
import torch.nn as nn
import torch.utils.data import torch.utils.data
import torch.utils.dlpack as dlpack import torch.utils.dlpack as dlpack
import torchvision.transforms._transforms_video as transforms import torchvision.transforms._transforms_video as transforms
from decord import VideoReader
from torchvision.transforms import Compose from torchvision.transforms import Compose




@@ -128,6 +124,7 @@ def _decode_video(cfg, path):
Returns: Returns:
frames (Tensor): video tensor data frames (Tensor): video tensor data
""" """
from decord import VideoReader
vr = VideoReader(path) vr = VideoReader(path)


num_clips_per_video = cfg.TEST.NUM_ENSEMBLE_VIEWS num_clips_per_video = cfg.TEST.NUM_ENSEMBLE_VIEWS


Loading…
Cancel
Save