Browse Source

[to #42322933] Lazy-load decord and tf.contrib, and clean up unused imports

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9409213
master
yingda.chen 3 years ago
parent
commit
a68e3e526a
10 changed files with 32 additions and 52 deletions
  1. +1
    -3
      modelscope/metrics/text_generation_metric.py
  2. +2
    -3
      modelscope/models/audio/tts/models/am_models.py
  3. +3
    -4
      modelscope/models/audio/tts/models/helpers.py
  4. +11
    -14
      modelscope/models/audio/tts/models/rnn_wrappers.py
  5. +9
    -9
      modelscope/models/audio/tts/models/robutrans.py
  6. +2
    -6
      modelscope/models/audio/tts/sambert_hifi.py
  7. +1
    -1
      modelscope/pipelines/base.py
  8. +2
    -3
      modelscope/pipelines/cv/cmdssl_video_embedding_pipleline.py
  9. +0
    -5
      modelscope/pipelines/cv/ocr_detection_pipeline.py
  10. +1
    -4
      modelscope/preprocessors/video.py

+ 1
- 3
modelscope/metrics/text_generation_metric.py View File

@@ -1,8 +1,5 @@
from typing import Dict

import numpy as np
from rouge_score import rouge_scorer

from ..metainfo import Metrics
from ..utils.registry import default_group
from .base import Metric
@@ -18,6 +15,7 @@ class TextGenerationMetric(Metric):
def __init__(self):
self.preds = []
self.tgts = []
from rouge_score import rouge_scorer
self.scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

def add(self, outputs: Dict, inputs: Dict):


+ 2
- 3
modelscope/models/audio/tts/models/am_models.py View File

@@ -1,7 +1,4 @@
import tensorflow as tf
from tensorflow.contrib.cudnn_rnn import CudnnLSTM
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
from tensorflow.contrib.rnn import LSTMBlockCell


def encoder_prenet(inputs,
@@ -207,6 +204,7 @@ def conv_and_lstm(inputs,
embedded_inputs_speaker,
mask=None,
scope='conv_and_lstm'):
from tensorflow.contrib.rnn import LSTMBlockCell
x = inputs
with tf.variable_scope(scope):
for i in range(n_conv_layers):
@@ -244,6 +242,7 @@ def conv_and_lstm_dec(inputs,
mask=None,
scope='conv_and_lstm'):
x = inputs
from tensorflow.contrib.rnn import LSTMBlockCell
with tf.variable_scope(scope):
for i in range(n_conv_layers):
x = conv1d(


+ 3
- 4
modelscope/models/audio/tts/models/helpers.py View File

@@ -1,9 +1,8 @@
import numpy as np
import tensorflow as tf
from tensorflow.contrib.seq2seq import Helper


class VarTestHelper(Helper):
class VarTestHelper(tf.contrib.seq2seq.Helper):

def __init__(self, batch_size, inputs, dim):
with tf.name_scope('VarTestHelper'):
@@ -44,7 +43,7 @@ class VarTestHelper(Helper):
return (finished, next_inputs, state)


class VarTrainingHelper(Helper):
class VarTrainingHelper(tf.contrib.seq2seq.Helper):

def __init__(self, targets, inputs, dim):
with tf.name_scope('VarTrainingHelper'):
@@ -86,7 +85,7 @@ class VarTrainingHelper(Helper):
return (finished, next_inputs, state)


class VarTrainingSSHelper(Helper):
class VarTrainingSSHelper(tf.contrib.seq2seq.Helper):

def __init__(self, targets, inputs, dim, global_step, schedule_begin,
alpha, decay_steps):


+ 11
- 14
modelscope/models/audio/tts/models/rnn_wrappers.py View File

@@ -1,14 +1,11 @@
import numpy as np
import tensorflow as tf
from tensorflow.contrib.rnn import RNNCell
from tensorflow.contrib.seq2seq import AttentionWrapperState
from tensorflow.python.ops import rnn_cell_impl

from .am_models import prenet


class VarPredictorCell(RNNCell):
'''Wrapper wrapper knock knock.'''
class VarPredictorCell(tf.contrib.rnn.RNNCell):
"""Wrapper wrapper knock knock."""

def __init__(self, var_predictor_cell, is_training, dim, prenet_units):
super(VarPredictorCell, self).__init__()
@@ -33,7 +30,7 @@ class VarPredictorCell(RNNCell):
])

def call(self, inputs, state):
'''Run the Tacotron2 super decoder cell.'''
"""Run the Tacotron2 super decoder cell."""
super_cell_out, decoder_state = state

# split
@@ -61,8 +58,8 @@ class VarPredictorCell(RNNCell):
return new_super_cell_out, new_states


class DurPredictorCell(RNNCell):
'''Wrapper wrapper knock knock.'''
class DurPredictorCell(tf.contrib.rnn.RNNCell):
"""Wrapper wrapper knock knock."""

def __init__(self, var_predictor_cell, is_training, dim, prenet_units):
super(DurPredictorCell, self).__init__()
@@ -87,7 +84,7 @@ class DurPredictorCell(RNNCell):
])

def call(self, inputs, state):
'''Run the Tacotron2 super decoder cell.'''
"""Run the Tacotron2 super decoder cell."""
super_cell_out, decoder_state = state

# split
@@ -117,8 +114,8 @@ class DurPredictorCell(RNNCell):
return new_super_cell_out, new_states


class DurPredictorCECell(RNNCell):
'''Wrapper wrapper knock knock.'''
class DurPredictorCECell(tf.contrib.rnn.RNNCell):
"""Wrapper wrapper knock knock."""

def __init__(self, var_predictor_cell, is_training, dim, prenet_units,
max_dur, dur_embedding_dim):
@@ -146,7 +143,7 @@ class DurPredictorCECell(RNNCell):
])

def call(self, inputs, state):
'''Run the Tacotron2 super decoder cell.'''
"""Run the Tacotron2 super decoder cell."""
super_cell_out, decoder_state = state

# split
@@ -181,8 +178,8 @@ class DurPredictorCECell(RNNCell):
return new_super_cell_out, new_states


class VarPredictorCell2(RNNCell):
'''Wrapper wrapper knock knock.'''
class VarPredictorCell2(tf.contrib.rnn.RNNCell):
"""Wrapper wrapper knock knock."""

def __init__(self, var_predictor_cell, is_training, dim, prenet_units):
super(VarPredictorCell2, self).__init__()


+ 9
- 9
modelscope/models/audio/tts/models/robutrans.py View File

@@ -1,14 +1,8 @@
import tensorflow as tf
from tensorflow.contrib.rnn import LSTMBlockCell, MultiRNNCell
from tensorflow.contrib.seq2seq import BasicDecoder
from tensorflow.python.ops.ragged.ragged_util import repeat

from .am_models import conv_prenet, decoder_prenet, encoder_prenet
from .fsmn_encoder import FsmnEncoderV2
from .helpers import VarTestHelper, VarTrainingHelper
from .position import (BatchSinusodalPositionalEncoding,
SinusodalPositionalEncoding)
from .rnn_wrappers import DurPredictorCell, VarPredictorCell
from .position import BatchSinusodalPositionalEncoding
from .self_attention_decoder import SelfAttentionDecoder
from .self_attention_encoder import SelfAttentionEncoder

@@ -32,7 +26,7 @@ class RobuTrans():
duration_scales=None,
energy_contours=None,
energy_scales=None):
'''Initializes the model for inference.
"""Initializes the model for inference.

Sets "mel_outputs", "linear_outputs", "stop_token_outputs", and "alignments" fields.

@@ -46,7 +40,10 @@ class RobuTrans():
mel_targets: float32 Tensor with shape [N, T_out, M] where N is batch size, T_out is number
of steps in the output time series, M is num_mels, and values are entries in the mel
spectrogram. Only needed for training.
'''
"""
from tensorflow.contrib.rnn import LSTMBlockCell, MultiRNNCell
from tensorflow.contrib.seq2seq import BasicDecoder

with tf.variable_scope('inference') as _:
is_training = mel_targets is not None
batch_size = tf.shape(inputs)[0]
@@ -229,17 +226,20 @@ class RobuTrans():
LSTMBlockCell(hp.predictor_lstm_units),
LSTMBlockCell(hp.predictor_lstm_units)
], state_is_tuple=True) # yapf:disable
from .rnn_wrappers import DurPredictorCell
duration_output_cell = DurPredictorCell(
duration_predictor_cell, is_training, 1,
hp.predictor_prenet_units)
duration_predictor_init_state = duration_output_cell.zero_state(
batch_size=batch_size, dtype=tf.float32)
if is_training:
from .helpers import VarTrainingHelper
duration_helper = VarTrainingHelper(
tf.expand_dims(
tf.log(tf.cast(durations, tf.float32) + 1),
axis=2), dur_inputs, 1)
else:
from .helpers import VarTestHelper
duration_helper = VarTestHelper(batch_size, dur_inputs, 1)
(
duration_outputs, _


+ 2
- 6
modelscope/models/audio/tts/sambert_hifi.py View File

@@ -1,14 +1,10 @@
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import io
import os
import time
import zipfile
from typing import Any, Dict, Optional, Union

import json
import numpy as np
import torch

from modelscope.metainfo import Models
from modelscope.models.base import Model
@@ -16,8 +12,8 @@ from modelscope.models.builder import MODELS
from modelscope.utils.audio.tts_exceptions import (
TtsFrontendInitializeFailedException,
TtsFrontendLanguageTypeInvalidException, TtsModelConfigurationExcetion,
TtsVocoderMelspecShapeMismatchException, TtsVoiceNotExistsException)
from modelscope.utils.constant import ModelFile, Tasks
TtsVoiceNotExistsException)
from modelscope.utils.constant import Tasks
from .voice import Voice

import tensorflow as tf # isort:skip


+ 1
- 1
modelscope/pipelines/base.py View File

@@ -23,8 +23,8 @@ logger = get_logger()
class Pipeline(ABC):

def initiate_single_model(self, model):
logger.info(f'initiate model from {model}')
if isinstance(model, str) and is_official_hub_path(model):
logger.info(f'initiate model from location {model}.')
# expecting model has been prefetched to local cache beforehand
return Model.from_pretrained(
model, model_prefetched=True) if is_model(model) else model


+ 2
- 3
modelscope/pipelines/cv/cmdssl_video_embedding_pipleline.py View File

@@ -1,11 +1,9 @@
import os.path as osp
from typing import Any, Dict

import decord
import numpy as np
import torch
import torchvision.transforms.functional as TF
from decord import VideoReader, cpu
from PIL import Image

from modelscope.metainfo import Pipelines
@@ -49,6 +47,7 @@ class CMDSSLVideoEmbeddingPipeline(Pipeline):
logger.info('load model done')

def preprocess(self, input: Input) -> Dict[str, Any]:
import decord
decord.bridge.set_bridge('native')

transforms = VCompose([
@@ -60,7 +59,7 @@ class CMDSSLVideoEmbeddingPipeline(Pipeline):

clip_len = (self.cfg.DATA.video_frames
- 1) * self.cfg.DATA.video_stride + 1
vr = VideoReader(input, ctx=cpu(0))
vr = decord.VideoReader(input, ctx=decord.cpu(0))
if len(vr) <= clip_len:
init_frames = np.zeros(self.cfg.DATA.multi_crop, dtype=int)
else:


+ 0
- 5
modelscope/pipelines/cv/ocr_detection_pipeline.py View File

@@ -16,11 +16,6 @@ from ..base import Pipeline
from ..builder import PIPELINES
from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils

if tf.__version__ >= '2.0':
import tf_slim as slim
else:
from tensorflow.contrib import slim

if tf.__version__ >= '2.0':
tf = tf.compat.v1
tf.compat.v1.disable_eager_execution()


+ 1
- 4
modelscope/preprocessors/video.py View File

@@ -1,15 +1,11 @@
import math
import os
import random

import decord
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data
import torch.utils.dlpack as dlpack
import torchvision.transforms._transforms_video as transforms
from decord import VideoReader
from torchvision.transforms import Compose


@@ -128,6 +124,7 @@ def _decode_video(cfg, path):
Returns:
frames (Tensor): video tensor data
"""
from decord import VideoReader
vr = VideoReader(path)

num_clips_per_video = cfg.TEST.NUM_ENSEMBLE_VIEWS


Loading…
Cancel
Save