diff --git a/modelscope/metrics/text_generation_metric.py b/modelscope/metrics/text_generation_metric.py
index ae61d225..3e5c1f93 100644
--- a/modelscope/metrics/text_generation_metric.py
+++ b/modelscope/metrics/text_generation_metric.py
@@ -1,8 +1,5 @@
 from typing import Dict
 
-import numpy as np
-from rouge_score import rouge_scorer
-
 from ..metainfo import Metrics
 from ..utils.registry import default_group
 from .base import Metric
@@ -18,6 +15,7 @@ class TextGenerationMetric(Metric):
     def __init__(self):
         self.preds = []
         self.tgts = []
+        from rouge_score import rouge_scorer
         self.scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
 
     def add(self, outputs: Dict, inputs: Dict):
diff --git a/modelscope/models/audio/tts/models/am_models.py b/modelscope/models/audio/tts/models/am_models.py
index 1433fd7e..cd43ff12 100755
--- a/modelscope/models/audio/tts/models/am_models.py
+++ b/modelscope/models/audio/tts/models/am_models.py
@@ -1,7 +1,4 @@
 import tensorflow as tf
-from tensorflow.contrib.cudnn_rnn import CudnnLSTM
-from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
-from tensorflow.contrib.rnn import LSTMBlockCell
 
 
 def encoder_prenet(inputs,
@@ -207,6 +204,7 @@ def conv_and_lstm(inputs,
                   embedded_inputs_speaker,
                   mask=None,
                   scope='conv_and_lstm'):
+    from tensorflow.contrib.rnn import LSTMBlockCell
     x = inputs
     with tf.variable_scope(scope):
         for i in range(n_conv_layers):
@@ -244,6 +242,7 @@ def conv_and_lstm_dec(inputs,
                       mask=None,
                       scope='conv_and_lstm'):
     x = inputs
+    from tensorflow.contrib.rnn import LSTMBlockCell
     with tf.variable_scope(scope):
         for i in range(n_conv_layers):
             x = conv1d(
diff --git a/modelscope/models/audio/tts/models/helpers.py b/modelscope/models/audio/tts/models/helpers.py
index f3e53277..371000a4 100755
--- a/modelscope/models/audio/tts/models/helpers.py
+++ b/modelscope/models/audio/tts/models/helpers.py
@@ -1,9 +1,8 @@
 import numpy as np
 import tensorflow as tf
-from tensorflow.contrib.seq2seq import Helper
 
 
-class VarTestHelper(Helper):
+class VarTestHelper(tf.contrib.seq2seq.Helper):
 
     def __init__(self, batch_size, inputs, dim):
         with tf.name_scope('VarTestHelper'):
@@ -44,7 +43,7 @@ class VarTestHelper(Helper):
             return (finished, next_inputs, state)
 
 
-class VarTrainingHelper(Helper):
+class VarTrainingHelper(tf.contrib.seq2seq.Helper):
 
     def __init__(self, targets, inputs, dim):
         with tf.name_scope('VarTrainingHelper'):
@@ -86,7 +85,7 @@ class VarTrainingHelper(Helper):
             return (finished, next_inputs, state)
 
 
-class VarTrainingSSHelper(Helper):
+class VarTrainingSSHelper(tf.contrib.seq2seq.Helper):
 
     def __init__(self, targets, inputs, dim, global_step, schedule_begin,
                  alpha, decay_steps):
diff --git a/modelscope/models/audio/tts/models/rnn_wrappers.py b/modelscope/models/audio/tts/models/rnn_wrappers.py
index 85a6b335..6c487bab 100755
--- a/modelscope/models/audio/tts/models/rnn_wrappers.py
+++ b/modelscope/models/audio/tts/models/rnn_wrappers.py
@@ -1,14 +1,11 @@
-import numpy as np
 import tensorflow as tf
-from tensorflow.contrib.rnn import RNNCell
-from tensorflow.contrib.seq2seq import AttentionWrapperState
 from tensorflow.python.ops import rnn_cell_impl
 
 from .am_models import prenet
 
 
-class VarPredictorCell(RNNCell):
-    '''Wrapper wrapper knock knock.'''
+class VarPredictorCell(tf.contrib.rnn.RNNCell):
+    """Wrapper wrapper knock knock."""
 
     def __init__(self, var_predictor_cell, is_training, dim, prenet_units):
         super(VarPredictorCell, self).__init__()
@@ -33,7 +30,7 @@ class VarPredictorCell(RNNCell):
         ])
 
     def call(self, inputs, state):
-        '''Run the Tacotron2 super decoder cell.'''
+        """Run the Tacotron2 super decoder cell."""
         super_cell_out, decoder_state = state
 
         # split
@@ -61,8 +58,8 @@ class VarPredictorCell(RNNCell):
         return new_super_cell_out, new_states
 
 
-class DurPredictorCell(RNNCell):
-    '''Wrapper wrapper knock knock.'''
+class DurPredictorCell(tf.contrib.rnn.RNNCell):
+    """Wrapper wrapper knock knock."""
 
     def __init__(self, var_predictor_cell, is_training, dim, prenet_units):
         super(DurPredictorCell, self).__init__()
@@ -87,7 +84,7 @@ class DurPredictorCell(RNNCell):
         ])
 
     def call(self, inputs, state):
-        '''Run the Tacotron2 super decoder cell.'''
+        """Run the Tacotron2 super decoder cell."""
         super_cell_out, decoder_state = state
 
         # split
@@ -117,8 +114,8 @@ class DurPredictorCell(RNNCell):
         return new_super_cell_out, new_states
 
 
-class DurPredictorCECell(RNNCell):
-    '''Wrapper wrapper knock knock.'''
+class DurPredictorCECell(tf.contrib.rnn.RNNCell):
+    """Wrapper wrapper knock knock."""
 
     def __init__(self, var_predictor_cell, is_training, dim, prenet_units,
                  max_dur, dur_embedding_dim):
@@ -146,7 +143,7 @@ class DurPredictorCECell(RNNCell):
         ])
 
     def call(self, inputs, state):
-        '''Run the Tacotron2 super decoder cell.'''
+        """Run the Tacotron2 super decoder cell."""
         super_cell_out, decoder_state = state
 
         # split
@@ -181,8 +178,8 @@ class DurPredictorCECell(RNNCell):
         return new_super_cell_out, new_states
 
 
-class VarPredictorCell2(RNNCell):
-    '''Wrapper wrapper knock knock.'''
+class VarPredictorCell2(tf.contrib.rnn.RNNCell):
+    """Wrapper wrapper knock knock."""
 
     def __init__(self, var_predictor_cell, is_training, dim, prenet_units):
         super(VarPredictorCell2, self).__init__()
diff --git a/modelscope/models/audio/tts/models/robutrans.py b/modelscope/models/audio/tts/models/robutrans.py
index d5bafcec..ab9fdfcc 100755
--- a/modelscope/models/audio/tts/models/robutrans.py
+++ b/modelscope/models/audio/tts/models/robutrans.py
@@ -1,14 +1,8 @@
 import tensorflow as tf
-from tensorflow.contrib.rnn import LSTMBlockCell, MultiRNNCell
-from tensorflow.contrib.seq2seq import BasicDecoder
 from tensorflow.python.ops.ragged.ragged_util import repeat
 
-from .am_models import conv_prenet, decoder_prenet, encoder_prenet
 from .fsmn_encoder import FsmnEncoderV2
-from .helpers import VarTestHelper, VarTrainingHelper
-from .position import (BatchSinusodalPositionalEncoding,
-                       SinusodalPositionalEncoding)
-from .rnn_wrappers import DurPredictorCell, VarPredictorCell
+from .position import BatchSinusodalPositionalEncoding
 from .self_attention_decoder import SelfAttentionDecoder
 from .self_attention_encoder import SelfAttentionEncoder
 
@@ -32,7 +26,7 @@ class RobuTrans():
                    duration_scales=None,
                    energy_contours=None,
                    energy_scales=None):
-        '''Initializes the model for inference.
+        """Initializes the model for inference.
 
         Sets "mel_outputs", "linear_outputs", "stop_token_outputs", and "alignments" fields.
 
@@ -46,7 +40,10 @@ class RobuTrans():
           mel_targets: float32 Tensor with shape [N, T_out, M] where N is batch size, T_out is number
             of steps in the output time series, M is num_mels, and values are entries in the mel
             spectrogram. Only needed for training.
-        '''
+        """
+        from tensorflow.contrib.rnn import LSTMBlockCell, MultiRNNCell
+        from tensorflow.contrib.seq2seq import BasicDecoder
+
         with tf.variable_scope('inference') as _:
             is_training = mel_targets is not None
             batch_size = tf.shape(inputs)[0]
@@ -229,17 +226,20 @@ class RobuTrans():
                     LSTMBlockCell(hp.predictor_lstm_units),
                     LSTMBlockCell(hp.predictor_lstm_units)
                 ], state_is_tuple=True)  # yapf:disable
+                from .rnn_wrappers import DurPredictorCell
                 duration_output_cell = DurPredictorCell(
                     duration_predictor_cell, is_training, 1,
                     hp.predictor_prenet_units)
                 duration_predictor_init_state = duration_output_cell.zero_state(
                     batch_size=batch_size, dtype=tf.float32)
                 if is_training:
+                    from .helpers import VarTrainingHelper
                     duration_helper = VarTrainingHelper(
                         tf.expand_dims(
                             tf.log(tf.cast(durations, tf.float32) + 1),
                             axis=2), dur_inputs, 1)
                 else:
+                    from .helpers import VarTestHelper
                     duration_helper = VarTestHelper(batch_size, dur_inputs, 1)
                 (
                     duration_outputs, _
diff --git a/modelscope/models/audio/tts/sambert_hifi.py b/modelscope/models/audio/tts/sambert_hifi.py
index 401e32c9..79f8068e 100644
--- a/modelscope/models/audio/tts/sambert_hifi.py
+++ b/modelscope/models/audio/tts/sambert_hifi.py
@@ -1,14 +1,10 @@
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)
-import io
 import os
-import time
 import zipfile
-from typing import Any, Dict, Optional, Union
 
 import json
 import numpy as np
-import torch
 
 from modelscope.metainfo import Models
 from modelscope.models.base import Model
@@ -16,8 +12,8 @@ from modelscope.models.builder import MODELS
 from modelscope.utils.audio.tts_exceptions import (
     TtsFrontendInitializeFailedException,
     TtsFrontendLanguageTypeInvalidException, TtsModelConfigurationExcetion,
-    TtsVocoderMelspecShapeMismatchException, TtsVoiceNotExistsException)
-from modelscope.utils.constant import ModelFile, Tasks
+    TtsVoiceNotExistsException)
+from modelscope.utils.constant import Tasks
 from .voice import Voice
 
 import tensorflow as tf  # isort:skip
diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py
index 8c260ece..d674052d 100644
--- a/modelscope/pipelines/base.py
+++ b/modelscope/pipelines/base.py
@@ -23,8 +23,8 @@ logger = get_logger()
 class Pipeline(ABC):
 
     def initiate_single_model(self, model):
-        logger.info(f'initiate model from {model}')
         if isinstance(model, str) and is_official_hub_path(model):
+            logger.info(f'initiate model from location {model}.')
             # expecting model has been prefetched to local cache beforehand
             return Model.from_pretrained(
                 model, model_prefetched=True) if is_model(model) else model
diff --git a/modelscope/pipelines/cv/cmdssl_video_embedding_pipleline.py b/modelscope/pipelines/cv/cmdssl_video_embedding_pipleline.py
index 47d90d71..1d208841 100644
--- a/modelscope/pipelines/cv/cmdssl_video_embedding_pipleline.py
+++ b/modelscope/pipelines/cv/cmdssl_video_embedding_pipleline.py
@@ -1,11 +1,9 @@
 import os.path as osp
 from typing import Any, Dict
 
-import decord
 import numpy as np
 import torch
 import torchvision.transforms.functional as TF
-from decord import VideoReader, cpu
 from PIL import Image
 
 from modelscope.metainfo import Pipelines
@@ -49,6 +47,7 @@ class CMDSSLVideoEmbeddingPipeline(Pipeline):
         logger.info('load model done')
 
     def preprocess(self, input: Input) -> Dict[str, Any]:
+        import decord
         decord.bridge.set_bridge('native')
 
         transforms = VCompose([
@@ -60,7 +59,7 @@ class CMDSSLVideoEmbeddingPipeline(Pipeline):
 
         clip_len = (self.cfg.DATA.video_frames
                     - 1) * self.cfg.DATA.video_stride + 1
-        vr = VideoReader(input, ctx=cpu(0))
+        vr = decord.VideoReader(input, ctx=decord.cpu(0))
         if len(vr) <= clip_len:
             init_frames = np.zeros(self.cfg.DATA.multi_crop, dtype=int)
         else:
diff --git a/modelscope/pipelines/cv/ocr_detection_pipeline.py b/modelscope/pipelines/cv/ocr_detection_pipeline.py
index d8b31389..ed8bcccb 100644
--- a/modelscope/pipelines/cv/ocr_detection_pipeline.py
+++ b/modelscope/pipelines/cv/ocr_detection_pipeline.py
@@ -16,11 +16,6 @@ from ..base import Pipeline
 from ..builder import PIPELINES
 from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils
 
-if tf.__version__ >= '2.0':
-    import tf_slim as slim
-else:
-    from tensorflow.contrib import slim
-
 if tf.__version__ >= '2.0':
     tf = tf.compat.v1
 tf.compat.v1.disable_eager_execution()
diff --git a/modelscope/preprocessors/video.py b/modelscope/preprocessors/video.py
index 262fdaa5..33a92c1c 100644
--- a/modelscope/preprocessors/video.py
+++ b/modelscope/preprocessors/video.py
@@ -1,15 +1,11 @@
 import math
-import os
 import random
 
-import decord
 import numpy as np
 import torch
-import torch.nn as nn
 import torch.utils.data
 import torch.utils.dlpack as dlpack
 import torchvision.transforms._transforms_video as transforms
-from decord import VideoReader
 from torchvision.transforms import Compose
 
 
@@ -128,6 +124,7 @@ def _decode_video(cfg, path):
         Returns:
             frames            (Tensor): video tensor data
     """
+    from decord import VideoReader
     vr = VideoReader(path)
 
     num_clips_per_video = cfg.TEST.NUM_ENSEMBLE_VIEWS