diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py
index afe05cbe..08f56c8a 100644
--- a/modelscope/pipelines/base.py
+++ b/modelscope/pipelines/base.py
@@ -168,7 +168,6 @@ class Pipeline(ABC):
         kwargs['preprocess_params'] = preprocess_params
         kwargs['forward_params'] = forward_params
         kwargs['postprocess_params'] = postprocess_params
-
         if isinstance(input, list):
             if batch_size is None:
                 output = []
diff --git a/modelscope/preprocessors/ofa/visual_question_answering.py b/modelscope/preprocessors/ofa/visual_question_answering.py
index b83cf935..f5afabe3 100644
--- a/modelscope/preprocessors/ofa/visual_question_answering.py
+++ b/modelscope/preprocessors/ofa/visual_question_answering.py
@@ -83,8 +83,10 @@ class OfaVisualQuestionAnsweringPreprocessor(OfaBasePreprocessor):
     def _build_infer_sample(self, data: Dict[str, Any]) -> Dict[str, Any]:
         image = self.get_img_pil(data[self.column_map['image']])
         patch_image = self.patch_resize_transform(image)
-        text = ' {}'.format(data[self.column_map['text']])
-        inputs = self.tokenize_text(text)
+        text = data[self.column_map['text']]
+        text = self.pre_question(text, self.max_src_length)
+        text = text + '?' if not text.endswith('?') else text
+        inputs = self.tokenize_text(f' {text}')
         if self.prompt_type == 'none':
             decoder_prompt = self.bos_item
         elif self.prompt_type == 'src':
diff --git a/requirements/multi-modal.txt b/requirements/multi-modal.txt
index 9c144a99..457fe2b0 100644
--- a/requirements/multi-modal.txt
+++ b/requirements/multi-modal.txt
@@ -1,6 +1,5 @@
 ftfy>=6.0.3
 librosa
-ofa>=0.0.2
 pycocoevalcap>=1.2
 pycocotools>=2.0.4
 # compatible with taming-transformers-rom1504
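The substantive change above is in the OFA VQA preprocessor: the raw question is now cleaned via `pre_question` and guaranteed a trailing question mark before tokenization. Below is a minimal, hedged sketch of that flow; `normalize_question` is a hypothetical stand-in for `OfaBasePreprocessor.pre_question` (assumed here to lowercase, collapse whitespace, and truncate to `max_src_length` words), not the actual upstream helper.

```python
# Sketch of the new question-preprocessing flow (assumption-based,
# not the actual modelscope implementation).

def normalize_question(question: str, max_src_length: int) -> str:
    """Hypothetical stand-in for OfaBasePreprocessor.pre_question:
    lowercase, collapse whitespace, keep at most max_src_length words."""
    words = question.lower().strip().split()
    return ' '.join(words[:max_src_length])


def build_question_text(raw_text: str, max_src_length: int = 128) -> str:
    """Mirror the diff: normalize first, then ensure a trailing '?'."""
    text = normalize_question(raw_text, max_src_length)
    text = text + '?' if not text.endswith('?') else text
    # The preprocessor then calls self.tokenize_text(f' {text}');
    # here we just return the string that would be tokenized.
    return f' {text}'


print(build_question_text('What color is the bus'))  # ' what color is the bus?'
print(build_question_text('Is it raining?'))         # ' is it raining?'
```

The practical effect is that free-form questions passed at inference time are normalized before tokenization, presumably to match the normalization applied to VQA training data, while the leading space from the original `' {}'.format(...)` call is preserved.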