diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py
index afe05cbe..08f56c8a 100644
--- a/modelscope/pipelines/base.py
+++ b/modelscope/pipelines/base.py
@@ -168,7 +168,6 @@ class Pipeline(ABC):
         kwargs['preprocess_params'] = preprocess_params
         kwargs['forward_params'] = forward_params
         kwargs['postprocess_params'] = postprocess_params
-
         if isinstance(input, list):
             if batch_size is None:
                 output = []
diff --git a/modelscope/preprocessors/ofa/visual_question_answering.py b/modelscope/preprocessors/ofa/visual_question_answering.py
index b83cf935..f5afabe3 100644
--- a/modelscope/preprocessors/ofa/visual_question_answering.py
+++ b/modelscope/preprocessors/ofa/visual_question_answering.py
@@ -83,8 +83,10 @@ class OfaVisualQuestionAnsweringPreprocessor(OfaBasePreprocessor):
     def _build_infer_sample(self, data: Dict[str, Any]) -> Dict[str, Any]:
         image = self.get_img_pil(data[self.column_map['image']])
         patch_image = self.patch_resize_transform(image)
-        text = ' {}'.format(data[self.column_map['text']])
-        inputs = self.tokenize_text(text)
+        text = data[self.column_map['text']]
+        text = self.pre_question(text, self.max_src_length)
+        text = text + '?' if not text.endswith('?') else text
+        inputs = self.tokenize_text(f' {text}')
         if self.prompt_type == 'none':
             decoder_prompt = self.bos_item
         elif self.prompt_type == 'src':
diff --git a/requirements/multi-modal.txt b/requirements/multi-modal.txt
index 9c144a99..457fe2b0 100644
--- a/requirements/multi-modal.txt
+++ b/requirements/multi-modal.txt
@@ -1,6 +1,5 @@
 ftfy>=6.0.3
 librosa
-ofa>=0.0.2
 pycocoevalcap>=1.2
 pycocotools>=2.0.4
 # compatible with taming-transformers-rom1504
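The substantive change above is in the OFA VQA preprocessor: the raw question is now cleaned via `pre_question` and guaranteed a trailing question mark before tokenization. Below is a minimal, hedged sketch of that flow; `normalize_question` is a hypothetical stand-in for `OfaBasePreprocessor.pre_question` (assumed here to lowercase, collapse whitespace, and truncate to `max_src_length` words), not the actual upstream helper.

```python
# Sketch of the new question-preprocessing flow (assumption-based,
# not the actual modelscope implementation).

def normalize_question(question: str, max_src_length: int) -> str:
    """Hypothetical stand-in for OfaBasePreprocessor.pre_question:
    lowercase, collapse whitespace, keep at most max_src_length words."""
    words = question.lower().strip().split()
    return ' '.join(words[:max_src_length])


def build_question_text(raw_text: str, max_src_length: int = 128) -> str:
    """Mirror the diff: normalize first, then ensure a trailing '?'."""
    text = normalize_question(raw_text, max_src_length)
    text = text + '?' if not text.endswith('?') else text
    # The preprocessor then calls self.tokenize_text(f' {text}');
    # here we just return the string that would be tokenized.
    return f' {text}'


print(build_question_text('What color is the bus'))  # ' what color is the bus?'
print(build_question_text('Is it raining?'))         # ' is it raining?'
```

The practical effect is that free-form questions passed at inference time are normalized before tokenization, presumably to match the normalization applied to VQA training data, while the leading space from the original `' {}'.format(...)` call is preserved.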