diff --git a/modelscope/preprocessors/audio.py b/modelscope/preprocessors/audio.py
index cdee968b..10057034 100644
--- a/modelscope/preprocessors/audio.py
+++ b/modelscope/preprocessors/audio.py
@@ -1,12 +1,13 @@
 import io
 import os
-from typing import Any, Dict
+from typing import Any, Dict, Tuple, Union
 
 import numpy as np
 import scipy.io.wavfile as wav
 import torch
 
 from modelscope.utils.constant import Fields
+from . import Preprocessor
 from .builder import PREPROCESSORS
 
 
@@ -115,7 +116,7 @@ class Feature:
 
 
 @PREPROCESSORS.register_module(Fields.audio)
-class LinearAECAndFbank:
+class LinearAECAndFbank(Preprocessor):
     SAMPLE_RATE = 16000
 
     def __init__(self, io_config):
@@ -127,18 +128,27 @@ class LinearAECAndFbank:
         self.mitaec = MinDAEC.load()
         self.mask_on_mic = io_config['mask_on'] == 'nearend_mic'
 
-    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        """ linear filtering the near end mic and far end audio, then extract the feature
-        :param data: dict with two keys and correspond audios: "nearend_mic" and "farend_speech"
-        :return: dict with two keys and Tensor values: "base" linear filtered audio,and "feature"
+    def __call__(self, data: Union[Tuple, Dict[str, Any]]) -> Dict[str, Any]:
+        """ Linearly filter the near end mic and far end audio, then extract the feature.
+
+        Args:
+            data: Dict with audios keyed by "nearend_mic" and "farend_speech", or a tuple of both audios.
+
+        Returns:
+            Dict with Tensor values: "base" the linear filtered audio, and "feature" the fbank feature.
         """
-        # read files
-        nearend_mic, fs = self.load_wav(data['nearend_mic'])
-        farend_speech, fs = self.load_wav(data['farend_speech'])
-        if 'nearend_speech' in data:
-            nearend_speech, fs = self.load_wav(data['nearend_speech'])
-        else:
+        if isinstance(data, tuple):
+            nearend_mic, fs = self.load_wav(data[0])
+            farend_speech, fs = self.load_wav(data[1])
             nearend_speech = np.zeros_like(nearend_mic)
+        else:
+            # read files
+            nearend_mic, fs = self.load_wav(data['nearend_mic'])
+            farend_speech, fs = self.load_wav(data['farend_speech'])
+            if 'nearend_speech' in data:
+                nearend_speech, fs = self.load_wav(data['nearend_speech'])
+            else:
+                nearend_speech = np.zeros_like(nearend_mic)
 
         out_mic, out_ref, out_linear, out_echo = self.mitaec.do_linear_aec(
             nearend_mic, farend_speech)
diff --git a/tests/pipelines/test_speech_signal_process.py b/tests/pipelines/test_speech_signal_process.py
index 22dac2b6..4c056a86 100644
--- a/tests/pipelines/test_speech_signal_process.py
+++ b/tests/pipelines/test_speech_signal_process.py
@@ -68,6 +68,25 @@ class SpeechSignalProcessTest(unittest.TestCase):
         aec(input, output_path=output_path)
         print(f'Processed audio saved to {output_path}')
 
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def test_aec_tuple_bytes(self):
+        # Download audio files
+        download(NEAREND_MIC_URL, NEAREND_MIC_FILE)
+        download(FAREND_SPEECH_URL, FAREND_SPEECH_FILE)
+        model_id = 'damo/speech_dfsmn_aec_psm_16k'
+        with open(NEAREND_MIC_FILE, 'rb') as f:
+            nearend_bytes = f.read()
+        with open(FAREND_SPEECH_FILE, 'rb') as f:
+            farend_bytes = f.read()
+        inputs = (nearend_bytes, farend_bytes)
+        aec = pipeline(
+            Tasks.acoustic_echo_cancellation,
+            model=model_id,
+            pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k)
+        output_path = os.path.abspath('output.wav')
+        aec(inputs, output_path=output_path)
+        print(f'Processed audio saved to {output_path}')
+
     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_ans(self):
         # Download audio files
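
Usage sketch for the new tuple input path (not part of the diff above): the call below mirrors test_aec_tuple_bytes, reading two wav files as raw bytes and passing them as a (nearend_mic, farend_speech) tuple. The local wav paths are placeholders; the task, model id, and pipeline name are taken from the test, and the import locations for pipeline, Tasks, and Pipelines are assumed to follow the usual modelscope layout.

import os

from modelscope.metainfo import Pipelines
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Placeholder paths: any 16 kHz wav files for the near end mic recording
# and the far end (reference) speech.
with open('nearend_mic.wav', 'rb') as f:
    nearend_bytes = f.read()
with open('farend_speech.wav', 'rb') as f:
    farend_bytes = f.read()

aec = pipeline(
    Tasks.acoustic_echo_cancellation,
    model='damo/speech_dfsmn_aec_psm_16k',
    pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k)

# The preprocessor now accepts a (nearend_mic, farend_speech) tuple of wav
# bytes in addition to the original dict input; no near end speech label is
# available in this form, so zeros are used internally.
aec((nearend_bytes, farend_bytes), output_path=os.path.abspath('output.wav'))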