[to #42322933] feat: aec pipeline also accepts tuple input, and add test

3 years ago · b2be1abcad
--- a/modelscope/preprocessors/audio.py
+++ b/modelscope/preprocessors/audio.py
@@ -1,12 +1,13 @@
 import io
 import os
 from typing import Any, Dict
 from typing import Any, Dict, Tuple, Union

 import numpy as np
 import scipy.io.wavfile as wav
 import torch

 from modelscope.utils.constant import Fields
 from . import Preprocessor
 from .builder import PREPROCESSORS


@@ -115,7 +116,7 @@ class Feature:


@PREPROCESSORS.register_module(Fields.audio)
 class LinearAECAndFbank:
 class LinearAECAndFbank(Preprocessor):
    SAMPLE_RATE = 16000

    def __init__(self, io_config):
@@ -127,18 +128,27 @@ class LinearAECAndFbank:
        self.mitaec = MinDAEC.load()
        self.mask_on_mic = io_config['mask_on'] == 'nearend_mic'

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """ linear filtering the near end mic and far end audio, then extract the feature
        :param data: dict with two keys and correspond audios: "nearend_mic" and "farend_speech"
        :return: dict with two keys and Tensor values: "base" linear filtered audio，and "feature"
    def __call__(self, data: Union[Tuple, Dict[str, Any]]) -> Dict[str, Any]:
        """ Linear filtering the near end mic and far end audio, then extract the feature.

        Args:
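            data: Either a tuple ``(nearend_mic, farend_speech)`` or a dict with the keys
                "nearend_mic" and "farend_speech" (plus an optional "nearend_speech"), each
                holding the corresponding audio.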

        Returns:
            Dict with two keys and Tensor values: "base" linear filtered audio, and "feature"
        """
        # read files
        nearend_mic, fs = self.load_wav(data['nearend_mic'])
        farend_speech, fs = self.load_wav(data['farend_speech'])
        if 'nearend_speech' in data:
            nearend_speech, fs = self.load_wav(data['nearend_speech'])
        else:
        if isinstance(data, tuple):
            nearend_mic, fs = self.load_wav(data[0])
            farend_speech, fs = self.load_wav(data[1])
            nearend_speech = np.zeros_like(nearend_mic)
        else:
            # read files
            nearend_mic, fs = self.load_wav(data['nearend_mic'])
            farend_speech, fs = self.load_wav(data['farend_speech'])
            if 'nearend_speech' in data:
                nearend_speech, fs = self.load_wav(data['nearend_speech'])
            else:
                nearend_speech = np.zeros_like(nearend_mic)

        out_mic, out_ref, out_linear, out_echo = self.mitaec.do_linear_aec(
            nearend_mic, farend_speech)
--- a/tests/pipelines/test_speech_signal_process.py
+++ b/tests/pipelines/test_speech_signal_process.py
@@ -68,6 +68,25 @@ class SpeechSignalProcessTest(unittest.TestCase):
        aec(input, output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_aec_tuple_bytes(self):
        """Run the AEC pipeline on a (near-end mic, far-end speech) tuple of raw file bytes."""
        sources = ((NEAREND_MIC_URL, NEAREND_MIC_FILE),
                   (FAREND_SPEECH_URL, FAREND_SPEECH_FILE))
        # Fetch both recordings before reading either of them.
        for url, local_file in sources:
            download(url, local_file)

        # Read each downloaded file fully into memory, keeping near-end first.
        payloads = []
        for _, local_file in sources:
            with open(local_file, 'rb') as stream:
                payloads.append(stream.read())

        echo_canceller = pipeline(
            Tasks.acoustic_echo_cancellation,
            model='damo/speech_dfsmn_aec_psm_16k',
            pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k)
        output_path = os.path.abspath('output.wav')
        # The input is passed as a tuple, not a dict.
        echo_canceller(tuple(payloads), output_path=output_path)
        print(f'Processed audio saved to {output_path}')

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_ans(self):
        # Download audio files