
support asr new models

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10919277

* support new asr paraformer model

* support asr conformer model
master^2
jiangyu.xzy, wenmeng.zwm · 2 years ago
commit 9bfc77c178
4 changed files with 59 additions and 13 deletions:
  1. modelscope/pipelines/audio/asr_inference_pipeline.py (+9, -7)
  2. modelscope/preprocessors/asr.py (+14, -5)
  3. requirements/audio.txt (+1, -1)
  4. tests/pipelines/test_automatic_speech_recognition.py (+35, -0)
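
The newly supported Conformer and Paraformer models are exposed through the regular ASR pipeline. As a rough orientation, a call along the lines below should exercise one of them once this change is in place. This is a minimal sketch assuming the usual ModelScope pipeline entry points; the model ID and wav path are copied from the test entries added in this commit, and the exact output schema depends on the model.

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Build an ASR pipeline for one of the newly added Paraformer models
# (model ID taken from tests/pipelines/test_automatic_speech_recognition.py).
asr = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch')

# Run recognition on a local 16 kHz wav file; the result is typically a dict
# containing the recognized text.
result = asr('data/test/audios/asr_example_id.wav')
print(result)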

modelscope/pipelines/audio/asr_inference_pipeline.py (+9, -7)

@@ -110,6 +110,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
             'sampled_lengths': 'seq2seq/sampled_lengths',
             'lang': 'zh-cn',
             'code_base': inputs['code_base'],
+            'mode': inputs['mode'],
             'fs': {
                 'audio_fs': inputs['audio_fs'],
                 'model_fs': 16000
@@ -233,15 +234,16 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
     def run_inference(self, cmd):
         asr_result = []
         if self.framework == Frameworks.torch and cmd['code_base'] == 'funasr':
-            from funasr.bin import asr_inference_paraformer_modelscope
+            if cmd['mode'] == 'asr':
+                from funasr.bin import asr_inference_modelscope as asr_inference
+            else:
+                from funasr.bin import asr_inference_paraformer_modelscope as asr_inference
 
-            if hasattr(asr_inference_paraformer_modelscope, 'set_parameters'):
-                asr_inference_paraformer_modelscope.set_parameters(
-                    sample_rate=cmd['fs'])
-                asr_inference_paraformer_modelscope.set_parameters(
-                    language=cmd['lang'])
+            if hasattr(asr_inference, 'set_parameters'):
+                asr_inference.set_parameters(sample_rate=cmd['fs'])
+                asr_inference.set_parameters(language=cmd['lang'])
 
-            asr_result = asr_inference_paraformer_modelscope.asr_inference(
+            asr_result = asr_inference.asr_inference(
                 batch_size=cmd['batch_size'],
                 maxlenratio=cmd['maxlenratio'],
                 minlenratio=cmd['minlenratio'],
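
For readability, the backend selection this hunk introduces in run_inference can be read in isolation as the sketch below. Illustration only: the helper name select_asr_backend is made up, the funasr module names are the ones imported in the diff above, and the shape of cmd follows the surrounding code.

def select_asr_backend(cmd):
    # cmd['mode'] picks which funasr decoding entry point is used.
    if cmd['mode'] == 'asr':
        # generic encoder-decoder entry point (presumably the path used by the
        # newly added Conformer recipes)
        from funasr.bin import asr_inference_modelscope as asr_inference
    else:
        # Paraformer-style entry point
        from funasr.bin import asr_inference_paraformer_modelscope as asr_inference

    # Optional runtime parameters, guarded because not every funasr build
    # exposes set_parameters.
    if hasattr(asr_inference, 'set_parameters'):
        asr_inference.set_parameters(sample_rate=cmd['fs'])
        asr_inference.set_parameters(language=cmd['lang'])
    return asr_inference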


modelscope/preprocessors/asr.py (+14, -5)

@@ -103,6 +103,12 @@ class WavToScp(Preprocessor):
         else:
             code_base = None
         inputs['code_base'] = code_base
+        # decoding mode
+        if 'mode' in inputs['model_config']:
+            mode = inputs['model_config']['mode']
+        else:
+            mode = None
+        inputs['mode'] = mode
 
         if inputs['model_type'] == Frameworks.torch:
             assert inputs['model_config'].__contains__(
@@ -111,8 +117,6 @@ class WavToScp(Preprocessor):
                 'am_model_config'), 'am_model_config does not exist'
             assert inputs['model_config'].__contains__(
                 'asr_model_config'), 'asr_model_config does not exist'
-            assert inputs['model_config'].__contains__(
-                'asr_model_wav_config'), 'asr_model_wav_config does not exist'
 
             am_model_config: str = os.path.join(
                 inputs['model_workspace'],
@@ -127,9 +131,14 @@ class WavToScp(Preprocessor):
             assert os.path.exists(
                 asr_model_config), 'asr_model_config does not exist'
 
-            asr_model_wav_config: str = os.path.join(
-                inputs['model_workspace'],
-                inputs['model_config']['asr_model_wav_config'])
+            if 'asr_model_wav_config' in inputs['model_config']:
+                asr_model_wav_config: str = os.path.join(
+                    inputs['model_workspace'],
+                    inputs['model_config']['asr_model_wav_config'])
+            else:
+                asr_model_wav_config: str = os.path.join(
+                    inputs['model_workspace'],
+                    inputs['model_config']['asr_model_config'])
             assert os.path.exists(
                 asr_model_wav_config), 'asr_model_wav_config does not exist'
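
Taken together, these hunks relax what WavToScp expects in a model's model_config: 'mode' becomes an optional field (treated as None when absent) and 'asr_model_wav_config' becomes optional, falling back to 'asr_model_config'. A hypothetical excerpt of the keys the preprocessor now reads; only the key names come from the code above, the values are placeholders.

# Placeholder values; key names are taken from the preprocessor code above.
model_config = {
    'code_base': 'funasr',            # routes decoding through funasr
    'mode': 'asr',                    # new, optional; treated as None when absent
    'am_model_config': 'am_config.yaml',
    'asr_model_config': 'decoding.yaml',
    # 'asr_model_wav_config': 'wav_decoding.yaml',
    # ^ now optional: when missing, asr_model_config is also used for wav input
}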



requirements/audio.txt (+1, -1)

@@ -1,6 +1,6 @@
 easyasr>=0.0.2
 espnet==202204
-funasr>=0.1.0
+funasr>=0.1.3
 h5py
 inflect
 keras


tests/pipelines/test_automatic_speech_recognition.py (+35, -0)

@@ -217,6 +217,41 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase,
             'damo/speech_UniASR_asr_2pass-id-16k-common-vocab1067-tensorflow1-offline',
             'wav_path': 'data/test/audios/asr_example_id.wav'
         },
+        {
+            'model_id':
+            'damo/speech_conformer_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch',
+            'wav_path': 'data/test/audios/asr_example_id.wav'
+        },
+        {
+            'model_id':
+            'damo/speech_conformer_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch',
+            'wav_path': 'data/test/audios/asr_example_id.wav'
+        },
+        {
+            'model_id':
+            'damo/speech_paraformer_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch',
+            'wav_path': 'data/test/audios/asr_example_id.wav'
+        },
+        {
+            'model_id':
+            'damo/speech_paraformer_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch',
+            'wav_path': 'data/test/audios/asr_example_id.wav'
+        },
+        {
+            'model_id':
+            'damo/speech_paraformerbert_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch',
+            'wav_path': 'data/test/audios/asr_example_id.wav'
+        },
+        {
+            'model_id':
+            'damo/speech_paraformerbert_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch',
+            'wav_path': 'data/test/audios/asr_example_id.wav'
+        },
+        {
+            'model_id':
+            'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
+            'wav_path': 'data/test/audios/asr_example_id.wav'
+        },
     ]
 
     def setUp(self) -> None:

