|
- # Copyright (c) Alibaba, Inc. and its affiliates.
-
- import os
- import uuid
- from typing import Any, Dict, Union
-
- from maas_lib.data.nlp.space.fields.intent_field import IntentBPETextField
- from maas_lib.utils.config import Config
- from maas_lib.utils.constant import Fields, InputFields
- from maas_lib.utils.type_assert import type_assert
- from ..base import Preprocessor
- from ..builder import PREPROCESSORS
-
- __all__ = ['DialogIntentPreprocessor']
-
-
@PREPROCESSORS.register_module(Fields.nlp, module_name=r'space-intent')
class DialogIntentPreprocessor(Preprocessor):
    """Preprocessor for the ``space-intent`` dialog-intent module.

    Registered in ``PREPROCESSORS`` under ``Fields.nlp``. It delegates the
    actual text handling to an ``IntentBPETextField`` built from the files
    found in the model directory.
    """

    def __init__(self, model_dir: str, *args, **kwargs):
        """Load the model configuration and build the text field.

        Args:
            model_dir (str): directory holding ``configuration.json`` and
                whatever other resources ``IntentBPETextField`` reads.
            *args: forwarded unchanged to ``Preprocessor.__init__``.
            **kwargs: forwarded unchanged to ``Preprocessor.__init__``.
        """
        super().__init__(*args, **kwargs)

        self.model_dir = model_dir

        # The configuration file is expected directly inside the model dir.
        config_path = os.path.join(self.model_dir, 'configuration.json')
        self.config = Config.from_file(config_path)

        self.text_field = IntentBPETextField(
            self.model_dir,
            config=self.config,
        )

    @type_assert(object, str)
    def __call__(self, data: str) -> Dict[str, Any]:
        """Convert one raw sentence into the collated model input.

        Args:
            data (str): a single sentence, e.g. ``'you are so handsome.'``

        Returns:
            Dict[str, Any]: the first element of the pair produced by
                ``collate_fn_multi_turn``; the second element is discarded.
        """
        # The text field works on a list of sentences, so wrap the single
        # input before handing it over.
        encoded = self.text_field.preprocessor([data])
        batch, _ = self.text_field.collate_fn_multi_turn(encoded)
        return batch
|