You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_multilingual_word_segmentation.py 2.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. import unittest
  3. from modelscope.hub.snapshot_download import snapshot_download
  4. from modelscope.models import Model
  5. from modelscope.models.nlp import TransformerCRFForWordSegmentation
  6. from modelscope.pipelines import pipeline
  7. from modelscope.pipelines.nlp import WordSegmentationThaiPipeline
  8. from modelscope.preprocessors import WordSegmentationPreprocessorThai
  9. from modelscope.utils.constant import Tasks
  10. from modelscope.utils.demo_utils import DemoCompatibilityCheck
  11. from modelscope.utils.regress_test_utils import MsRegressTool
  12. from modelscope.utils.test_utils import test_level
  13. class WordSegmentationTest(unittest.TestCase, DemoCompatibilityCheck):
  14. def setUp(self) -> None:
  15. self.task = Tasks.word_segmentation
  16. self.model_id = 'damo/nlp_xlmr_word-segmentation_thai'
  17. sentence = 'รถคันเก่าก็ยังเก็บเอาไว้ยังไม่ได้ขาย'
  18. regress_tool = MsRegressTool(baseline=False)
  19. @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
  20. def test_run_by_direct_model_download(self):
  21. cache_path = snapshot_download(self.model_id)
  22. tokenizer = WordSegmentationPreprocessorThai(cache_path)
  23. model = TransformerCRFForWordSegmentation.from_pretrained(cache_path)
  24. pipeline1 = WordSegmentationThaiPipeline(model, preprocessor=tokenizer)
  25. pipeline2 = pipeline(
  26. Tasks.word_segmentation, model=model, preprocessor=tokenizer)
  27. print(f'sentence: {self.sentence}\n'
  28. f'pipeline1:{pipeline1(input=self.sentence)}')
  29. print(f'pipeline2: {pipeline2(input=self.sentence)}')
  30. @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
  31. def test_run_with_model_from_modelhub(self):
  32. model = Model.from_pretrained(self.model_id)
  33. tokenizer = WordSegmentationPreprocessorThai(model.model_dir)
  34. pipeline_ins = pipeline(
  35. task=Tasks.word_segmentation, model=model, preprocessor=tokenizer)
  36. print(pipeline_ins(input=self.sentence))
  37. @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
  38. def test_run_with_model_name(self):
  39. pipeline_ins = pipeline(
  40. task=Tasks.word_segmentation, model=self.model_id)
  41. print(pipeline_ins(input=self.sentence))
  42. @unittest.skip('demo compatibility test is only enabled on a needed-basis')
  43. def test_demo_compatibility(self):
  44. self.compatibility_check()
  45. if __name__ == '__main__':
  46. unittest.main()