You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_multilingual_named_entity_recognition.py 5.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. import unittest
  3. from modelscope.hub.snapshot_download import snapshot_download
  4. from modelscope.models import Model
  5. from modelscope.models.nlp import (LSTMCRFForNamedEntityRecognition,
  6. TransformerCRFForNamedEntityRecognition)
  7. from modelscope.pipelines import pipeline
  8. from modelscope.pipelines.nlp import (NamedEntityRecognitionThaiPipeline,
  9. NamedEntityRecognitionVietPipeline)
  10. from modelscope.preprocessors import NERPreprocessorThai, NERPreprocessorViet
  11. from modelscope.utils.constant import Tasks
  12. from modelscope.utils.demo_utils import DemoCompatibilityCheck
  13. from modelscope.utils.test_utils import test_level
  14. class MultilingualNamedEntityRecognitionTest(unittest.TestCase,
  15. DemoCompatibilityCheck):
  16. def setUp(self) -> None:
  17. self.task = Tasks.named_entity_recognition
  18. self.model_id = 'damo/nlp_xlmr_named-entity-recognition_thai-ecommerce-title'
  19. thai_tcrf_model_id = 'damo/nlp_xlmr_named-entity-recognition_thai-ecommerce-title'
  20. thai_sentence = 'เครื่องชั่งดิจิตอลแบบตั้งพื้น150kg.'
  21. viet_tcrf_model_id = 'damo/nlp_xlmr_named-entity-recognition_viet-ecommerce-title'
  22. viet_sentence = 'Nón vành dễ thương cho bé gái'
  23. multilingual_model_id = 'damo/nlp_raner_named-entity-recognition_multilingual-large-generic'
  24. ml_stc = 'সমস্ত বেতন নিলামের সাধারণ ব্যবহারিক উদাহরণ বিভিন্ন পেনি নিলাম / বিডিং ফি নিলাম ওয়েবসাইটে পাওয়া যাবে।'
  25. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  26. def test_run_tcrf_by_direct_model_download_thai(self):
  27. cache_path = snapshot_download(self.thai_tcrf_model_id)
  28. tokenizer = NERPreprocessorThai(cache_path)
  29. model = TransformerCRFForNamedEntityRecognition(
  30. cache_path, tokenizer=tokenizer)
  31. pipeline1 = NamedEntityRecognitionThaiPipeline(
  32. model, preprocessor=tokenizer)
  33. pipeline2 = pipeline(
  34. Tasks.named_entity_recognition,
  35. model=model,
  36. preprocessor=tokenizer)
  37. print(f'thai_sentence: {self.thai_sentence}\n'
  38. f'pipeline1:{pipeline1(input=self.thai_sentence)}')
  39. print()
  40. print(f'pipeline2: {pipeline2(input=self.thai_sentence)}')
  41. @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
  42. def test_run_tcrf_with_model_from_modelhub_thai(self):
  43. model = Model.from_pretrained(self.thai_tcrf_model_id)
  44. tokenizer = NERPreprocessorThai(model.model_dir)
  45. pipeline_ins = pipeline(
  46. task=Tasks.named_entity_recognition,
  47. model=model,
  48. preprocessor=tokenizer)
  49. print(pipeline_ins(input=self.thai_sentence))
  50. @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
  51. def test_run_tcrf_with_model_name_thai(self):
  52. pipeline_ins = pipeline(
  53. task=Tasks.named_entity_recognition, model=self.thai_tcrf_model_id)
  54. print(pipeline_ins(input=self.thai_sentence))
  55. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  56. def test_run_tcrf_with_model_name_multilingual(self):
  57. pipeline_ins = pipeline(
  58. task=Tasks.named_entity_recognition,
  59. model=self.multilingual_model_id)
  60. print(pipeline_ins(input=self.ml_stc))
  61. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  62. def test_run_tcrf_by_direct_model_download_viet(self):
  63. cache_path = snapshot_download(self.viet_tcrf_model_id)
  64. tokenizer = NERPreprocessorViet(cache_path)
  65. model = TransformerCRFForNamedEntityRecognition(
  66. cache_path, tokenizer=tokenizer)
  67. pipeline1 = NamedEntityRecognitionVietPipeline(
  68. model, preprocessor=tokenizer)
  69. pipeline2 = pipeline(
  70. Tasks.named_entity_recognition,
  71. model=model,
  72. preprocessor=tokenizer)
  73. print(f'viet_sentence: {self.viet_sentence}\n'
  74. f'pipeline1:{pipeline1(input=self.viet_sentence)}')
  75. print()
  76. print(f'pipeline2: {pipeline2(input=self.viet_sentence)}')
  77. @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
  78. def test_run_tcrf_with_model_from_modelhub_viet(self):
  79. model = Model.from_pretrained(self.viet_tcrf_model_id)
  80. tokenizer = NERPreprocessorViet(model.model_dir)
  81. pipeline_ins = pipeline(
  82. task=Tasks.named_entity_recognition,
  83. model=model,
  84. preprocessor=tokenizer)
  85. print(pipeline_ins(input=self.viet_sentence))
  86. @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
  87. def test_run_tcrf_with_model_name_viet(self):
  88. pipeline_ins = pipeline(
  89. task=Tasks.named_entity_recognition, model=self.viet_tcrf_model_id)
  90. print(pipeline_ins(input=self.viet_sentence))
  91. @unittest.skip('demo compatibility test is only enabled on a needed-basis')
  92. def test_demo_compatibility(self):
  93. self.compatibility_check()
  94. if __name__ == '__main__':
  95. unittest.main()