You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_named_entity_recognition.py 5.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. import unittest
  3. from modelscope.hub.snapshot_download import snapshot_download
  4. from modelscope.models import Model
  5. from modelscope.models.nlp import (LSTMCRFForNamedEntityRecognition,
  6. TransformerCRFForNamedEntityRecognition)
  7. from modelscope.pipelines import pipeline
  8. from modelscope.pipelines.nlp import NamedEntityRecognitionPipeline
  9. from modelscope.preprocessors import TokenClassificationPreprocessor
  10. from modelscope.utils.constant import Tasks
  11. from modelscope.utils.demo_utils import DemoCompatibilityCheck
  12. from modelscope.utils.test_utils import test_level
  13. class NamedEntityRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
  14. def setUp(self) -> None:
  15. self.task = Tasks.named_entity_recognition
  16. self.model_id = 'damo/nlp_raner_named-entity-recognition_chinese-base-news'
  17. english_model_id = 'damo/nlp_raner_named-entity-recognition_english-large-ecom'
  18. chinese_model_id = 'damo/nlp_raner_named-entity-recognition_chinese-large-generic'
  19. tcrf_model_id = 'damo/nlp_raner_named-entity-recognition_chinese-base-news'
  20. lcrf_model_id = 'damo/nlp_lstm_named-entity-recognition_chinese-news'
  21. sentence = '这与温岭市新河镇的一个神秘的传说有关。'
  22. sentence_en = 'pizza shovel'
  23. sentence_zh = '他 继 续 与 貝 塞 斯 達 遊 戲 工 作 室 在 接 下 来 辐 射 4 游 戏 。'
  24. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  25. def test_run_tcrf_by_direct_model_download(self):
  26. cache_path = snapshot_download(self.tcrf_model_id)
  27. tokenizer = TokenClassificationPreprocessor(cache_path)
  28. model = TransformerCRFForNamedEntityRecognition(
  29. cache_path, tokenizer=tokenizer)
  30. pipeline1 = NamedEntityRecognitionPipeline(
  31. model, preprocessor=tokenizer)
  32. pipeline2 = pipeline(
  33. Tasks.named_entity_recognition,
  34. model=model,
  35. preprocessor=tokenizer)
  36. print(f'sentence: {self.sentence}\n'
  37. f'pipeline1:{pipeline1(input=self.sentence)}')
  38. print()
  39. print(f'pipeline2: {pipeline2(input=self.sentence)}')
  40. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  41. def test_run_lcrf_by_direct_model_download(self):
  42. cache_path = snapshot_download(self.lcrf_model_id)
  43. tokenizer = TokenClassificationPreprocessor(cache_path)
  44. model = LSTMCRFForNamedEntityRecognition(
  45. cache_path, tokenizer=tokenizer)
  46. pipeline1 = NamedEntityRecognitionPipeline(
  47. model, preprocessor=tokenizer)
  48. pipeline2 = pipeline(
  49. Tasks.named_entity_recognition,
  50. model=model,
  51. preprocessor=tokenizer)
  52. print(f'sentence: {self.sentence}\n'
  53. f'pipeline1:{pipeline1(input=self.sentence)}')
  54. print()
  55. print(f'pipeline2: {pipeline2(input=self.sentence)}')
  56. @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
  57. def test_run_tcrf_with_model_from_modelhub(self):
  58. model = Model.from_pretrained(self.tcrf_model_id)
  59. tokenizer = TokenClassificationPreprocessor(model.model_dir)
  60. pipeline_ins = pipeline(
  61. task=Tasks.named_entity_recognition,
  62. model=model,
  63. preprocessor=tokenizer)
  64. print(pipeline_ins(input=self.sentence))
  65. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  66. def test_run_lcrf_with_model_from_modelhub(self):
  67. model = Model.from_pretrained(self.lcrf_model_id)
  68. tokenizer = TokenClassificationPreprocessor(model.model_dir)
  69. pipeline_ins = pipeline(
  70. task=Tasks.named_entity_recognition,
  71. model=model,
  72. preprocessor=tokenizer)
  73. print(pipeline_ins(input=self.sentence))
  74. @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
  75. def test_run_tcrf_with_model_name(self):
  76. pipeline_ins = pipeline(
  77. task=Tasks.named_entity_recognition, model=self.tcrf_model_id)
  78. print(pipeline_ins(input=self.sentence))
  79. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  80. def test_run_lcrf_with_model_name(self):
  81. pipeline_ins = pipeline(
  82. task=Tasks.named_entity_recognition, model=self.lcrf_model_id)
  83. print(pipeline_ins(input=self.sentence))
  84. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  85. def test_run_lcrf_with_chinese_model_name(self):
  86. pipeline_ins = pipeline(
  87. task=Tasks.named_entity_recognition, model=self.chinese_model_id)
  88. print(pipeline_ins(input=self.sentence_zh))
  89. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  90. def test_run_english_with_model_name(self):
  91. pipeline_ins = pipeline(
  92. task=Tasks.named_entity_recognition, model=self.english_model_id)
  93. print(pipeline_ins(input=self.sentence_en))
  94. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  95. def test_run_with_default_model(self):
  96. pipeline_ins = pipeline(task=Tasks.named_entity_recognition)
  97. print(pipeline_ins(input=self.sentence))
  98. @unittest.skip('demo compatibility test is only enabled on a needed-basis')
  99. def test_demo_compatibility(self):
  100. self.compatibility_check()
  101. if __name__ == '__main__':
  102. unittest.main()