pangda yingda.chen 2 years ago
parent
commit
54d219e90b
3 changed files with 295 additions and 14 deletions
  1. +260
    -14
      tests/pipelines/test_named_entity_recognition.py
  2. +16
    -0
      tests/pipelines/test_sentence_embedding.py
  3. +19
    -0
      tests/pipelines/test_text_ranking.py

+ 260
- 14
tests/pipelines/test_named_entity_recognition.py View File

@@ -15,24 +15,260 @@ from modelscope.utils.test_utils import test_level


class NamedEntityRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
# One sample sentence per language code. test_run_with_all_modelcards picks
# the sentence for a model through the 'language' field of its
# all_modelcards_info entry.
language_examples = {
    'zh': '新华社北京二月十一日电(记者唐虹)',
    'en': 'Italy recalled Marcello Cuttitta',
    'ru': (
        'важным традиционным промыслом является производство пальмового масла .'
    ),
    'fr': 'fer à souder électronique',
    'es': (
        'el primer avistamiento por europeos de esta zona fue en 1606 , '
        'en la expedición española mandada por luis váez de torres .'
    ),
    'nl': (
        'in het vorige seizoen promoveerden sc cambuur , dat kampioen werd en go ahead eagles via de play offs .'
    ),
    'tr': (
        'köyün pırasa kavurması ve içi yağlama ve akıtma adındaki hamur işleri meşhurdur . ; çörek ekmeği ; '
        'diye adlandırdıkları mayasız ekmeği unutmamaklazım .'
    ),
    'ko': (
        '국립진주박물관은 1984년 11월 2일 개관하였으며 한국 전통목조탑을 석조 건물로 형상화한 것으로 건축가 김수근 선생의 대표적 작품이다 .'
    ),
    # NOTE(review): this sample appears to be stored as Arabic
    # presentation-form code points rather than base letters; it is kept
    # exactly as-is -- confirm this is the intended model input.
    'fa': 'ﺞﻤﻋیﺕ ﺍیﻥ ﺎﺴﺗﺎﻧ ۳۰ ﻩﺯﺍﺭ ﻦﻓﺭ ﺎﺴﺗ ﻭ ﻢﻧﺎﺒﻋ ﻢﻬﻣی ﺍﺯ ﺲﻧگ ﺂﻬﻧ ﺩﺍﺭﺩ .',
    'de': 'die szene beinhaltete lenny baker und christopher walken .',
    'hi': (
        '१४९२ में एक चार्टर के आधार पर, उसके पिता ने उसे वाडोविस के उत्तराधिकारी के रूप में छोड़ दिया।'
    ),
    'bn': (
        'যদিও গির্জার সবসময় রাজকীয় পিউ থাকত, তবে গির্জায় রাজকীয়ভাবে এটিই ছিল প্রথম দেখা।'
    ),
    # The multilingual model is exercised with the same sentence as 'zh'.
    'multi': '新华社北京二月十一日电(记者唐虹)',
}

# Model cards exercised by test_run_with_all_modelcards. Every entry is a dict
# with a 'model_id' and a 'language'; the language is the key used to pick a
# sample sentence from language_examples.
all_modelcards_info = [
    {'model_id': card_id, 'language': lang_code}
    for card_id, lang_code in (
        ('damo/nlp_raner_named-entity-recognition_chinese-base-news', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_chinese-base-social_media', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_chinese-base-generic', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_chinese-base-resume', 'zh'),
        ('damo/nlp_lstm_named-entity-recognition_chinese-news', 'zh'),
        ('damo/nlp_lstm_named-entity-recognition_chinese-social_media', 'zh'),
        ('damo/nlp_lstm_named-entity-recognition_chinese-generic', 'zh'),
        ('damo/nlp_lstm_named-entity-recognition_chinese-resume', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_chinese-base-book', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_chinese-base-finance', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_chinese-base-game', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_chinese-base-bank', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_chinese-base-literature', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_chinese-base-cmeee', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_english-large-news', 'en'),
        ('damo/nlp_raner_named-entity-recognition_english-large-social_media', 'en'),
        ('damo/nlp_raner_named-entity-recognition_english-large-literature', 'en'),
        ('damo/nlp_raner_named-entity-recognition_english-large-politics', 'en'),
        ('damo/nlp_raner_named-entity-recognition_english-large-music', 'en'),
        ('damo/nlp_raner_named-entity-recognition_english-large-science', 'en'),
        ('damo/nlp_raner_named-entity-recognition_english-large-ai', 'en'),
        ('damo/nlp_raner_named-entity-recognition_english-large-wiki', 'en'),
        ('damo/nlp_raner_named-entity-recognition_chinese-large-generic', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_english-large-generic', 'en'),
        ('damo/nlp_raner_named-entity-recognition_multilingual-large-generic', 'multi'),
        ('damo/nlp_raner_named-entity-recognition_russian-large-generic', 'ru'),
        ('damo/nlp_raner_named-entity-recognition_spanish-large-generic', 'es'),
        ('damo/nlp_raner_named-entity-recognition_dutch-large-generic', 'nl'),
        ('damo/nlp_raner_named-entity-recognition_turkish-large-generic', 'tr'),
        ('damo/nlp_raner_named-entity-recognition_korean-large-generic', 'ko'),
        ('damo/nlp_raner_named-entity-recognition_farsi-large-generic', 'fa'),
        ('damo/nlp_raner_named-entity-recognition_german-large-generic', 'de'),
        ('damo/nlp_raner_named-entity-recognition_hindi-large-generic', 'hi'),
        ('damo/nlp_raner_named-entity-recognition_bangla-large-generic', 'bn'),
        ('damo/nlp_raner_named-entity-recognition_chinese-base-ecom', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_chinese-base-ecom-50cls', 'zh'),
        ('damo/nlp_raner_named-entity-recognition_english-large-ecom', 'en'),
        ('damo/nlp_raner_named-entity-recognition_russian-large-ecom', 'ru'),
        ('damo/nlp_raner_named-entity-recognition_french-large-ecom', 'fr'),
        ('damo/nlp_raner_named-entity-recognition_spanish-large-ecom', 'es'),
        ('damo/nlp_structbert_keyphrase-extraction_base-icassp2023-mug-track4-baseline', 'zh'),
    )
]

def setUp(self) -> None:
    """Attach the task, model ids and sample inputs to the test instance."""
    self.task = Tasks.named_entity_recognition
    self.model_id = 'damo/nlp_raner_named-entity-recognition_chinese-base-news'

    # Model ids, exposed as instance attributes.
    self.english_model_id = 'damo/nlp_raner_named-entity-recognition_english-large-ecom'
    self.chinese_model_id = 'damo/nlp_raner_named-entity-recognition_chinese-large-generic'
    self.tcrf_model_id = 'damo/nlp_raner_named-entity-recognition_chinese-base-news'
    self.lcrf_model_id = 'damo/nlp_lstm_named-entity-recognition_chinese-news'
    self.addr_model_id = 'damo/nlp_structbert_address-parsing_chinese_base'
    self.lstm_model_id = 'damo/nlp_lstm_named-entity-recognition_chinese-generic'

    # Sample inputs.
    self.sentence = '这与温岭市新河镇的一个神秘的传说有关。'
    self.sentence_en = 'pizza shovel'
    self.sentence_zh = '他 继 续 与 貝 塞 斯 達 遊 戲 工 作 室 在 接 下 来 辐 射 4 游 戏 。'
    self.addr = '浙江省杭州市余杭区文一西路969号亲橙里'
    self.addr1 = '浙江省西湖区灵隐隧道'
    self.addr2 = '内蒙古自治区巴彦淖尔市'
    self.ecom = '欧美单 秋季女装时尚百搭休闲修身 亚麻混纺短款 外套西装'

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_tcrf_by_direct_model_download(self):
@@ -222,6 +458,16 @@ class NamedEntityRecognitionTest(unittest.TestCase, DemoCompatibilityCheck):
pipeline_ins = pipeline(task=Tasks.named_entity_recognition)
print(pipeline_ins(input=self.sentence))

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_all_modelcards(self):
    """Run every model listed in ``all_modelcards_info`` on a sample sentence.

    Each model gets its own sub-test keyed by ``model_id``. The sample
    sentence is looked up inside the sub-test, so a language code missing
    from ``language_examples`` is reported against that model only and the
    remaining models still run.
    """
    for item in self.all_modelcards_info:
        model_id = item['model_id']
        with self.subTest(model_id=model_id):
            sentence = self.language_examples[item['language']]
            pipeline_ins = pipeline(
                task=Tasks.named_entity_recognition, model=model_id)
            print(pipeline_ins(input=sentence))

@unittest.skip('demo compatibility test is only enabled on a needed-basis')
def test_demo_compatibility(self):
    """Invoke ``self.compatibility_check()``; always skipped by the decorator."""
    self.compatibility_check()


+ 16
- 0
tests/pipelines/test_sentence_embedding.py View File

@@ -36,6 +36,16 @@ class SentenceEmbeddingTest(unittest.TestCase):
'sentences_to_compare': []
}

# Model id and input used by test_run_with_el_model.
el_model_id = 'damo/nlp_bert_entity-embedding_chinese-base'
el_inputs = {
    # One sentence, with [ENT_S] ... [ENT_E] placed around a span of it.
    'source_sentence': [
        '宋小宝小品《美人鱼》, [ENT_S] 大鹏 [ENT_E] 上演生死离别,关键时刻美人鱼登场',
    ],
    # Three strings the source sentence is compared against.
    'sentences_to_compare': [
        '董成鹏; 类型: Person; 别名: Da Peng, 大鹏;',
        '超级飞侠; 类型: Work; 别名: 超飞, 출동!슈퍼윙스, Super Wings;',
        '王源; 类型: Person; 别名: Roy;',
    ],
}

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_by_direct_model_download(self):
cache_path = snapshot_download(self.model_id)
@@ -77,6 +87,12 @@ class SentenceEmbeddingTest(unittest.TestCase):
pipeline_ins = pipeline(task=Tasks.sentence_embedding)
print(pipeline_ins(input=self.inputs))

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_el_model(self):
    """Build a sentence-embedding pipeline for ``el_model_id`` and print its output for ``el_inputs``."""
    el_pipeline = pipeline(
        model=self.el_model_id, task=Tasks.sentence_embedding)
    result = el_pipeline(input=self.el_inputs)
    print(result)


# Run the tests when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

+ 19
- 0
tests/pipelines/test_text_ranking.py View File

@@ -28,6 +28,19 @@ class TextRankingTest(unittest.TestCase):
]
}

# Model id and input used by test_run_with_el_model.
el_model_id = 'damo/nlp_bert_entity-matching_chinese-base'
el_inputs = {
    # One sentence, with [ENT_S] ... [ENT_E] placed around a span of it.
    'source_sentence': [
        '我是猫》([日]夏目漱石)【摘要 [ENT_S] 书评 [ENT_E] 试读】',
    ],
    # Two long strings, each assembled from adjacent string literals.
    'sentences_to_compare': [
        (
            '书评; 类型: Other; 别名: Book review; 三元组: 书评 # 外文名 # Book review $ 书评 # 摘要 # '
            '书评,即评论并介绍书籍的文章,是以“书”为对象,实事求是的、有见识的分析书籍的形式和内容,探求创作的思想性、学术性、知识性和艺术性,从而在作者、读者和出版商之间构建信息交流的渠道。 $ 书评 # 定义 # '
            '评论并介绍书籍的文章 $ 书评 # 中文名 # 书评 $ 书评 # 义项描述 # 书评 $ 书评 # 类型 # 应用写作的一种重要文体 $ 书评 # 标签 # 文学作品、文化、出版物、小说、书籍 $'
        ),
        (
            '摘要; 类型: Other; 别名: 摘, abstract, 书评; 三元组: 摘要 # 读音 # zhāi yào $ 摘要 # 外文名 # abstract $ 摘要 # 摘要 # '
            '摘要又称概要、内容提要,意思是摘录要点或摘录下来的要点。 $ 摘要 # 词目 # 摘要 $ 摘要 # 词性 # 动词,名词 $ 摘要 # 中文名 # 摘要 $ 摘要 # 别称 # 概要、内容提要 $ 摘要 '
            '# 义项描述 # 摘要 $ 摘要 # 标签 # 文化、文学家、行业人物、法律术语、小说 $'
        ),
    ],
}

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_by_direct_model_download(self):
for model_id in self.models:
@@ -62,6 +75,12 @@ class TextRankingTest(unittest.TestCase):
pipeline_ins = pipeline(task=Tasks.text_ranking)
print(pipeline_ins(input=self.inputs))

@unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
def test_run_with_el_model(self):
    """Build a text-ranking pipeline for ``el_model_id`` and print its output for ``el_inputs``."""
    el_pipeline = pipeline(
        model=self.el_model_id, task=Tasks.text_ranking)
    result = el_pipeline(input=self.el_inputs)
    print(result)


# Run the tests when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

Loading…
Cancel
Save