You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_table_question_answering.py 5.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. import os
  3. import unittest
  4. from typing import List
  5. import json
  6. from transformers import BertTokenizer
  7. from modelscope.hub.snapshot_download import snapshot_download
  8. from modelscope.models import Model
  9. from modelscope.outputs import OutputKeys
  10. from modelscope.pipelines import pipeline
  11. from modelscope.pipelines.nlp import TableQuestionAnsweringPipeline
  12. from modelscope.preprocessors import TableQuestionAnsweringPreprocessor
  13. from modelscope.preprocessors.star3.fields.database import Database
  14. from modelscope.utils.constant import ModelFile, Tasks
  15. from modelscope.utils.test_utils import test_level
  16. def tableqa_tracking_and_print_results_with_history(
  17. pipelines: List[TableQuestionAnsweringPipeline]):
  18. test_case = {
  19. 'utterance': [
  20. '有哪些风险类型?',
  21. '风险类型有多少种?',
  22. '珠江流域的小(2)型水库的库容总量是多少?',
  23. '那平均值是多少?',
  24. '那水库的名称呢?',
  25. '换成中型的呢?',
  26. '枣庄营业厅的电话',
  27. '那地址呢?',
  28. '枣庄营业厅的电话和地址',
  29. ]
  30. }
  31. for p in pipelines:
  32. historical_queries = None
  33. for question in test_case['utterance']:
  34. output_dict = p({
  35. 'question': question,
  36. 'history_sql': historical_queries
  37. })
  38. print('question', question)
  39. print('sql text:', output_dict[OutputKeys.SQL_STRING])
  40. print('sql query:', output_dict[OutputKeys.SQL_QUERY])
  41. print('query result:', output_dict[OutputKeys.QUERT_RESULT])
  42. print('json dumps', json.dumps(output_dict))
  43. print()
  44. historical_queries = output_dict[OutputKeys.HISTORY]
  45. def tableqa_tracking_and_print_results_without_history(
  46. pipelines: List[TableQuestionAnsweringPipeline]):
  47. test_case = {
  48. 'utterance': [
  49. '有哪些风险类型?',
  50. '风险类型有多少种?',
  51. '珠江流域的小(2)型水库的库容总量是多少?',
  52. '枣庄营业厅的电话',
  53. '枣庄营业厅的电话和地址',
  54. ]
  55. }
  56. for p in pipelines:
  57. for question in test_case['utterance']:
  58. output_dict = p({'question': question})
  59. print('question', question)
  60. print('sql text:', output_dict[OutputKeys.SQL_STRING])
  61. print('sql query:', output_dict[OutputKeys.SQL_QUERY])
  62. print('query result:', output_dict[OutputKeys.QUERT_RESULT])
  63. print('json dumps', json.dumps(output_dict))
  64. print()
  65. class TableQuestionAnswering(unittest.TestCase):
  66. def setUp(self) -> None:
  67. self.task = Tasks.table_question_answering
  68. self.model_id = 'damo/nlp_convai_text2sql_pretrain_cn'
  69. model_id = 'damo/nlp_convai_text2sql_pretrain_cn'
  70. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  71. def test_run_by_direct_model_download(self):
  72. cache_path = snapshot_download(self.model_id)
  73. preprocessor = TableQuestionAnsweringPreprocessor(model_dir=cache_path)
  74. pipelines = [
  75. pipeline(
  76. Tasks.table_question_answering,
  77. model=cache_path,
  78. preprocessor=preprocessor)
  79. ]
  80. tableqa_tracking_and_print_results_with_history(pipelines)
  81. @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
  82. def test_run_with_model_from_modelhub(self):
  83. model = Model.from_pretrained(self.model_id)
  84. preprocessor = TableQuestionAnsweringPreprocessor(
  85. model_dir=model.model_dir)
  86. pipelines = [
  87. pipeline(
  88. Tasks.table_question_answering,
  89. model=model,
  90. preprocessor=preprocessor)
  91. ]
  92. tableqa_tracking_and_print_results_with_history(pipelines)
  93. @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
  94. def test_run_with_model_from_task(self):
  95. pipelines = [pipeline(Tasks.table_question_answering, self.model_id)]
  96. tableqa_tracking_and_print_results_with_history(pipelines)
  97. @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
  98. def test_run_with_model_from_modelhub_with_other_classes(self):
  99. model = Model.from_pretrained(self.model_id)
  100. self.tokenizer = BertTokenizer(
  101. os.path.join(model.model_dir, ModelFile.VOCAB_FILE))
  102. db = Database(
  103. tokenizer=self.tokenizer,
  104. table_file_path=[
  105. os.path.join(model.model_dir, 'databases', fname)
  106. for fname in os.listdir(
  107. os.path.join(model.model_dir, 'databases'))
  108. ],
  109. syn_dict_file_path=os.path.join(model.model_dir, 'synonym.txt'),
  110. is_use_sqlite=True)
  111. preprocessor = TableQuestionAnsweringPreprocessor(
  112. model_dir=model.model_dir, db=db)
  113. pipelines = [
  114. pipeline(
  115. Tasks.table_question_answering,
  116. model=model,
  117. preprocessor=preprocessor,
  118. db=db)
  119. ]
  120. tableqa_tracking_and_print_results_without_history(pipelines)
  121. tableqa_tracking_and_print_results_with_history(pipelines)
# Run this module's unittest test cases when it is executed as a script.
if __name__ == '__main__':
    unittest.main()