You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

run_squad.py 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. '''
  16. Bert finetune and evaluation script.
  17. '''
  18. import os
  19. import argparse
  20. import collections
  21. from src.bert_for_finetune import BertSquadCell, BertSquad
  22. from src.finetune_eval_config import optimizer_cfg, bert_net_cfg
  23. from src.dataset import create_squad_dataset
  24. from src import tokenization
  25. from src.create_squad_data import read_squad_examples, convert_examples_to_features
  26. from src.run_squad import write_predictions
  27. from src.utils import make_directory, LossCallBack, LoadNewestCkpt
  28. import mindspore.common.dtype as mstype
  29. from mindspore import context
  30. from mindspore import log as logger
  31. from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell
  32. from mindspore.nn.optim import AdamWeightDecayDynamicLR, Lamb, Momentum
  33. from mindspore.common.tensor import Tensor
  34. from mindspore.train.model import Model
  35. from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor
  36. from mindspore.train.serialization import load_checkpoint, load_param_into_net
# Working directory captured when the module is loaded; run_squad() uses it as
# the checkpoint directory when no save path was given on the command line.
_cur_dir = os.getcwd()
  38. def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path=""):
  39. """ do train """
  40. if load_checkpoint_path == "":
  41. raise ValueError("Pretrain model missed, finetune task must load pretrain model!")
  42. steps_per_epoch = dataset.get_dataset_size()
  43. epoch_num = dataset.get_repeat_count()
  44. # optimizer
  45. if optimizer_cfg.optimizer == 'AdamWeightDecayDynamicLR':
  46. optimizer = AdamWeightDecayDynamicLR(network.trainable_params(),
  47. decay_steps=steps_per_epoch * epoch_num,
  48. learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.learning_rate,
  49. end_learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.end_learning_rate,
  50. power=optimizer_cfg.AdamWeightDecayDynamicLR.power,
  51. warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
  52. weight_decay=optimizer_cfg.AdamWeightDecayDynamicLR.weight_decay,
  53. eps=optimizer_cfg.AdamWeightDecayDynamicLR.eps)
  54. elif optimizer_cfg.optimizer == 'Lamb':
  55. optimizer = Lamb(network.trainable_params(), decay_steps=steps_per_epoch * epoch_num,
  56. start_learning_rate=optimizer_cfg.Lamb.start_learning_rate,
  57. end_learning_rate=optimizer_cfg.Lamb.end_learning_rate,
  58. power=optimizer_cfg.Lamb.power, weight_decay=optimizer_cfg.Lamb.weight_decay,
  59. warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
  60. decay_filter=optimizer_cfg.Lamb.decay_filter)
  61. elif optimizer_cfg.optimizer == 'Momentum':
  62. optimizer = Momentum(network.trainable_params(), learning_rate=optimizer_cfg.Momentum.learning_rate,
  63. momentum=optimizer_cfg.Momentum.momentum)
  64. else:
  65. raise Exception("Optimizer not supported. support: [AdamWeightDecayDynamicLR, Lamb, Momentum]")
  66. # load checkpoint into network
  67. ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1)
  68. ckpoint_cb = ModelCheckpoint(prefix="squad", directory=save_checkpoint_path, config=ckpt_config)
  69. param_dict = load_checkpoint(load_checkpoint_path)
  70. load_param_into_net(network, param_dict)
  71. update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000)
  72. netwithgrads = BertSquadCell(network, optimizer=optimizer, scale_update_cell=update_cell)
  73. model = Model(netwithgrads)
  74. callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack(), ckpoint_cb]
  75. model.train(epoch_num, dataset, callbacks=callbacks)
  76. def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="", seq_length=384):
  77. """ do eval """
  78. if load_checkpoint_path == "":
  79. raise ValueError("Finetune model missed, evaluation task must load finetune model!")
  80. tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True)
  81. eval_examples = read_squad_examples(eval_json, False)
  82. eval_features = convert_examples_to_features(
  83. examples=eval_examples,
  84. tokenizer=tokenizer,
  85. max_seq_length=seq_length,
  86. doc_stride=128,
  87. max_query_length=64,
  88. is_training=False,
  89. output_fn=None,
  90. verbose_logging=False)
  91. net = BertSquad(bert_net_cfg, False, 2)
  92. net.set_train(False)
  93. param_dict = load_checkpoint(load_checkpoint_path)
  94. load_param_into_net(net, param_dict)
  95. model = Model(net)
  96. output = []
  97. RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])
  98. columns_list = ["input_ids", "input_mask", "segment_ids", "unique_ids"]
  99. for data in dataset.create_dict_iterator():
  100. input_data = []
  101. for i in columns_list:
  102. input_data.append(Tensor(data[i]))
  103. input_ids, input_mask, segment_ids, unique_ids = input_data
  104. start_positions = Tensor([1], mstype.float32)
  105. end_positions = Tensor([1], mstype.float32)
  106. is_impossible = Tensor([1], mstype.float32)
  107. logits = model.predict(input_ids, input_mask, segment_ids, start_positions,
  108. end_positions, unique_ids, is_impossible)
  109. ids = logits[0].asnumpy()
  110. start = logits[1].asnumpy()
  111. end = logits[2].asnumpy()
  112. for i in range(bert_net_cfg.batch_size):
  113. unique_id = int(ids[i])
  114. start_logits = [float(x) for x in start[i].flat]
  115. end_logits = [float(x) for x in end[i].flat]
  116. output.append(RawResult(
  117. unique_id=unique_id,
  118. start_logits=start_logits,
  119. end_logits=end_logits))
  120. write_predictions(eval_examples, eval_features, output, 20, 30, True, "./predictions.json", None, None)
  121. def run_squad():
  122. """run squad task"""
  123. parser = argparse.ArgumentParser(description="run classifier")
  124. parser.add_argument("--device_target", type=str, default="Ascend", help="Device type, default is Ascend")
  125. parser.add_argument("--do_train", type=str, default="false", help="Eable train, default is false")
  126. parser.add_argument("--do_eval", type=str, default="false", help="Eable eval, default is false")
  127. parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
  128. parser.add_argument("--epoch_num", type=int, default="1", help="Epoch number, default is 1.")
  129. parser.add_argument("--num_class", type=int, default="2", help="The number of class, default is 2.")
  130. parser.add_argument("--vocab_file_path", type=str, default="", help="Vocab file path")
  131. parser.add_argument("--eval_json_path", type=str, default="", help="Evaluation json file path, can be eval.json")
  132. parser.add_argument("--save_finetune_checkpoint_path", type=str, default="", help="Save checkpoint path")
  133. parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="", help="Load checkpoint file path")
  134. parser.add_argument("--load_finetune_checkpoint_path", type=str, default="", help="Load checkpoint file path")
  135. parser.add_argument("--train_data_file_path", type=str, default="",
  136. help="Data path, it is better to use absolute path")
  137. parser.add_argument("--eval_data_file_path", type=str, default="",
  138. help="Data path, it is better to use absolute path")
  139. parser.add_argument("--schema_file_path", type=str, default="",
  140. help="Schema path, it is better to use absolute path")
  141. args_opt = parser.parse_args()
  142. epoch_num = args_opt.epoch_num
  143. load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path
  144. save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path
  145. load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path
  146. if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false":
  147. raise ValueError("At least one of 'do_train' or 'do_eval' must be true")
  148. if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "":
  149. raise ValueError("'train_data_file_path' must be set when do finetune task")
  150. if args_opt.do_eval.lower() == "true":
  151. if args_opt.eval_data_file_path == "":
  152. raise ValueError("'eval_data_file_path' must be set when do evaluation task")
  153. if args_opt.vocab_file_path == "":
  154. raise ValueError("'vocab_file_path' must be set when do evaluation task")
  155. if args_opt.eval_json_path == "":
  156. raise ValueError("'tokenization_file_path' must be set when do evaluation task")
  157. target = args_opt.device_target
  158. if target == "Ascend":
  159. context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
  160. elif target == "GPU":
  161. context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
  162. if bert_net_cfg.compute_type != mstype.float32:
  163. logger.warning('GPU only support fp32 temporarily, run with fp32.')
  164. bert_net_cfg.compute_type = mstype.float32
  165. else:
  166. raise Exception("Target error, GPU or Ascend is supported.")
  167. netwithloss = BertSquad(bert_net_cfg, True, 2, dropout_prob=0.1)
  168. if args_opt.do_train.lower() == "true":
  169. ds = create_squad_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num,
  170. data_file_path=args_opt.train_data_file_path,
  171. schema_file_path=args_opt.schema_file_path)
  172. do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path)
  173. if args_opt.do_eval.lower() == "true":
  174. if save_finetune_checkpoint_path == "":
  175. load_finetune_checkpoint_dir = _cur_dir
  176. else:
  177. load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path)
  178. load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir,
  179. ds.get_dataset_size(), epoch_num, "squad")
  180. if args_opt.do_eval.lower() == "true":
  181. ds = create_squad_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num,
  182. data_file_path=args_opt.eval_data_file_path,
  183. schema_file_path=args_opt.schema_file_path, is_training=False)
  184. do_eval(ds, args_opt.vocab_file_path, args_opt.eval_json_path,
  185. load_finetune_checkpoint_path, bert_net_cfg.seq_length)
# Script entry point: run the task only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    run_squad()