You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

run_classifier.py 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. '''
  16. Bert finetune and evaluation script.
  17. '''
  18. import os
  19. import argparse
  20. from src.bert_for_finetune import BertFinetuneCell, BertCLS
  21. from src.finetune_eval_config import optimizer_cfg, bert_net_cfg
  22. from src.dataset import create_classification_dataset
  23. from src.assessment_method import Accuracy, F1, MCC, Spearman_Correlation
  24. from src.utils import make_directory, LossCallBack, LoadNewestCkpt
  25. import mindspore.common.dtype as mstype
  26. from mindspore import context
  27. from mindspore import log as logger
  28. from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell
  29. from mindspore.nn.optim import AdamWeightDecayDynamicLR, Lamb, Momentum
  30. from mindspore.common.tensor import Tensor
  31. from mindspore.train.model import Model
  32. from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor
  33. from mindspore.train.serialization import load_checkpoint, load_param_into_net
# Working directory captured at import time. run_classifier uses it as the
# checkpoint directory when training and evaluation run together and no
# save_finetune_checkpoint_path was given.
_cur_dir = os.getcwd()
  35. def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path=""):
  36. """ do train """
  37. if load_checkpoint_path == "":
  38. raise ValueError("Pretrain model missed, finetune task must load pretrain model!")
  39. steps_per_epoch = dataset.get_dataset_size()
  40. epoch_num = dataset.get_repeat_count()
  41. # optimizer
  42. if optimizer_cfg.optimizer == 'AdamWeightDecayDynamicLR':
  43. optimizer = AdamWeightDecayDynamicLR(network.trainable_params(),
  44. decay_steps=steps_per_epoch * epoch_num,
  45. learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.learning_rate,
  46. end_learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.end_learning_rate,
  47. power=optimizer_cfg.AdamWeightDecayDynamicLR.power,
  48. warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
  49. weight_decay=optimizer_cfg.AdamWeightDecayDynamicLR.weight_decay,
  50. eps=optimizer_cfg.AdamWeightDecayDynamicLR.eps)
  51. elif optimizer_cfg.optimizer == 'Lamb':
  52. optimizer = Lamb(network.trainable_params(), decay_steps=steps_per_epoch * epoch_num,
  53. start_learning_rate=optimizer_cfg.Lamb.start_learning_rate,
  54. end_learning_rate=optimizer_cfg.Lamb.end_learning_rate,
  55. power=optimizer_cfg.Lamb.power, weight_decay=optimizer_cfg.Lamb.weight_decay,
  56. warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
  57. decay_filter=optimizer_cfg.Lamb.decay_filter)
  58. elif optimizer_cfg.optimizer == 'Momentum':
  59. optimizer = Momentum(network.trainable_params(), learning_rate=optimizer_cfg.Momentum.learning_rate,
  60. momentum=optimizer_cfg.Momentum.momentum)
  61. else:
  62. raise Exception("Optimizer not supported. support: [AdamWeightDecayDynamicLR, Lamb, Momentum]")
  63. # load checkpoint into network
  64. ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1)
  65. ckpoint_cb = ModelCheckpoint(prefix="classifier", directory=save_checkpoint_path, config=ckpt_config)
  66. param_dict = load_checkpoint(load_checkpoint_path)
  67. load_param_into_net(network, param_dict)
  68. update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000)
  69. netwithgrads = BertFinetuneCell(network, optimizer=optimizer, scale_update_cell=update_cell)
  70. model = Model(netwithgrads)
  71. callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack(), ckpoint_cb]
  72. model.train(epoch_num, dataset, callbacks=callbacks)
  73. def eval_result_print(assessment_method="accuracy", callback=None):
  74. """ print eval result """
  75. if assessment_method == "accuracy":
  76. print("acc_num {} , total_num {}, accuracy {:.6f}".format(callback.acc_num, callback.total_num,
  77. callback.acc_num / callback.total_num))
  78. elif assessment_method == "f1":
  79. print("Precision {:.6f} ".format(callback.TP / (callback.TP + callback.FP)))
  80. print("Recall {:.6f} ".format(callback.TP / (callback.TP + callback.FN)))
  81. print("F1 {:.6f} ".format(2 * callback.TP / (2 * callback.TP + callback.FP + callback.FN)))
  82. elif assessment_method == "mcc":
  83. print("MCC {:.6f} ".format(callback.cal()))
  84. elif assessment_method == "spearman_correlation":
  85. print("Spearman Correlation is {:.6f} ".format(callback.cal()[0]))
  86. else:
  87. raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]")
  88. def do_eval(dataset=None, network=None, num_class=2, assessment_method="accuracy", load_checkpoint_path=""):
  89. """ do eval """
  90. if load_checkpoint_path == "":
  91. raise ValueError("Finetune model missed, evaluation task must load finetune model!")
  92. net_for_pretraining = network(bert_net_cfg, False, num_class)
  93. net_for_pretraining.set_train(False)
  94. param_dict = load_checkpoint(load_checkpoint_path)
  95. load_param_into_net(net_for_pretraining, param_dict)
  96. model = Model(net_for_pretraining)
  97. if assessment_method == "accuracy":
  98. callback = Accuracy()
  99. elif assessment_method == "f1":
  100. callback = F1(False, num_class)
  101. elif assessment_method == "mcc":
  102. callback = MCC()
  103. elif assessment_method == "spearman_correlation":
  104. callback = Spearman_Correlation()
  105. else:
  106. raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]")
  107. columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
  108. for data in dataset.create_dict_iterator():
  109. input_data = []
  110. for i in columns_list:
  111. input_data.append(Tensor(data[i]))
  112. input_ids, input_mask, token_type_id, label_ids = input_data
  113. logits = model.predict(input_ids, input_mask, token_type_id, label_ids)
  114. callback.update(logits, label_ids)
  115. print("==============================================================")
  116. eval_result_print(assessment_method, callback)
  117. print("==============================================================")
  118. def run_classifier():
  119. """run classifier task"""
  120. parser = argparse.ArgumentParser(description="run classifier")
  121. parser.add_argument("--device_target", type=str, default="Ascend", help="Device type, default is Ascend")
  122. parser.add_argument("--assessment_method", type=str, default="accuracy", help="assessment_method include: "
  123. "[MCC, Spearman_correlation, "
  124. "Accuracy], default is accuracy")
  125. parser.add_argument("--do_train", type=str, default="false", help="Eable train, default is false")
  126. parser.add_argument("--do_eval", type=str, default="false", help="Eable eval, default is false")
  127. parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
  128. parser.add_argument("--epoch_num", type=int, default="1", help="Epoch number, default is 1.")
  129. parser.add_argument("--num_class", type=int, default="2", help="The number of class, default is 2.")
  130. parser.add_argument("--save_finetune_checkpoint_path", type=str, default="", help="Save checkpoint path")
  131. parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="", help="Load checkpoint file path")
  132. parser.add_argument("--load_finetune_checkpoint_path", type=str, default="", help="Load checkpoint file path")
  133. parser.add_argument("--train_data_file_path", type=str, default="",
  134. help="Data path, it is better to use absolute path")
  135. parser.add_argument("--eval_data_file_path", type=str, default="",
  136. help="Data path, it is better to use absolute path")
  137. parser.add_argument("--schema_file_path", type=str, default="",
  138. help="Schema path, it is better to use absolute path")
  139. args_opt = parser.parse_args()
  140. epoch_num = args_opt.epoch_num
  141. assessment_method = args_opt.assessment_method.lower()
  142. load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path
  143. save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path
  144. load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path
  145. if args_opt.do_train.lower() == "false" and args_opt.do_eval.lower() == "false":
  146. raise ValueError("At least one of 'do_train' or 'do_eval' must be true")
  147. if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "":
  148. raise ValueError("'train_data_file_path' must be set when do finetune task")
  149. if args_opt.do_eval.lower() == "true" and args_opt.eval_data_file_path == "":
  150. raise ValueError("'eval_data_file_path' must be set when do evaluation task")
  151. target = args_opt.device_target
  152. if target == "Ascend":
  153. context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
  154. elif target == "GPU":
  155. context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
  156. if bert_net_cfg.compute_type != mstype.float32:
  157. logger.warning('GPU only support fp32 temporarily, run with fp32.')
  158. bert_net_cfg.compute_type = mstype.float32
  159. else:
  160. raise Exception("Target error, GPU or Ascend is supported.")
  161. netwithloss = BertCLS(bert_net_cfg, True, num_labels=args_opt.num_class, dropout_prob=0.1,
  162. assessment_method=assessment_method)
  163. if args_opt.do_train.lower() == "true":
  164. ds = create_classification_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num,
  165. assessment_method=assessment_method,
  166. data_file_path=args_opt.train_data_file_path,
  167. schema_file_path=args_opt.schema_file_path)
  168. do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path)
  169. if args_opt.do_eval.lower() == "true":
  170. if save_finetune_checkpoint_path == "":
  171. load_finetune_checkpoint_dir = _cur_dir
  172. else:
  173. load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path)
  174. load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir,
  175. ds.get_dataset_size(), epoch_num, "classifier")
  176. if args_opt.do_eval.lower() == "true":
  177. ds = create_classification_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=epoch_num,
  178. assessment_method=assessment_method,
  179. data_file_path=args_opt.eval_data_file_path,
  180. schema_file_path=args_opt.schema_file_path)
  181. do_eval(ds, BertCLS, args_opt.num_class, assessment_method, load_finetune_checkpoint_path)
# Script entry point: run the classifier task only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    run_classifier()