You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_ofa_trainer.py 5.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. import os
  3. import shutil
  4. import unittest
  5. import json
  6. from modelscope.metainfo import Trainers
  7. from modelscope.msdatasets import MsDataset
  8. from modelscope.trainers import build_trainer
  9. from modelscope.utils.constant import DownloadMode, ModelFile
  10. from modelscope.utils.hub import read_config
  11. from modelscope.utils.test_utils import test_level
  12. class TestOfaTrainer(unittest.TestCase):
  13. def setUp(self) -> None:
  14. self.finetune_cfg = \
  15. {'framework': 'pytorch',
  16. 'task': 'ocr-recognition',
  17. 'model': {'type': 'ofa',
  18. 'beam_search': {'beam_size': 5,
  19. 'max_len_b': 64,
  20. 'min_len': 1,
  21. 'no_repeat_ngram_size': 0},
  22. 'seed': 7,
  23. 'max_src_length': 128,
  24. 'language': 'zh',
  25. 'gen_type': 'generation',
  26. 'patch_image_size': 480,
  27. 'is_document': False,
  28. 'max_image_size': 480,
  29. 'imagenet_default_mean_and_std': False},
  30. 'pipeline': {'type': 'ofa-ocr-recognition'},
  31. 'dataset': {'column_map': {'text': 'label'}},
  32. 'train': {'work_dir': 'work/ckpts/recognition',
  33. # 'launcher': 'pytorch',
  34. 'max_epochs': 1,
  35. 'use_fp16': True,
  36. 'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0},
  37. 'lr_scheduler': {'name': 'polynomial_decay',
  38. 'warmup_proportion': 0.01,
  39. 'lr_end': 1e-07},
  40. 'lr_scheduler_hook': {'type': 'LrSchedulerHook', 'by_epoch': False},
  41. 'optimizer': {'type': 'AdamW', 'lr': 5e-05, 'weight_decay': 0.01},
  42. 'optimizer_hook': {'type': 'TorchAMPOptimizerHook',
  43. 'cumulative_iters': 1,
  44. 'grad_clip': {'max_norm': 1.0, 'norm_type': 2},
  45. 'loss_keys': 'loss'},
  46. 'criterion': {'name': 'AdjustLabelSmoothedCrossEntropyCriterion',
  47. 'constraint_range': None,
  48. 'drop_worst_after': 0,
  49. 'drop_worst_ratio': 0.0,
  50. 'ignore_eos': False,
  51. 'ignore_prefix_size': 0,
  52. 'label_smoothing': 0.1,
  53. 'reg_alpha': 1.0,
  54. 'report_accuracy': False,
  55. 'sample_patch_num': 196,
  56. 'sentence_avg': False,
  57. 'use_rdrop': True},
  58. 'hooks': [{'type': 'BestCkptSaverHook',
  59. 'metric_key': 'accuracy',
  60. 'interval': 100},
  61. {'type': 'TextLoggerHook', 'interval': 1},
  62. {'type': 'IterTimerHook'},
  63. {'type': 'EvaluationHook', 'by_epoch': True, 'interval': 1}]},
  64. 'evaluation': {'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0},
  65. 'metrics': [{'type': 'accuracy'}]},
  66. 'preprocessor': []}
  67. @unittest.skipUnless(test_level() >= 0, 'skip test in current test level')
  68. def test_trainer_std(self):
  69. WORKSPACE = './workspace/ckpts/recognition'
  70. os.makedirs(WORKSPACE, exist_ok=True)
  71. config_file = os.path.join(WORKSPACE, ModelFile.CONFIGURATION)
  72. with open(config_file, 'w') as writer:
  73. json.dump(self.finetune_cfg, writer)
  74. pretrained_model = 'damo/ofa_ocr-recognition_scene_base_zh'
  75. args = dict(
  76. model=pretrained_model,
  77. work_dir=WORKSPACE,
  78. train_dataset=MsDataset.load(
  79. 'ocr_fudanvi_zh',
  80. subset_name='scene',
  81. namespace='modelscope',
  82. split='train[800:900]',
  83. download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS),
  84. eval_dataset=MsDataset.load(
  85. 'ocr_fudanvi_zh',
  86. subset_name='scene',
  87. namespace='modelscope',
  88. split='test[:20]',
  89. download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS),
  90. cfg_file=config_file)
  91. trainer = build_trainer(name=Trainers.ofa, default_args=args)
  92. trainer.train()
  93. self.assertIn(
  94. ModelFile.TORCH_MODEL_BIN_FILE,
  95. os.listdir(os.path.join(WORKSPACE, ModelFile.TRAIN_OUTPUT_DIR)))
  96. shutil.rmtree(WORKSPACE)
  97. if __name__ == '__main__':
  98. unittest.main()