# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com

import os
import sys
import datetime
import torch
import torch.nn as nn

# from torch.utils.tensorboard import SummaryWriter

# import timm packages
from timm.utils import ModelEma
from timm.models import resume_checkpoint
from timm.data import Dataset, create_loader

# prefer apex's DistributedDataParallel when available; otherwise fall
# back to the native PyTorch implementation
try:
    from apex.parallel import convert_syncbn_model
    from apex.parallel import DistributedDataParallel as DDP

    HAS_APEX = True
except ImportError as e:
    print(e)
    from torch.nn.parallel import DistributedDataParallel as DDP

    HAS_APEX = False

# import models and training functions
from lib.core.test import validate
from lib.models.structures.childnet import gen_childnet
from lib.utils.util import parse_config_args, get_logger, get_model_flops_params
from lib.config import DEFAULT_CROP_PCT, IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD


def main():
    args, cfg = parse_config_args('child net testing')

    # resolve logging
    output_dir = os.path.join(cfg.SAVE_PATH,
                              "{}-{}".format(datetime.date.today().strftime('%m%d'),
                                             cfg.MODEL))
    # makedirs tolerates a pre-existing directory and avoids the race
    # between distributed processes that mkdir would turn into a crash
    os.makedirs(output_dir, exist_ok=True)

    if args.local_rank == 0:
        logger = get_logger(os.path.join(output_dir, 'test.log'))
        writer = None  # SummaryWriter(os.path.join(output_dir, 'runs'))
    else:
        writer, logger = None, None

    # retrain model selection
    if cfg.NET.SELECTION == 481:
        arch_list = [[0], [3, 4, 3, 1], [3, 2, 3, 0], [3, 3, 3, 1],
                     [3, 3, 3, 3], [3, 3, 3, 3], [0]]
        cfg.DATASET.IMAGE_SIZE = 224
    elif cfg.NET.SELECTION == 43:
        arch_list = [[0], [3], [3, 1], [3, 1], [3, 3, 3], [3, 3], [0]]
        cfg.DATASET.IMAGE_SIZE = 96
    elif cfg.NET.SELECTION == 14:
        arch_list = [[0], [3], [3, 3], [3, 3], [3], [3], [0]]
        cfg.DATASET.IMAGE_SIZE = 64
    elif cfg.NET.SELECTION == 112:
        arch_list = [[0], [3], [3, 3], [3, 3], [3, 3, 3], [3, 3], [0]]
        cfg.DATASET.IMAGE_SIZE = 160
    elif cfg.NET.SELECTION == 287:
        arch_list = [[0], [3], [3, 3], [3, 1, 3], [3, 3, 3, 3], [3, 3, 3], [0]]
        cfg.DATASET.IMAGE_SIZE = 224
    elif cfg.NET.SELECTION == 604:
        arch_list = [[0], [3, 3, 2, 3, 3], [3, 2, 3, 2, 3], [3, 2, 3, 2, 3],
                     [3, 3, 2, 2, 3, 3], [3, 3, 2, 3, 3, 3], [0]]
        cfg.DATASET.IMAGE_SIZE = 224
    else:
        raise ValueError(
            'Unsupported model selection: {}'.format(cfg.NET.SELECTION))

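    # Each inner list of arch_list describes one stage of the childnet: its
    # length sets how many layers the stage stacks (it drives the repeat
    # counts in arch_def below), and each value indexes the op choice
    # consumed by gen_childnet.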
    # define childnet architecture from arch_list
    stem = ['ds_r1_k3_s1_e1_c16_se0.25', 'cn_r1_k1_s1_c320_se0.25']
    # TODO: this param from NNI is different from microsoft/Cream.
    choice_block_pool = ['ir_r1_k3_s2_e4_c24_se0.25',
                         'ir_r1_k5_s2_e4_c40_se0.25',
                         'ir_r1_k3_s2_e6_c80_se0.25',
                         'ir_r1_k3_s1_e6_c96_se0.25',
                         'ir_r1_k5_s2_e6_c192_se0.25']
    arch_def = [[stem[0]]] + [[choice_block_pool[idx]
                               for _ in range(len(arch_list[idx + 1]))]
                              for idx in range(len(choice_block_pool))] + [[stem[1]]]
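    # For example, with cfg.NET.SELECTION == 14 the middle stages become
    #   [['ir_r1_k3_s2_e4_c24_se0.25'],
    #    ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'],
    #    ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25'],
    #    ['ir_r1_k3_s1_e6_c96_se0.25'],
    #    ['ir_r1_k5_s2_e6_c192_se0.25']],
    # i.e. each stage repeats its choice block len(arch_list[idx + 1]) times.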

    # generate childnet
    model = gen_childnet(
        arch_list,
        arch_def,
        num_classes=cfg.DATASET.NUM_CLASSES,
        drop_rate=cfg.NET.DROPOUT_RATE,
        global_pool=cfg.NET.GP)

    if args.local_rank == 0:
        macs, params = get_model_flops_params(model, input_size=(
            1, 3, cfg.DATASET.IMAGE_SIZE, cfg.DATASET.IMAGE_SIZE))
        logger.info(
            '[Model-{}] Flops: {} Params: {}'.format(cfg.NET.SELECTION, macs, params))

    # initialize distributed parameters
    torch.cuda.set_device(args.local_rank)
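    # with init_method='env://', the launcher (e.g. torch.distributed.launch)
    # must provide RANK, WORLD_SIZE, MASTER_ADDR and MASTER_PORT in the
    # environment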
    torch.distributed.init_process_group(backend='nccl', init_method='env://')
    if args.local_rank == 0:
        logger.info(
            "Testing on process {} with {} GPUs.".format(
                args.local_rank, cfg.NUM_GPU))

    # resume model from checkpoint
    assert cfg.AUTO_RESUME is True and os.path.exists(cfg.RESUME_PATH), \
        'no checkpoint found at: {}'.format(cfg.RESUME_PATH)
    resume_checkpoint(model, cfg.RESUME_PATH)

    model = model.cuda()

    model_ema = None
    if cfg.NET.EMA.USE:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but
        # before SyncBN and DDP wrapper
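        # passing resume= asks timm's ModelEma to restore the averaged
        # weights stored under the checkpoint's 'state_dict_ema' key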
        model_ema = ModelEma(
            model,
            decay=cfg.NET.EMA.DECAY,
            device='cpu' if cfg.NET.EMA.FORCE_CPU else '',
            resume=cfg.RESUME_PATH)

    # imagenet validation dataset
    eval_dir = os.path.join(cfg.DATA_DIR, 'val')
    if not os.path.exists(eval_dir):
        # every rank must exit here, not just rank 0, otherwise the other
        # processes would hang on a loader that cannot be built
        if args.local_rank == 0:
            logger.error(
                'Validation folder does not exist at: {}'.format(eval_dir))
        sys.exit(1)

    dataset_eval = Dataset(eval_dir)
    loader_eval = create_loader(
        dataset_eval,
        input_size=(3, cfg.DATASET.IMAGE_SIZE, cfg.DATASET.IMAGE_SIZE),
        batch_size=cfg.DATASET.VAL_BATCH_MUL * cfg.DATASET.BATCH_SIZE,
        is_training=False,
        num_workers=cfg.WORKERS,
        distributed=True,
        pin_memory=cfg.DATASET.PIN_MEM,
        crop_pct=DEFAULT_CROP_PCT,
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD)
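    # with distributed=True, timm's create_loader attaches a distributed
    # sampler so each rank evaluates its own shard of the validation set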

    # validate the model, then its EMA copy if enabled
    validate_loss_fn = nn.CrossEntropyLoss().cuda()
    validate(0, model, loader_eval, validate_loss_fn, cfg,
             log_suffix='', logger=logger,
             writer=writer, local_rank=args.local_rank)

    if cfg.NET.EMA.USE:
        validate(0, model_ema.ema, loader_eval, validate_loss_fn, cfg,
                 log_suffix='_EMA', logger=logger,
                 writer=writer, local_rank=args.local_rank)


if __name__ == '__main__':
    main()
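
# Typical launch, one process per GPU (the script and config paths below are
# illustrative; adjust them to your checkout):
#   python -m torch.distributed.launch --nproc_per_node=8 \
#       tools/test.py --cfg experiments/configs/test/test.yaml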