|
- _base_ = '../htc/htc_r50_fpn_1x_coco.py'
- # model settings
- model = dict(
- type='SCNet',
- roi_head=dict(
- _delete_=True,
- type='SCNetRoIHead',
- num_stages=3,
- stage_loss_weights=[1, 0.5, 0.25],
- bbox_roi_extractor=dict(
- type='SingleRoIExtractor',
- roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
- out_channels=256,
- featmap_strides=[4, 8, 16, 32]),
- bbox_head=[
- dict(
- type='SCNetBBoxHead',
- num_shared_fcs=2,
- in_channels=256,
- fc_out_channels=1024,
- roi_feat_size=7,
- num_classes=80,
- bbox_coder=dict(
- type='DeltaXYWHBBoxCoder',
- target_means=[0., 0., 0., 0.],
- target_stds=[0.1, 0.1, 0.2, 0.2]),
- reg_class_agnostic=True,
- loss_cls=dict(
- type='CrossEntropyLoss',
- use_sigmoid=False,
- loss_weight=1.0),
- loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
- loss_weight=1.0)),
- dict(
- type='SCNetBBoxHead',
- num_shared_fcs=2,
- in_channels=256,
- fc_out_channels=1024,
- roi_feat_size=7,
- num_classes=80,
- bbox_coder=dict(
- type='DeltaXYWHBBoxCoder',
- target_means=[0., 0., 0., 0.],
- target_stds=[0.05, 0.05, 0.1, 0.1]),
- reg_class_agnostic=True,
- loss_cls=dict(
- type='CrossEntropyLoss',
- use_sigmoid=False,
- loss_weight=1.0),
- loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
- loss_weight=1.0)),
- dict(
- type='SCNetBBoxHead',
- num_shared_fcs=2,
- in_channels=256,
- fc_out_channels=1024,
- roi_feat_size=7,
- num_classes=80,
- bbox_coder=dict(
- type='DeltaXYWHBBoxCoder',
- target_means=[0., 0., 0., 0.],
- target_stds=[0.033, 0.033, 0.067, 0.067]),
- reg_class_agnostic=True,
- loss_cls=dict(
- type='CrossEntropyLoss',
- use_sigmoid=False,
- loss_weight=1.0),
- loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
- ],
- mask_roi_extractor=dict(
- type='SingleRoIExtractor',
- roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
- out_channels=256,
- featmap_strides=[4, 8, 16, 32]),
- mask_head=dict(
- type='SCNetMaskHead',
- num_convs=12,
- in_channels=256,
- conv_out_channels=256,
- num_classes=80,
- conv_to_res=True,
- loss_mask=dict(
- type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),
- semantic_roi_extractor=dict(
- type='SingleRoIExtractor',
- roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
- out_channels=256,
- featmap_strides=[8]),
- semantic_head=dict(
- type='SCNetSemanticHead',
- num_ins=5,
- fusion_level=1,
- num_convs=4,
- in_channels=256,
- conv_out_channels=256,
- num_classes=183,
- loss_seg=dict(
- type='CrossEntropyLoss', ignore_index=255, loss_weight=0.2),
- conv_to_res=True),
- glbctx_head=dict(
- type='GlobalContextHead',
- num_convs=4,
- in_channels=256,
- conv_out_channels=256,
- num_classes=80,
- loss_weight=3.0,
- conv_to_res=True),
- feat_relay_head=dict(
- type='FeatureRelayHead',
- in_channels=1024,
- out_conv_channels=256,
- roi_feat_size=7,
- scale_factor=2)))
-
- # uncomment below code to enable test time augmentations
- # img_norm_cfg = dict(
- # mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
- # test_pipeline = [
- # dict(type='LoadImageFromFile'),
- # dict(
- # type='MultiScaleFlipAug',
- # img_scale=[(600, 900), (800, 1200), (1000, 1500), (1200, 1800),
- # (1400, 2100)],
- # flip=True,
- # transforms=[
- # dict(type='Resize', keep_ratio=True),
- # dict(type='RandomFlip', flip_ratio=0.5),
- # dict(type='Normalize', **img_norm_cfg),
- # dict(type='Pad', size_divisor=32),
- # dict(type='ImageToTensor', keys=['img']),
- # dict(type='Collect', keys=['img']),
- # ])
- # ]
- # data = dict(
- # val=dict(pipeline=test_pipeline),
- # test=dict(pipeline=test_pipeline))
|