|
- # Copyright (c) OpenMMLab. All rights reserved.
- import copy
-
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
- from mmcv.cnn import ConvModule
- from mmcv.ops import batched_nms
-
- from ..builder import HEADS
- from .anchor_head import AnchorHead
-
-
- @HEADS.register_module()
- class RPNHead(AnchorHead):
- """RPN head.
-
- Args:
- in_channels (int): Number of channels in the input feature map.
- init_cfg (dict or list[dict], optional): Initialization config dict.
- num_convs (int): Number of convolution layers in the head. Default 1.
- """ # noqa: W605
-
- def __init__(self,
- in_channels,
- init_cfg=dict(type='Normal', layer='Conv2d', std=0.01),
- num_convs=1,
- **kwargs):
- self.num_convs = num_convs
- super(RPNHead, self).__init__(
- 1, in_channels, init_cfg=init_cfg, **kwargs)
-
- def _init_layers(self):
- """Initialize layers of the head."""
- if self.num_convs > 1:
- rpn_convs = []
- for i in range(self.num_convs):
- if i == 0:
- in_channels = self.in_channels
- else:
- in_channels = self.feat_channels
- # use ``inplace=False`` to avoid error: one of the variables
- # needed for gradient computation has been modified by an
- # inplace operation.
- rpn_convs.append(
- ConvModule(
- in_channels,
- self.feat_channels,
- 3,
- padding=1,
- inplace=False))
- self.rpn_conv = nn.Sequential(*rpn_convs)
- else:
- self.rpn_conv = nn.Conv2d(
- self.in_channels, self.feat_channels, 3, padding=1)
- self.rpn_cls = nn.Conv2d(self.feat_channels,
- self.num_base_priors * self.cls_out_channels,
- 1)
- self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_base_priors * 4,
- 1)
-
- def forward_single(self, x):
- """Forward feature map of a single scale level."""
- x = self.rpn_conv(x)
- x = F.relu(x, inplace=True)
- rpn_cls_score = self.rpn_cls(x)
- rpn_bbox_pred = self.rpn_reg(x)
- return rpn_cls_score, rpn_bbox_pred
-
- def loss(self,
- cls_scores,
- bbox_preds,
- gt_bboxes,
- img_metas,
- gt_bboxes_ignore=None):
- """Compute losses of the head.
-
- Args:
- cls_scores (list[Tensor]): Box scores for each scale level
- Has shape (N, num_anchors * num_classes, H, W)
- bbox_preds (list[Tensor]): Box energies / deltas for each scale
- level with shape (N, num_anchors * 4, H, W)
- gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
- shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
- img_metas (list[dict]): Meta information of each image, e.g.,
- image size, scaling factor, etc.
- gt_bboxes_ignore (None | list[Tensor]): specify which bounding
- boxes can be ignored when computing the loss.
-
- Returns:
- dict[str, Tensor]: A dictionary of loss components.
- """
- losses = super(RPNHead, self).loss(
- cls_scores,
- bbox_preds,
- gt_bboxes,
- None,
- img_metas,
- gt_bboxes_ignore=gt_bboxes_ignore)
- return dict(
- loss_rpn_cls=losses['loss_cls'], loss_rpn_bbox=losses['loss_bbox'])
-
- def _get_bboxes_single(self,
- cls_score_list,
- bbox_pred_list,
- score_factor_list,
- mlvl_anchors,
- img_meta,
- cfg,
- rescale=False,
- with_nms=True,
- **kwargs):
- """Transform outputs of a single image into bbox predictions.
-
- Args:
- cls_score_list (list[Tensor]): Box scores from all scale
- levels of a single image, each item has shape
- (num_anchors * num_classes, H, W).
- bbox_pred_list (list[Tensor]): Box energies / deltas from
- all scale levels of a single image, each item has
- shape (num_anchors * 4, H, W).
- score_factor_list (list[Tensor]): Score factor from all scale
- levels of a single image. RPN head does not need this value.
- mlvl_anchors (list[Tensor]): Anchors of all scale level
- each item has shape (num_anchors, 4).
- img_meta (dict): Image meta info.
- cfg (mmcv.Config): Test / postprocessing configuration,
- if None, test_cfg would be used.
- rescale (bool): If True, return boxes in original image space.
- Default: False.
- with_nms (bool): If True, do nms before return boxes.
- Default: True.
-
- Returns:
- Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
- are bounding box positions (tl_x, tl_y, br_x, br_y) and the
- 5-th column is a score between 0 and 1.
- """
- cfg = self.test_cfg if cfg is None else cfg
- cfg = copy.deepcopy(cfg)
- img_shape = img_meta['img_shape']
-
- # bboxes from different level should be independent during NMS,
- # level_ids are used as labels for batched NMS to separate them
- level_ids = []
- mlvl_scores = []
- mlvl_bbox_preds = []
- mlvl_valid_anchors = []
- nms_pre = cfg.get('nms_pre', -1)
- for level_idx in range(len(cls_score_list)):
- rpn_cls_score = cls_score_list[level_idx]
- rpn_bbox_pred = bbox_pred_list[level_idx]
- assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
- rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
- if self.use_sigmoid_cls:
- rpn_cls_score = rpn_cls_score.reshape(-1)
- scores = rpn_cls_score.sigmoid()
- else:
- rpn_cls_score = rpn_cls_score.reshape(-1, 2)
- # We set FG labels to [0, num_class-1] and BG label to
- # num_class in RPN head since mmdet v2.5, which is unified to
- # be consistent with other head since mmdet v2.0. In mmdet v2.0
- # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
- scores = rpn_cls_score.softmax(dim=1)[:, 0]
- rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
-
- anchors = mlvl_anchors[level_idx]
- if 0 < nms_pre < scores.shape[0]:
- # sort is faster than topk
- # _, topk_inds = scores.topk(cfg.nms_pre)
- ranked_scores, rank_inds = scores.sort(descending=True)
- topk_inds = rank_inds[:nms_pre]
- scores = ranked_scores[:nms_pre]
- rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
- anchors = anchors[topk_inds, :]
-
- mlvl_scores.append(scores)
- mlvl_bbox_preds.append(rpn_bbox_pred)
- mlvl_valid_anchors.append(anchors)
- level_ids.append(
- scores.new_full((scores.size(0), ),
- level_idx,
- dtype=torch.long))
-
- return self._bbox_post_process(mlvl_scores, mlvl_bbox_preds,
- mlvl_valid_anchors, level_ids, cfg,
- img_shape)
-
- def _bbox_post_process(self, mlvl_scores, mlvl_bboxes, mlvl_valid_anchors,
- level_ids, cfg, img_shape, **kwargs):
- """bbox post-processing method.
-
- The boxes would be rescaled to the original image scale and do
- the nms operation. Usually with_nms is False is used for aug test.
-
- Args:
- mlvl_scores (list[Tensor]): Box scores from all scale
- levels of a single image, each item has shape
- (num_bboxes, num_class).
- mlvl_bboxes (list[Tensor]): Decoded bboxes from all scale
- levels of a single image, each item has shape (num_bboxes, 4).
- mlvl_valid_anchors (list[Tensor]): Anchors of all scale level
- each item has shape (num_bboxes, 4).
- level_ids (list[Tensor]): Indexes from all scale levels of a
- single image, each item has shape (num_bboxes, ).
- cfg (mmcv.Config): Test / postprocessing configuration,
- if None, test_cfg would be used.
- img_shape (tuple(int)): Shape of current image.
-
- Returns:
- Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
- are bounding box positions (tl_x, tl_y, br_x, br_y) and the
- 5-th column is a score between 0 and 1.
- """
- scores = torch.cat(mlvl_scores)
- anchors = torch.cat(mlvl_valid_anchors)
- rpn_bbox_pred = torch.cat(mlvl_bboxes)
- proposals = self.bbox_coder.decode(
- anchors, rpn_bbox_pred, max_shape=img_shape)
- ids = torch.cat(level_ids)
-
- if cfg.min_bbox_size >= 0:
- w = proposals[:, 2] - proposals[:, 0]
- h = proposals[:, 3] - proposals[:, 1]
- valid_mask = (w > cfg.min_bbox_size) & (h > cfg.min_bbox_size)
- if not valid_mask.all():
- proposals = proposals[valid_mask]
- scores = scores[valid_mask]
- ids = ids[valid_mask]
-
- if proposals.numel() > 0:
- dets, _ = batched_nms(proposals, scores, ids, cfg.nms)
- else:
- return proposals.new_zeros(0, 5)
-
- return dets[:cfg.max_per_img]
-
- def onnx_export(self, x, img_metas):
- """Test without augmentation.
-
- Args:
- x (tuple[Tensor]): Features from the upstream network, each is
- a 4D-tensor.
- img_metas (list[dict]): Meta info of each image.
- Returns:
- Tensor: dets of shape [N, num_det, 5].
- """
- cls_scores, bbox_preds = self(x)
-
- assert len(cls_scores) == len(bbox_preds)
-
- batch_bboxes, batch_scores = super(RPNHead, self).onnx_export(
- cls_scores, bbox_preds, img_metas=img_metas, with_nms=False)
- # Use ONNX::NonMaxSuppression in deployment
- from mmdet.core.export import add_dummy_nms_for_onnx
- cfg = copy.deepcopy(self.test_cfg)
- score_threshold = cfg.nms.get('score_thr', 0.0)
- nms_pre = cfg.get('deploy_nms_pre', -1)
- # Different from the normal forward doing NMS level by level,
- # we do NMS across all levels when exporting ONNX.
- dets, _ = add_dummy_nms_for_onnx(batch_bboxes, batch_scores,
- cfg.max_per_img,
- cfg.nms.iou_threshold,
- score_threshold, nms_pre,
- cfg.max_per_img)
- return dets
|