|
- # Copyright (c) OpenMMLab. All rights reserved.
- from abc import ABCMeta, abstractmethod
-
- import torch
- from mmcv.ops import batched_nms
- from mmcv.runner import BaseModule, force_fp32
-
- from mmdet.core.utils import filter_scores_and_topk, select_single_mlvl
-
-
- class BaseDenseHead(BaseModule, metaclass=ABCMeta):
- """Base class for DenseHeads."""
-
- def __init__(self, init_cfg=None):
- super(BaseDenseHead, self).__init__(init_cfg)
-
- @abstractmethod
- def loss(self, **kwargs):
- """Compute losses of the head."""
- pass
-
- @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
- def get_bboxes(self,
- cls_scores,
- bbox_preds,
- score_factors=None,
- img_metas=None,
- cfg=None,
- rescale=False,
- with_nms=True,
- **kwargs):
- """Transform network outputs of a batch into bbox results.
-
- Note: When score_factors is not None, the cls_scores are
- usually multiplied by it then obtain the real score used in NMS,
- such as CenterNess in FCOS, IoU branch in ATSS.
-
- Args:
- cls_scores (list[Tensor]): Classification scores for all
- scale levels, each is a 4D-tensor, has shape
- (batch_size, num_priors * num_classes, H, W).
- bbox_preds (list[Tensor]): Box energies / deltas for all
- scale levels, each is a 4D-tensor, has shape
- (batch_size, num_priors * 4, H, W).
- score_factors (list[Tensor], Optional): Score factor for
- all scale level, each is a 4D-tensor, has shape
- (batch_size, num_priors * 1, H, W). Default None.
- img_metas (list[dict], Optional): Image meta info. Default None.
- cfg (mmcv.Config, Optional): Test / postprocessing configuration,
- if None, test_cfg would be used. Default None.
- rescale (bool): If True, return boxes in original image space.
- Default False.
- with_nms (bool): If True, do nms before return boxes.
- Default True.
-
- Returns:
- list[list[Tensor, Tensor]]: Each item in result_list is 2-tuple.
- The first item is an (n, 5) tensor, where the first 4 columns
- are bounding box positions (tl_x, tl_y, br_x, br_y) and the
- 5-th column is a score between 0 and 1. The second item is a
- (n,) tensor where each item is the predicted class label of
- the corresponding box.
- """
- assert len(cls_scores) == len(bbox_preds)
-
- if score_factors is None:
- # e.g. Retina, FreeAnchor, Foveabox, etc.
- with_score_factors = False
- else:
- # e.g. FCOS, PAA, ATSS, AutoAssign, etc.
- with_score_factors = True
- assert len(cls_scores) == len(score_factors)
-
- num_levels = len(cls_scores)
-
- featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
- mlvl_priors = self.prior_generator.grid_priors(
- featmap_sizes,
- dtype=cls_scores[0].device,
- device=cls_scores[0].device)
-
- result_list = []
-
- for img_id in range(len(img_metas)):
- img_meta = img_metas[img_id]
- cls_score_list = select_single_mlvl(cls_scores, img_id)
- bbox_pred_list = select_single_mlvl(bbox_preds, img_id)
- if with_score_factors:
- score_factor_list = select_single_mlvl(score_factors, img_id)
- else:
- score_factor_list = [None for _ in range(num_levels)]
-
- results = self._get_bboxes_single(cls_score_list, bbox_pred_list,
- score_factor_list, mlvl_priors,
- img_meta, cfg, rescale, with_nms,
- **kwargs)
- result_list.append(results)
- return result_list
-
- def _get_bboxes_single(self,
- cls_score_list,
- bbox_pred_list,
- score_factor_list,
- mlvl_priors,
- img_meta,
- cfg,
- rescale=False,
- with_nms=True,
- **kwargs):
- """Transform outputs of a single image into bbox predictions.
-
- Args:
- cls_score_list (list[Tensor]): Box scores from all scale
- levels of a single image, each item has shape
- (num_priors * num_classes, H, W).
- bbox_pred_list (list[Tensor]): Box energies / deltas from
- all scale levels of a single image, each item has shape
- (num_priors * 4, H, W).
- score_factor_list (list[Tensor]): Score factor from all scale
- levels of a single image, each item has shape
- (num_priors * 1, H, W).
- mlvl_priors (list[Tensor]): Each element in the list is
- the priors of a single level in feature pyramid. In all
- anchor-based methods, it has shape (num_priors, 4). In
- all anchor-free methods, it has shape (num_priors, 2)
- when `with_stride=True`, otherwise it still has shape
- (num_priors, 4).
- img_meta (dict): Image meta info.
- cfg (mmcv.Config): Test / postprocessing configuration,
- if None, test_cfg would be used.
- rescale (bool): If True, return boxes in original image space.
- Default: False.
- with_nms (bool): If True, do nms before return boxes.
- Default: True.
-
- Returns:
- tuple[Tensor]: Results of detected bboxes and labels. If with_nms
- is False and mlvl_score_factor is None, return mlvl_bboxes and
- mlvl_scores, else return mlvl_bboxes, mlvl_scores and
- mlvl_score_factor. Usually with_nms is False is used for aug
- test. If with_nms is True, then return the following format
-
- - det_bboxes (Tensor): Predicted bboxes with shape \
- [num_bboxes, 5], where the first 4 columns are bounding \
- box positions (tl_x, tl_y, br_x, br_y) and the 5-th \
- column are scores between 0 and 1.
- - det_labels (Tensor): Predicted labels of the corresponding \
- box with shape [num_bboxes].
- """
- if score_factor_list[0] is None:
- # e.g. Retina, FreeAnchor, etc.
- with_score_factors = False
- else:
- # e.g. FCOS, PAA, ATSS, etc.
- with_score_factors = True
-
- cfg = self.test_cfg if cfg is None else cfg
- img_shape = img_meta['img_shape']
- nms_pre = cfg.get('nms_pre', -1)
-
- mlvl_bboxes = []
- mlvl_scores = []
- mlvl_labels = []
- if with_score_factors:
- mlvl_score_factors = []
- else:
- mlvl_score_factors = None
- for level_idx, (cls_score, bbox_pred, score_factor, priors) in \
- enumerate(zip(cls_score_list, bbox_pred_list,
- score_factor_list, mlvl_priors)):
-
- assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
-
- bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
- if with_score_factors:
- score_factor = score_factor.permute(1, 2,
- 0).reshape(-1).sigmoid()
- cls_score = cls_score.permute(1, 2,
- 0).reshape(-1, self.cls_out_channels)
- if self.use_sigmoid_cls:
- scores = cls_score.sigmoid()
- else:
- # remind that we set FG labels to [0, num_class-1]
- # since mmdet v2.0
- # BG cat_id: num_class
- scores = cls_score.softmax(-1)
- scores = cls_score.softmax(-1)[:, :-1]
-
- # After https://github.com/open-mmlab/mmdetection/pull/6268/,
- # this operation keeps fewer bboxes under the same `nms_pre`.
- # There is no difference in performance for most models. If you
- # find a slight drop in performance, you can set a larger
- # `nms_pre` than before.
- results = filter_scores_and_topk(
- scores, cfg.score_thr, nms_pre,
- dict(bbox_pred=bbox_pred, priors=priors))
- scores, labels, keep_idxs, filtered_results = results
-
- bbox_pred = filtered_results['bbox_pred']
- priors = filtered_results['priors']
-
- if with_score_factors:
- score_factor = score_factor[keep_idxs]
-
- bboxes = self.bbox_coder.decode(
- priors, bbox_pred, max_shape=img_shape)
-
- mlvl_bboxes.append(bboxes)
- mlvl_scores.append(scores)
- mlvl_labels.append(labels)
- if with_score_factors:
- mlvl_score_factors.append(score_factor)
-
- return self._bbox_post_process(mlvl_scores, mlvl_labels, mlvl_bboxes,
- img_meta['scale_factor'], cfg, rescale,
- with_nms, mlvl_score_factors, **kwargs)
-
- def _bbox_post_process(self,
- mlvl_scores,
- mlvl_labels,
- mlvl_bboxes,
- scale_factor,
- cfg,
- rescale=False,
- with_nms=True,
- mlvl_score_factors=None,
- **kwargs):
- """bbox post-processing method.
-
- The boxes would be rescaled to the original image scale and do
- the nms operation. Usually with_nms is False is used for aug test.
-
- Args:
- mlvl_scores (list[Tensor]): Box scores from all scale
- levels of a single image, each item has shape
- (num_bboxes, ).
- mlvl_labels (list[Tensor]): Box class labels from all scale
- levels of a single image, each item has shape
- (num_bboxes, ).
- mlvl_bboxes (list[Tensor]): Decoded bboxes from all scale
- levels of a single image, each item has shape (num_bboxes, 4).
- scale_factor (ndarray, optional): Scale factor of the image arange
- as (w_scale, h_scale, w_scale, h_scale).
- cfg (mmcv.Config): Test / postprocessing configuration,
- if None, test_cfg would be used.
- rescale (bool): If True, return boxes in original image space.
- Default: False.
- with_nms (bool): If True, do nms before return boxes.
- Default: True.
- mlvl_score_factors (list[Tensor], optional): Score factor from
- all scale levels of a single image, each item has shape
- (num_bboxes, ). Default: None.
-
- Returns:
- tuple[Tensor]: Results of detected bboxes and labels. If with_nms
- is False and mlvl_score_factor is None, return mlvl_bboxes and
- mlvl_scores, else return mlvl_bboxes, mlvl_scores and
- mlvl_score_factor. Usually with_nms is False is used for aug
- test. If with_nms is True, then return the following format
-
- - det_bboxes (Tensor): Predicted bboxes with shape \
- [num_bboxes, 5], where the first 4 columns are bounding \
- box positions (tl_x, tl_y, br_x, br_y) and the 5-th \
- column are scores between 0 and 1.
- - det_labels (Tensor): Predicted labels of the corresponding \
- box with shape [num_bboxes].
- """
- assert len(mlvl_scores) == len(mlvl_bboxes) == len(mlvl_labels)
-
- mlvl_bboxes = torch.cat(mlvl_bboxes)
- if rescale:
- mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
- mlvl_scores = torch.cat(mlvl_scores)
- mlvl_labels = torch.cat(mlvl_labels)
-
- if mlvl_score_factors is not None:
- # TODO: Add sqrt operation in order to be consistent with
- # the paper.
- mlvl_score_factors = torch.cat(mlvl_score_factors)
- mlvl_scores = mlvl_scores * mlvl_score_factors
-
- if with_nms:
- if mlvl_bboxes.numel() == 0:
- det_bboxes = torch.cat([mlvl_bboxes, mlvl_scores[:, None]], -1)
- return det_bboxes, mlvl_labels
-
- det_bboxes, keep_idxs = batched_nms(mlvl_bboxes, mlvl_scores,
- mlvl_labels, cfg.nms)
- det_bboxes = det_bboxes[:cfg.max_per_img]
- det_labels = mlvl_labels[keep_idxs][:cfg.max_per_img]
- return det_bboxes, det_labels
- else:
- return mlvl_bboxes, mlvl_scores, mlvl_labels
-
- def forward_train(self,
- x,
- img_metas,
- gt_bboxes,
- gt_labels=None,
- gt_bboxes_ignore=None,
- proposal_cfg=None,
- **kwargs):
- """
- Args:
- x (list[Tensor]): Features from FPN.
- img_metas (list[dict]): Meta information of each image, e.g.,
- image size, scaling factor, etc.
- gt_bboxes (Tensor): Ground truth bboxes of the image,
- shape (num_gts, 4).
- gt_labels (Tensor): Ground truth labels of each box,
- shape (num_gts,).
- gt_bboxes_ignore (Tensor): Ground truth bboxes to be
- ignored, shape (num_ignored_gts, 4).
- proposal_cfg (mmcv.Config): Test / postprocessing configuration,
- if None, test_cfg would be used
-
- Returns:
- tuple:
- losses: (dict[str, Tensor]): A dictionary of loss components.
- proposal_list (list[Tensor]): Proposals of each image.
- """
- outs = self(x)
- if gt_labels is None:
- loss_inputs = outs + (gt_bboxes, img_metas)
- else:
- loss_inputs = outs + (gt_bboxes, gt_labels, img_metas)
- losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
- if proposal_cfg is None:
- return losses
- else:
- proposal_list = self.get_bboxes(
- *outs, img_metas=img_metas, cfg=proposal_cfg)
- return losses, proposal_list
-
- def simple_test(self, feats, img_metas, rescale=False):
- """Test function without test-time augmentation.
-
- Args:
- feats (tuple[torch.Tensor]): Multi-level features from the
- upstream network, each is a 4D-tensor.
- img_metas (list[dict]): List of image information.
- rescale (bool, optional): Whether to rescale the results.
- Defaults to False.
-
- Returns:
- list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
- The first item is ``bboxes`` with shape (n, 5),
- where 5 represent (tl_x, tl_y, br_x, br_y, score).
- The shape of the second tensor in the tuple is ``labels``
- with shape (n, ).
- """
- return self.simple_test_bboxes(feats, img_metas, rescale=rescale)
-
- @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
- def onnx_export(self,
- cls_scores,
- bbox_preds,
- score_factors=None,
- img_metas=None,
- with_nms=True):
- """Transform network output for a batch into bbox predictions.
-
- Args:
- cls_scores (list[Tensor]): Box scores for each scale level
- with shape (N, num_points * num_classes, H, W).
- bbox_preds (list[Tensor]): Box energies / deltas for each scale
- level with shape (N, num_points * 4, H, W).
- score_factors (list[Tensor]): score_factors for each s
- cale level with shape (N, num_points * 1, H, W).
- Default: None.
- img_metas (list[dict]): Meta information of each image, e.g.,
- image size, scaling factor, etc. Default: None.
- with_nms (bool): Whether apply nms to the bboxes. Default: True.
-
- Returns:
- tuple[Tensor, Tensor] | list[tuple]: When `with_nms` is True,
- it is tuple[Tensor, Tensor], first tensor bboxes with shape
- [N, num_det, 5], 5 arrange as (x1, y1, x2, y2, score)
- and second element is class labels of shape [N, num_det].
- When `with_nms` is False, first tensor is bboxes with
- shape [N, num_det, 4], second tensor is raw score has
- shape [N, num_det, num_classes].
- """
- assert len(cls_scores) == len(bbox_preds)
-
- num_levels = len(cls_scores)
-
- featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
- mlvl_priors = self.prior_generator.grid_priors(
- featmap_sizes,
- dtype=bbox_preds[0].dtype,
- device=bbox_preds[0].device)
-
- mlvl_cls_scores = [cls_scores[i].detach() for i in range(num_levels)]
- mlvl_bbox_preds = [bbox_preds[i].detach() for i in range(num_levels)]
-
- assert len(
- img_metas
- ) == 1, 'Only support one input image while in exporting to ONNX'
- img_shape = img_metas[0]['img_shape_for_onnx']
-
- cfg = self.test_cfg
- assert len(cls_scores) == len(bbox_preds) == len(mlvl_priors)
- device = cls_scores[0].device
- batch_size = cls_scores[0].shape[0]
- # convert to tensor to keep tracing
- nms_pre_tensor = torch.tensor(
- cfg.get('nms_pre', -1), device=device, dtype=torch.long)
-
- # e.g. Retina, FreeAnchor, etc.
- if score_factors is None:
- with_score_factors = False
- mlvl_score_factor = [None for _ in range(num_levels)]
- else:
- # e.g. FCOS, PAA, ATSS, etc.
- with_score_factors = True
- mlvl_score_factor = [
- score_factors[i].detach() for i in range(num_levels)
- ]
- mlvl_score_factors = []
-
- mlvl_batch_bboxes = []
- mlvl_scores = []
-
- for cls_score, bbox_pred, score_factors, priors in zip(
- mlvl_cls_scores, mlvl_bbox_preds, mlvl_score_factor,
- mlvl_priors):
- assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
-
- scores = cls_score.permute(0, 2, 3,
- 1).reshape(batch_size, -1,
- self.cls_out_channels)
- if self.use_sigmoid_cls:
- scores = scores.sigmoid()
- nms_pre_score = scores
- else:
- scores = scores.softmax(-1)
- nms_pre_score = scores
-
- if with_score_factors:
- score_factors = score_factors.permute(0, 2, 3, 1).reshape(
- batch_size, -1).sigmoid()
- bbox_pred = bbox_pred.permute(0, 2, 3,
- 1).reshape(batch_size, -1, 4)
- priors = priors.expand(batch_size, -1, priors.size(-1))
- # Get top-k predictions
- from mmdet.core.export import get_k_for_topk
- nms_pre = get_k_for_topk(nms_pre_tensor, bbox_pred.shape[1])
- if nms_pre > 0:
-
- if with_score_factors:
- nms_pre_score = (nms_pre_score * score_factors[..., None])
- else:
- nms_pre_score = nms_pre_score
-
- # Get maximum scores for foreground classes.
- if self.use_sigmoid_cls:
- max_scores, _ = nms_pre_score.max(-1)
- else:
- # remind that we set FG labels to [0, num_class-1]
- # since mmdet v2.0
- # BG cat_id: num_class
- max_scores, _ = nms_pre_score[..., :-1].max(-1)
- _, topk_inds = max_scores.topk(nms_pre)
-
- batch_inds = torch.arange(
- batch_size, device=bbox_pred.device).view(
- -1, 1).expand_as(topk_inds).long()
- # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501
- transformed_inds = bbox_pred.shape[1] * batch_inds + topk_inds
- priors = priors.reshape(
- -1, priors.size(-1))[transformed_inds, :].reshape(
- batch_size, -1, priors.size(-1))
- bbox_pred = bbox_pred.reshape(-1,
- 4)[transformed_inds, :].reshape(
- batch_size, -1, 4)
- scores = scores.reshape(
- -1, self.cls_out_channels)[transformed_inds, :].reshape(
- batch_size, -1, self.cls_out_channels)
- if with_score_factors:
- score_factors = score_factors.reshape(
- -1, 1)[transformed_inds].reshape(batch_size, -1)
-
- bboxes = self.bbox_coder.decode(
- priors, bbox_pred, max_shape=img_shape)
-
- mlvl_batch_bboxes.append(bboxes)
- mlvl_scores.append(scores)
- if with_score_factors:
- mlvl_score_factors.append(score_factors)
-
- batch_bboxes = torch.cat(mlvl_batch_bboxes, dim=1)
- batch_scores = torch.cat(mlvl_scores, dim=1)
- if with_score_factors:
- batch_score_factors = torch.cat(mlvl_score_factors, dim=1)
-
- # Replace multiclass_nms with ONNX::NonMaxSuppression in deployment
-
- from mmdet.core.export import add_dummy_nms_for_onnx
-
- if not self.use_sigmoid_cls:
- batch_scores = batch_scores[..., :self.num_classes]
-
- if with_score_factors:
- batch_scores = batch_scores * (batch_score_factors.unsqueeze(2))
-
- if with_nms:
- max_output_boxes_per_class = cfg.nms.get(
- 'max_output_boxes_per_class', 200)
- iou_threshold = cfg.nms.get('iou_threshold', 0.5)
- score_threshold = cfg.score_thr
- nms_pre = cfg.get('deploy_nms_pre', -1)
- return add_dummy_nms_for_onnx(batch_bboxes, batch_scores,
- max_output_boxes_per_class,
- iou_threshold, score_threshold,
- nms_pre, cfg.max_per_img)
- else:
- return batch_bboxes, batch_scores
|