#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.

from typing import Optional, Tuple

import numpy as np
import cv2
import torch
import torchvision
import torchvision.transforms as T

__all__ = [
    "filter_box",
    "postprocess",
    "bboxes_iou",
    "matrix_iou",
    "adjust_box_anns",
    "xyxy2xywh",
    "xyxy2cxcywh",
    "pre_process",
]


def pre_process(image: np.ndarray,
                input_size: Tuple = (640, 640),
                mean: Tuple = (0.485, 0.456, 0.406),
                std: Tuple = (0.229, 0.224, 0.225),
                swap: Tuple = (2, 0, 1),
                device: Optional[str] = None) -> Tuple[torch.Tensor, Tuple[int, int]]:
    """Resize and normalize an image for a YOLOX forward pass.

    Returns the preprocessed NCHW tensor and the (width, height) the image
    was resized to, with each side rounded up to a multiple of 32.
    """
    # Promote grayscale input to 3 channels by stacking along a new last axis.
    if len(image.shape) == 2:
        image = np.stack((image, image, image), axis=-1)

    # Scale so the image fits inside input_size, then round each side up to a
    # multiple of 32 as required by the YOLOX strides.
    img_sh = image.shape[0:2]
    r = min(input_size[0] / img_sh[0], input_size[1] / img_sh[1])
    new_size = [int(img_sh[1] * r), int(img_sh[0] * r)]  # (width, height)
    new_size_f = (int(np.ceil(new_size[0] / 32)) * 32, int(np.ceil(new_size[1] / 32)) * 32)

    # Resize, cast to float32, and move channels first (HWC -> CHW by default).
    resized_img = cv2.resize(
        image,
        new_size_f,
        interpolation=cv2.INTER_NEAREST,
    ).astype(np.float32).transpose(swap)

    # Move to the target device before normalizing.
    resized_img = torch.from_numpy(resized_img).to(device)

    # Scale to [0, 1], then apply ImageNet-style mean/std normalization.
    resized_img = torch.div(resized_img, 255.)
    transforms = torch.nn.Sequential(
        T.Normalize(mean=mean, std=std)
    )

    resized_img: torch.Tensor = transforms(resized_img).unsqueeze(0)

    return resized_img, new_size_f
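
# Usage sketch (illustrative, not part of the original module): preprocessing
# a BGR uint8 frame loaded with cv2.imread; the file name is hypothetical.
#
#   img = cv2.imread("frame.jpg")
#   tensor, (w, h) = pre_process(img, device="cpu")
#   # tensor.shape == (1, 3, h, w), dtype torch.float32, ready for the model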


def filter_box(output, scale_range):
    """Keep detections whose box area lies in (min_scale**2, max_scale**2).

    output: tensor of shape (N, 5 + num_classes) with xyxy boxes in the
    first four columns.
    """
    min_scale, max_scale = scale_range
    w = output[:, 2] - output[:, 0]
    h = output[:, 3] - output[:, 1]
    keep = (w * h > min_scale * min_scale) & (w * h < max_scale * max_scale)
    return output[keep]
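
# Usage sketch (illustrative): keep only boxes whose area falls between
# 32x32 and 96x96 pixels; `dets` stands for a hypothetical (N, 5 + C) tensor.
#
#   medium = filter_box(dets, scale_range=(32, 96))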


def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45):
    """Convert raw YOLOX outputs to final detections.

    prediction: (batch, num_anchors, 5 + num_classes) with boxes in
    (cx, cy, w, h) format. Returns a list with one tensor per image, each row
    ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred), or None
    when no detection survives thresholding and NMS.
    """
    # Convert (cx, cy, w, h) to corner format (x1, y1, x2, y2) in place.
    box_corner = torch.empty_like(prediction)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for i, image_pred in enumerate(prediction):

        # If no predictions remain, process the next image.
        if not image_pred.size(0):
            continue
        # Get the score and class with the highest confidence.
        class_conf, class_pred = torch.max(
            image_pred[:, 5: 5 + num_classes], 1, keepdim=True
        )

        # Keep rows whose combined objectness * class score clears the threshold.
        conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze()
        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
        detections = detections[conf_mask]
        if not detections.size(0):
            continue

        # Class-aware NMS on the combined score.
        nms_out_index = torchvision.ops.batched_nms(
            detections[:, :4],
            detections[:, 4] * detections[:, 5],
            detections[:, 6],
            nms_thre,
        )
        detections = detections[nms_out_index]
        if output[i] is None:
            output[i] = detections
        else:
            output[i] = torch.cat((output[i], detections))

    return output
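
# Usage sketch (illustrative): decoding raw model output for an 80-class
# model; `raw` stands for a hypothetical (1, num_anchors, 85) tensor.
#
#   dets = postprocess(raw, num_classes=80, conf_thre=0.3, nms_thre=0.45)
#   if dets[0] is not None:
#       boxes, scores = dets[0][:, :4], dets[0][:, 4] * dets[0][:, 5]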


def bboxes_iou(bboxes_a, bboxes_b, xyxy=True):
    """Pairwise IoU between two box sets; returns a (len_a, len_b) tensor.

    With xyxy=True boxes are (x1, y1, x2, y2); otherwise (cx, cy, w, h).
    """
    if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
        raise IndexError("boxes must have shape (N, 4)")

    if xyxy:
        tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2])
        br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:])
        area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
        area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)
    else:
        tl = torch.max(
            (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),
            (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2),
        )
        br = torch.min(
            (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),
            (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2),
        )

        area_a = torch.prod(bboxes_a[:, 2:], 1)
        area_b = torch.prod(bboxes_b[:, 2:], 1)
    # Zero out pairs that do not overlap (tl < br must hold on both axes).
    en = (tl < br).to(tl.dtype).prod(dim=2)
    area_i = torch.prod(br - tl, 2) * en
    return area_i / (area_a[:, None] + area_b - area_i)
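
# Usage sketch (illustrative): IoU matrix between two small xyxy box sets.
#
#   a = torch.tensor([[0., 0., 10., 10.]])
#   b = torch.tensor([[5., 5., 15., 15.], [20., 20., 30., 30.]])
#   bboxes_iou(a, b)  # tensor([[0.1429, 0.0000]])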


def matrix_iou(a, b):
    """Return the IoU matrix of boxes a and b; NumPy version used for data
    augmentation."""
    lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

    area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    # The epsilon guards against division by zero for degenerate boxes.
    return area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12)
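
# Usage sketch (illustrative): with a of shape (N, 4) and b of shape (M, 4),
# the broadcasting above yields an (N, M) IoU matrix.
#
#   a = np.array([[0., 0., 10., 10.]])
#   b = np.array([[5., 5., 15., 15.]])
#   matrix_iou(a, b)  # array([[0.14285714]])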


def adjust_box_anns(bbox, scale_ratio, padw, padh, w_max, h_max):
    """Scale and translate xyxy box annotations in place.

    Note: clipping to [0, w_max] x [0, h_max] is disabled here; w_max and
    h_max are retained only for API compatibility.
    """
    bbox[:, 0::2] = bbox[:, 0::2] * scale_ratio + padw
    bbox[:, 1::2] = bbox[:, 1::2] * scale_ratio + padh
    return bbox
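
# Usage sketch (illustrative): mapping boxes onto a mosaic canvas after the
# source image was scaled by 0.5 and placed at offset (100, 50).
#
#   boxes = np.array([[0., 0., 40., 40.]])
#   adjust_box_anns(boxes, 0.5, 100, 50, 640, 640)
#   # -> array([[100., 50., 120., 70.]])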


def xyxy2xywh(bboxes):
    """Convert (x1, y1, x2, y2) boxes to (x1, y1, w, h) in place."""
    bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
    bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
    return bboxes


def xyxy2cxcywh(bboxes):
    """Convert (x1, y1, x2, y2) boxes to (cx, cy, w, h) in place."""
    bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
    bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
    bboxes[:, 0] = bboxes[:, 0] + bboxes[:, 2] * 0.5
    bboxes[:, 1] = bboxes[:, 1] + bboxes[:, 3] * 0.5
    return bboxes
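
# Usage sketch (illustrative): both converters mutate their argument, so pass
# a copy when the original xyxy boxes are still needed.
#
#   boxes = np.array([[10., 20., 30., 60.]])
#   xyxy2cxcywh(boxes.copy())  # array([[20., 40., 20., 40.]])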