bailulu
/
mindspore_yolo

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "failing-secret",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "import cv2\n",
    "import yaml\n",
    "from torchvision import transforms\n",
    "import numpy as np\n",
    "\n",
    "from utils.datasets import letterbox\n",
    "from utils.general import non_max_suppression_mask_conf\n",
    "\n",
    "from detectron2.modeling.poolers import ROIPooler\n",
    "from detectron2.structures import Boxes\n",
    "from detectron2.utils.memory import retry_if_cuda_oom\n",
    "from detectron2.layers import paste_masks_in_image\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "sixth-universe",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select the first GPU when CUDA is available; otherwise fall back to the CPU.\n",
    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "# Hyper-parameters for the mask model (later cells read hyp['mask_resolution']).\n",
    "with open('data/hyp.scratch.mask.yaml') as f:\n",
    "    hyp = yaml.load(f, Loader=yaml.FullLoader)\n",
    "\n",
    "# SECURITY: torch.load unpickles the file, which can execute arbitrary code;\n",
    "# only load checkpoints obtained from a trusted source.\n",
    "# map_location remaps the stored tensors onto `device`, so a checkpoint that\n",
    "# was saved on a GPU can still be loaded on a CPU-only machine.\n",
    "# NOTE(review): newer PyTorch releases default torch.load to weights_only=True,\n",
    "# which presumably rejects a fully pickled module such as weigths['model'] -- TODO confirm.\n",
    "weigths = torch.load('yolov7-mask.pt', map_location=device)\n",
    "model = weigths['model']\n",
    "\n",
    "# Half precision on the selected device, switched to eval mode.\n",
    "model = model.half().to(device)\n",
    "_ = model.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "respected-source",
   "metadata": {},
   "outputs": [],
   "source": [
    "# cv2.imread returns None (it does not raise) when the file cannot be read,\n",
    "# so fail here with a clear message instead of deep inside letterbox.\n",
    "image = cv2.imread('./horses.jpg')  # 504x378 image\n",
    "if image is None:\n",
    "    raise FileNotFoundError('could not read ./horses.jpg')\n",
    "\n",
    "# letterbox is a project helper; only the first item of its result is used here.\n",
    "image = letterbox(image, 640, stride=64, auto=True)[0]\n",
    "image_ = image.copy()  # keep the letterboxed array before it is turned into a tensor\n",
    "\n",
    "# ndarray -> tensor, add a leading batch dimension, move to the device, cast to half\n",
    "# (the model was cast to half when it was loaded).\n",
    "image = transforms.ToTensor()(image)\n",
    "image = image.unsqueeze(0)\n",
    "image = image.to(device)\n",
    "image = image.half()\n",
    "\n",
    "# Inference only: disable autograd so no graph is recorded for the forward pass.\n",
    "with torch.no_grad():\n",
    "    output = model(image)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cognitive-writer",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull the individual entries out of the model's output dict, in this key order.\n",
    "inf_out, train_out, attn, mask_iou, bases, sem_output = (\n",
    "    output[key]\n",
    "    for key in ('test', 'bbox_and_cls', 'attn', 'mask_iou', 'bases', 'sem')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dedicated-helmet",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append sem_output to bases along dim 1.\n",
    "# NOTE: this rebinds `bases`, so re-running this cell without first re-running\n",
    "# the cell that defines `bases` concatenates sem_output a second time.\n",
    "bases = torch.cat((bases, sem_output), dim=1)\n",
    "\n",
    "# First and last two dimensions of the 4-D input tensor.\n",
    "nb, _, height, width = image.shape\n",
    "\n",
    "names = model.names\n",
    "pooler_scale = model.pooler_scale\n",
    "\n",
    "# ROI pooler handed to the NMS helper in the next step.\n",
    "pooler = ROIPooler(\n",
    "    output_size=hyp['mask_resolution'],\n",
    "    scales=(pooler_scale,),\n",
    "    sampling_ratio=1,\n",
    "    pooler_type='ROIAlignV2',\n",
    "    canonical_level=2,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "martial-burner",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: this rebinds `output`; from here on it holds the NMS results rather than\n",
    "# the raw model output dict. mask_iou=None is passed explicitly, so the model's\n",
    "# own mask_iou entry is not forwarded.\n",
    "(output, output_mask, output_mask_score, output_ac, output_ab) = non_max_suppression_mask_conf(\n",
    "    inf_out,\n",
    "    attn,\n",
    "    bases,\n",
    "    pooler,\n",
    "    hyp,\n",
    "    conf_thres=0.25,\n",
    "    iou_thres=0.65,\n",
    "    merge=False,\n",
    "    mask_iou=None,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "further-conditions",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Results for the first image of the batch.\n",
    "pred, pred_masks = output[0], output_mask[0]\n",
    "base = bases[0]\n",
    "bboxes = Boxes(pred[:, :4])  # the first four columns are used as the boxes\n",
    "\n",
    "# View the masks as (-1, mask_resolution, mask_resolution) and paste them into a\n",
    "# (height, width) canvas, thresholded at 0.5.\n",
    "original_pred_masks = pred_masks.view(-1, hyp['mask_resolution'], hyp['mask_resolution'])\n",
    "pred_masks = retry_if_cuda_oom(paste_masks_in_image)(original_pred_masks, bboxes, (height, width), threshold=0.5)\n",
    "pred_masks_np = pred_masks.detach().cpu().numpy()\n",
    "pred_cls = pred[:, 5].detach().cpu().numpy()   # column 5: iterated as `cls` when drawing\n",
    "pred_conf = pred[:, 4].detach().cpu().numpy()  # column 4: iterated as `conf` when drawing\n",
    "\n",
    "# Rebuild a uint8 channels-last image from the network input tensor for drawing.\n",
    "nimg = image[0].permute(1, 2, 0) * 255\n",
    "nimg = nimg.cpu().numpy().astype(np.uint8)\n",
    "nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)\n",
    "\n",
    "# `np.int` was removed in NumPy 1.24 (it was only an alias of the builtin),\n",
    "# so use `int` directly; the resulting dtype is the same.\n",
    "nbboxes = bboxes.tensor.detach().cpu().numpy().astype(int)\n",
    "pnimg = nimg.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "grand-thickness",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Overlay every sufficiently confident instance on pnimg: tinted mask plus box outline.\n",
    "for one_mask, bbox, cls, conf in zip(pred_masks_np, nbboxes, pred_cls, pred_conf):\n",
    "    # Skip detections below the confidence cut-off.\n",
    "    if conf < 0.25:\n",
    "        continue\n",
    "\n",
    "    # One random colour per instance; each channel is drawn from [0, 255).\n",
    "    color = [np.random.randint(255) for _ in range(3)]\n",
    "\n",
    "    # 50/50 blend of the colour into the pixels selected by the mask.\n",
    "    pnimg[one_mask] = 0.5 * pnimg[one_mask] + 0.5 * np.array(color, dtype=np.uint8)\n",
    "\n",
    "    # Box outline, 2 px thick, in the same colour.\n",
    "    pnimg = cv2.rectangle(\n",
    "        pnimg,\n",
    "        (bbox[0], bbox[1]),\n",
    "        (bbox[2], bbox[3]),\n",
    "        color,\n",
    "        2,\n",
    "    )\n",
    "\n",
    "    # Optional class/confidence label (left disabled):\n",
    "    #label = '%s %.3f' % (names[int(cls)], conf)\n",
    "    #t_size = cv2.getTextSize(label, 0, fontScale=0.5, thickness=1)[0]\n",
    "    #c2 = bbox[0] + t_size[0], bbox[1] - t_size[1] - 3\n",
    "    #pnimg = cv2.rectangle(pnimg, (bbox[0], bbox[1]), c2, color, -1, cv2.LINE_AA)  # filled\n",
    "    #pnimg = cv2.putText(pnimg, label, (bbox[0], bbox[1] - 2), 0, 0.5, [255, 255, 255], thickness=1, lineType=cv2.LINE_AA)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "allied-drama",
   "metadata": {},
   "outputs": [
    {
     "data": {