
end2end_onnxruntime.ipynb
{
"cells": [
{
"cell_type": "markdown",
"id": "04eee3aa-c235-4bf7-a4cf-b6b2d08b0626",
"metadata": {},
"source": [
  8. "# Export onnx for onnxruntime end2end detect !!!"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "936876aa-146a-4100-b41e-36d4a13feb2c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Namespace(batch_size=1, conf_thres=0.35, device='cpu', dynamic=False, end2end=True, grid=True, img_size=[640, 640], include_nms=False, iou_thres=0.65, max_wh=640, simplify=True, topk_all=100, weights='weights/yolov7-d6.pt')\n",
"YOLOR 🚀 v0.1-59-g33a9e01 torch 1.12.0+cu116 CPU\n",
"\n",
"Fusing layers... \n",
"Model Summary: 539 layers, 133757052 parameters, 133757052 gradients\n",
"/home/ubuntu/miniconda3/envs/torch/lib/python3.8/site-packages/torch/functional.py:478: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2894.)\n",
" return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]\n",
"\n",
"Starting TorchScript export with torch 1.12.0+cu116...\n",
"/home/ubuntu/work/yolo/yolov7/models/yolo.py:51: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
" if self.grid[i].shape[2:4] != x[i].shape[2:4]:\n",
"TorchScript export success, saved as weights/yolov7-d6.torchscript.pt\n",
"\n",
"Starting ONNX export with onnx 1.12.0...\n",
"onnxruntime\n",
"/home/ubuntu/miniconda3/envs/torch/lib/python3.8/site-packages/torch/_tensor.py:1083: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:477.)\n",
" return self._grad\n",
"/home/ubuntu/miniconda3/envs/torch/lib/python3.8/site-packages/torch/onnx/symbolic_opset9.py:4182: UserWarning: Exporting aten::index operator of advanced indexing in opset 12 is achieved by combination of multiple ONNX operators, including Reshape, Transpose, Concat, and Gather. If indices include negative values, the exported graph will produce incorrect results.\n",
" warnings.warn(\n",
"\n",
"Starting to simplify ONNX...\n",
"ONNX export success, saved as weights/yolov7-d6.onnx\n",
"CoreML export failure: No module named 'coremltools'\n",
"\n",
"Export complete (18.38s). Visualize with https://github.com/lutzroeder/netron.\n"
]
}
],
"source": [
  50. "# export onnx first\n",
  51. "!python export.py --weights weights/yolov7-d6.pt \\\n",
  52. " --grid --end2end --simplify \\\n",
  53. " --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 \\\n",
  54. " --max-wh 640 # For onnxruntime, you need to specify this value as an integer, when it is 0 it means agnostic NMS, \n",
  55. " # otherwise it is non-agnostic NMS"
  56. ]
  57. },
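{
"cell_type": "markdown",
"id": "added-onnx-check-md",
"metadata": {},
"source": [
"Optionally, the exported graph can be sanity-checked with the `onnx` package before loading it into ONNX Runtime (a minimal sketch; it assumes `onnx` is installed, as it already was for the export above)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-onnx-check",
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch: load the exported model and run the ONNX structural checker\n",
"import onnx\n",
"\n",
"onnx_model = onnx.load('weights/yolov7-d6.onnx')\n",
"onnx.checker.check_model(onnx_model)  # raises if the graph is malformed\n",
"print([o.name for o in onnx_model.graph.output])  # expect the single end2end output"
]
},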
{
"cell_type": "code",
"execution_count": 2,
"id": "6ec4c01e-dac9-417e-b4cf-7c6440e274e9",
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import time\n",
"import requests\n",
"import random\n",
"import numpy as np\n",
"import onnxruntime as ort\n",
"from PIL import Image\n",
"from pathlib import Path\n",
"from collections import OrderedDict,namedtuple"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "06a9a121-40a2-4eb6-8a79-94894a01915a",
"metadata": {},
"outputs": [],
"source": [
"cuda = False\n",
"w = \"weights/yolov7-d6.onnx\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "007a7721-c49d-4713-94c6-4a57790acabd",
"metadata": {},
"outputs": [],
"source": [
"providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']\n",
"session = ort.InferenceSession(w, providers=providers)"
]
},
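{
"cell_type": "markdown",
"id": "added-providers-check-md",
"metadata": {},
"source": [
"As a quick check (a small optional sketch), `session.get_providers()` reports which execution providers ONNX Runtime actually registered; this helps spot a silent fallback to CPU when `cuda=True` but the CUDA provider is unavailable."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-providers-check",
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch: list the execution providers actually registered for this session\n",
"session.get_providers()"
]
},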
{
"cell_type": "code",
"execution_count": 5,
"id": "6c7a8ce1-5026-4870-8705-61399c6b7609",
"metadata": {},
"outputs": [],
"source": [
"def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):\n",
"    # Resize and pad image while meeting stride-multiple constraints\n",
"    shape = im.shape[:2]  # current shape [height, width]\n",
"    if isinstance(new_shape, int):\n",
"        new_shape = (new_shape, new_shape)\n",
"\n",
"    # Scale ratio (new / old)\n",
"    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])\n",
"    if not scaleup:  # only scale down, do not scale up (for better val mAP)\n",
"        r = min(r, 1.0)\n",
"\n",
"    # Compute padding\n",
"    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))\n",
"    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding\n",
"\n",
"    if auto:  # minimum rectangle\n",
"        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding\n",
"\n",
"    dw /= 2  # divide padding into 2 sides\n",
"    dh /= 2\n",
"\n",
"    if shape[::-1] != new_unpad:  # resize\n",
"        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)\n",
"    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))\n",
"    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))\n",
"    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border\n",
"    return im, r, (dw, dh)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "fdf1c66b-37bf-4c94-9005-2338331cf73d",
"metadata": {},
"outputs": [],
"source": [
"names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', \n",
" 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', \n",
" 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', \n",
" 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', \n",
" 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', \n",
" 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', \n",
" 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', \n",
" 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', \n",
" 'hair drier', 'toothbrush']\n",
"colors = {name: [random.randint(0, 255) for _ in range(3)] for name in names}"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b9ce7a13-31b8-4a35-bd8d-4f0debd46480",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1, 3, 640, 640)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url = 'https://oneflow-static.oss-cn-beijing.aliyuncs.com/tripleMu/image1.jpg'\n",
"file = requests.get(url)\n",
"img = cv2.imdecode(np.frombuffer(file.content, np.uint8), 1)\n",
"img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
"\n",
"image = img.copy()\n",
"image, ratio, dwdh = letterbox(image, auto=False)\n",
"image = image.transpose((2, 0, 1))\n",
"image = np.expand_dims(image, 0)\n",
"image = np.ascontiguousarray(image)\n",
"\n",
"im = image.astype(np.float32)\n",
"im /= 255\n",
"im.shape"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c382a4d2-b37a-40be-9618-653419319fde",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['output']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"outname = [i.name for i in session.get_outputs()]\n",
"outname"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "b448209b-3b92-4a48-9a55-134590e717d5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['images']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"inname = [i.name for i in session.get_inputs()]\n",
"inname"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ef8bc01f-a7c6-47e0-93ed-42f41f631fee",
"metadata": {},
"outputs": [],
"source": [
"inp = {inname[0]:im}"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "c0a50aee-fa52-4b6e-aa92-bbb1f12d5652",
"metadata": {},
"outputs": [],
"source": [
"outputs = session.run(outname, inp)[0]"
]
},
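{
"cell_type": "markdown",
"id": "added-latency-check-md",
"metadata": {},
"source": [
"A small optional timing sketch: re-run the same inference a few times with the `time` module imported above and report the average latency. The repetition count is arbitrary and the numbers depend on hardware."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-latency-check",
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch: rough average latency of the end2end model on this input\n",
"n = 10  # arbitrary repetition count\n",
"start = time.perf_counter()\n",
"for _ in range(n):\n",
"    session.run(outname, inp)\n",
"print(f'average latency: {(time.perf_counter() - start) / n * 1000:.1f} ms')"
]
},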
{
"cell_type": "code",
"execution_count": 12,
"id": "9d7d69af-bcd4-45e6-8a53-0a49b6d7c586",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.0000000e+00, 2.4857600e+02, 1.2993985e+02, 6.2988782e+02,\n",
" 5.8161853e+02, 0.0000000e+00, 9.6787709e-01],\n",
" [0.0000000e+00, 2.4103851e+00, 5.6784058e+01, 3.9849658e+02,\n",
" 5.8172839e+02, 0.0000000e+00, 9.6493036e-01],\n",
" [0.0000000e+00, 1.9663359e+02, 2.5155685e+02, 2.5495015e+02,\n",
" 4.7303458e+02, 2.7000000e+01, 8.8971210e-01],\n",
" [0.0000000e+00, 2.5648334e+02, 2.5891159e+02, 2.9980090e+02,\n",
" 2.9901724e+02, 5.5000000e+01, 5.1295620e-01]], dtype=float32)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"outputs"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f4385c28-1b6c-4c61-a876-fd155d7df915",
"metadata": {},
"outputs": [],
"source": [
"ori_images = [img.copy()]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "d30904c5-1089-4a2a-a464-d446235ee9fc",
"metadata": {},
"outputs": [],
"source": [
  296. "for i,(batch_id,x0,y0,x1,y1,cls_id,score) in enumerate(outputs):\n",
  297. " image = ori_images[int(batch_id)]\n",
  298. " box = np.array([x0,y0,x1,y1])\n",
  299. " box -= np.array(dwdh*2)\n",
  300. " box /= ratio\n",
  301. " box = box.round().astype(np.int32).tolist()\n",
  302. " cls_id = int(cls_id)\n",
  303. " score = round(float(score),3)\n",
  304. " name = names[cls_id]\n",
  305. " color = colors[name]\n",
  306. " name += ' '+str(score)\n",
  307. " cv2.rectangle(image,box[:2],box[2:],color,2)\n",
  308. " cv2.putText(image,name,(box[0], box[1] - 2),cv2.FONT_HERSHEY_SIMPLEX,0.75,[225, 255, 255],thickness=2) "
]
},
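{
"cell_type": "markdown",
"id": "added-save-result-md",
"metadata": {},
"source": [
"Optionally, the annotated image can also be written to disk (a sketch; the output filename is arbitrary). The array is RGB at this point, so it is converted back to BGR for `cv2.imwrite`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-save-result",
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch: save the annotated image, converting RGB back to BGR for OpenCV\n",
"cv2.imwrite('result.jpg', cv2.cvtColor(ori_images[0], cv2.COLOR_RGB2BGR))"
]
},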
{
"cell_type": "code",
"execution_count": 15,
"id": "b4449198-3c2b-41d6-9a23-de7accf73d82",
"metadata": {},
"outputs": [
{
"data": {

With the development of artificial intelligence and big data, there is growing demand for automation tools in many areas. During the current period of epidemic prevention and control, a YOLO model implemented with MindSpore can perform object detection and semantic segmentation on videos or images, covering mask-wearing detection and pedestrian social-distancing detection, enabling automated epidemic-control management of public places.