#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author : linjie
import sys
sys.path.insert(0, './yolov5')

import argparse
import os
import platform
import shutil
import time
from pathlib import Path

import cv2
import torch
import torch.backends.cudnn as cudnn

from utils.datasets import LoadImages, LoadStreams
from utils.general import (
    check_img_size, non_max_suppression, apply_classifier, scale_coords,
    xyxy2xywh, plot_one_box, strip_optimizer, set_logging)
from utils.torch_utils import select_device, time_synchronized
from deep_sort_pytorch.utils.parser import get_config
from deep_sort_pytorch.deep_sort import DeepSort

# Fixed constants used to hash a track ID into a stable, distinct BGR color.
palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)


def bbox_rel(*xyxy):
    """Convert an absolute (x1, y1, x2, y2) pixel box to center-based (x_c, y_c, w, h)."""
    bbox_left = min([xyxy[0].item(), xyxy[2].item()])
    bbox_top = min([xyxy[1].item(), xyxy[3].item()])
    bbox_w = abs(xyxy[0].item() - xyxy[2].item())
    bbox_h = abs(xyxy[1].item() - xyxy[3].item())
    x_c = (bbox_left + bbox_w / 2)
    y_c = (bbox_top + bbox_h / 2)
    w = bbox_w
    h = bbox_h
    return x_c, y_c, w, h

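# Worked example (illustrative only): corner tensors for the box
# (100, 50, 180, 210) yield (x_c, y_c, w, h) = (140.0, 130.0, 80, 160),
# the center/size layout that deepsort.update() consumes further below.
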

def compute_color_for_labels(label):
    """
    Deterministically map an integer track ID to a fixed BGR color.
    """
    color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
    return tuple(color)

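# The quadratic term (label**2 - label + 1) spreads nearby IDs across the
# palette, so consecutive track IDs get distinct yet stable colors, e.g.
# compute_color_for_labels(7) returns the same tuple on every frame.
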

past_identities = []


def draw_boxes(img, bbox, cls_names, scores, identities=None, offset=(0, 0)):
    for i, box in enumerate(bbox):
        x1, y1, x2, y2 = [int(v) for v in box]
        x1 += offset[0]
        x2 += offset[0]
        y1 += offset[1]
        y2 += offset[1]
        # box text and bar
        track_id = int(identities[i]) if identities is not None else 0
        if identities is not None and 1 not in identities:
            print('Track ID 1 is absent from the current identities:')
            print(identities)
        color = compute_color_for_labels(track_id)
        label = '%d %s %d%%' % (track_id, cls_names[i], scores[i])
        print('Person ID {0} appeared!'.format(track_id))
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
        cv2.rectangle(
            img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1)
        cv2.putText(img, label, (x1, y1 + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)
    return img

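# Note: draw_boxes() mutates the frame in place (cv2.rectangle/cv2.putText
# draw directly on img) and also returns it, so callers may ignore the
# return value.
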

def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

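    # Meaning of the DeepSORT settings above (read from deep_sort.yaml; the
    # semantics follow the standard deep_sort_pytorch implementation):
    #   MAX_DIST         - cosine-distance gate for appearance (ReID) matching
    #   MAX_IOU_DISTANCE - gate for the IoU-based fallback association
    #   MAX_AGE          - frames a lost track survives before deletion
    #   N_INIT           - consecutive hits needed to confirm a new track
    #   NN_BUDGET        - maximum appearance samples kept per track
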
    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights, map_location=device)[
        'model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

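    # FP16 roughly halves GPU memory traffic at little accuracy cost; the
    # model stays FP32 on CPU because half-precision inference is CUDA-only.
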
    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

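        # At this point img is a 1x3xHxW float tensor scaled to [0, 1], the
        # input range YOLOv5 expects; skipping the /255.0 step would quietly
        # ruin detection quality.
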
        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

-
- bbox_xywh = []
- confs = []
- clses = []
- # Adapt detections to deep sort input format
- for *xyxy, conf, cls in det:
- x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
- obj = [x_c, y_c, bbox_w, bbox_h]
- bbox_xywh.append(obj)
- confs.append([conf.item()])
- clses.append([cls.item()])
- xywhs = torch.Tensor(bbox_xywh)
- confss = torch.Tensor(confs)
- clses = torch.Tensor(clses)
- outputs = deepsort.update(xywhs, confss, clses, im0)
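                # This fork's deepsort.update() is assumed to return one row
                # per confirmed track with columns
                # [x1, y1, x2, y2, track_id, class, score, stay_frames];
                # the stock deep_sort_pytorch API returns only the first five.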

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4]
                    clses = outputs[:, 5]
                    scores = outputs[:, 6]
                    stays = outputs[:, 7]  # frames each track has persisted (unused here)
                    draw_boxes(im0, bbox_xyxy, [names[int(c)] for c in clses], scores, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2] - output[0]  # outputs store x2, not width
                        bbox_h = output[3] - output[1]  # outputs store y2, not height
                        identity = output[4]  # track ID; output[-1] is the stay count
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                           bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format
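                        # The rows written above follow the MOTChallenge text
                        # layout: frame, id, bb_left, bb_top, bb_width,
                        # bb_height, conf, x, y, z; the last four stay -1
                        # because confidence and 3D position are not exported.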

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
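                        # 'mp4v' (the --fourcc default) is broadly supported
                        # for .mp4 output in OpenCV; if the file will not
                        # play, 'XVID' with an .avi path is a common fallback.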
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform.system() == 'Darwin':  # macOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str,
                        default='weights/yolov5s.pt', help='model.pt path')
    # file/folder, 0 for webcam
    parser.add_argument('--source', type=str,
                        default='inference/images', help='source')
    parser.add_argument('--output', type=str, default='inference/output',
                        help='output folder')  # output folder
    parser.add_argument('--img-size', type=int, default=640,
                        help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float,
                        default=0.4, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float,
                        default=0.5, help='IOU threshold for NMS')
    parser.add_argument('--fourcc', type=str, default='mp4v',
                        help='output video codec (verify ffmpeg support)')
    parser.add_argument('--device', default='',
                        help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true',
                        help='display results')
    parser.add_argument('--save-txt', action='store_true',
                        help='save results to *.txt')
    # class 0 is person
    parser.add_argument('--classes', nargs='+', type=int,
                        default=[0], help='filter by class')
    parser.add_argument('--agnostic-nms', action='store_true',
                        help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true',
                        help='augmented inference')
    parser.add_argument('--config_deepsort', type=str,
                        default='deep_sort_pytorch/configs/deep_sort.yaml',
                        help='DeepSORT config file')
    args = parser.parse_args()
    args.img_size = check_img_size(args.img_size)
    print(args)
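    # check_img_size() rounds --img-size up to the nearest multiple of the
    # model's maximum stride (32 for yolov5s), since the backbone downsamples
    # the input by that factor.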

    with torch.no_grad():
        detect(args)