
datasets.py

import glob
import math
import os
import random
import shutil
import time
from pathlib import Path
from threading import Thread

import cv2
import numpy as np
import torch
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm

from utils.general import xyxy2xywh, xywh2xyxy, torch_distributed_zero_first

help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']

# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[orientation] == 'Orientation':
        break


def get_hash(files):
    # Returns a single hash value of a list of files
    return sum(os.path.getsize(f) for f in files if os.path.isfile(f))


def exif_size(img):
    # Returns exif-corrected PIL size
    s = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
        if rotation == 6:  # rotation 270
            s = (s[1], s[0])
        elif rotation == 8:  # rotation 90
            s = (s[1], s[0])
    except:
        pass
    return s


def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
                      rank=-1, world_size=1, workers=8):
    # Make sure only the first process in DDP process the dataset first, and the following others can use the cache.
    with torch_distributed_zero_first(rank):
        dataset = LoadImagesAndLabels(path, imgsz, batch_size,
                                      augment=augment,  # augment images
                                      hyp=hyp,  # augmentation hyperparameters
                                      rect=rect,  # rectangular training
                                      cache_images=cache,
                                      single_cls=opt.single_cls,
                                      stride=int(stride),
                                      pad=pad,
                                      rank=rank)

    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers])  # number of workers
    train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=nw,
                                             sampler=train_sampler,
                                             pin_memory=True,
                                             collate_fn=LoadImagesAndLabels.collate_fn)
    return dataloader, dataset


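# Example usage of create_dataloader() (an illustrative sketch, not part of the
# original file: the dataset path and the single_cls field of opt are assumed
# values for demonstration):
#     from argparse import Namespace
#     opt = Namespace(single_cls=False)
#     dataloader, dataset = create_dataloader('data/train.txt', imgsz=640,
#                                             batch_size=16, stride=32, opt=opt)
#     # (pass hyp=... and augment=True to enable mosaic/HSV/flip augmentation)
#     for imgs, targets, paths, shapes in dataloader:
#         # imgs: uint8 tensor (batch, 3, h, w); targets: (n, 6) rows of
#         # [batch_index, class, x, y, w, h] with normalized xywh
#         break

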
class LoadImages:  # for inference
    def __init__(self, path, img_size=640):
        # print('check what path is: {0}'.format(path))
        p = str(Path(path))  # os-agnostic
        p = os.path.abspath(p)  # absolute path
        if '*' in p:
            files = sorted(glob.glob(p))  # glob
        elif os.path.isdir(p):
            files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
        elif os.path.isfile(p):
            files = [p]  # files
        else:
            raise Exception('ERROR: %s does not exist' % p)

        images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
        videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.files = images + videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv
        self.mode = 'images'
        if any(videos):
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nf > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
                            (p, img_formats, vid_formats)

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            if not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                else:
                    path = self.files[self.count]
                    self.new_video(path)
                    ret_val, img0 = self.cap.read()

            self.frame += 1
            print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='')

        else:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, 'Image Not Found ' + path
            print('image %g/%g %s: ' % (self.count, self.nf, path), end='')

        # Padded resize
        img = letterbox(img0, new_shape=self.img_size)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
        return path, img, img0, self.cap

    def new_video(self, path):
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nf  # number of files


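# Example usage of LoadImages for inference (a minimal sketch; 'data/samples' is
# a hypothetical directory of images and/or videos):
#     for path, img, img0, vid_cap in LoadImages('data/samples', img_size=640):
#         # img: letterboxed, RGB, CHW numpy array ready for a model
#         # img0: the original BGR frame, useful for drawing and saving results
#         pass

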
class LoadWebcam:  # for inference
    def __init__(self, pipe=0, img_size=640):
        self.img_size = img_size

        if pipe == '0':
            pipe = 0  # local camera
        # pipe = 'rtsp://192.168.1.64/1'  # IP camera
        # pipe = 'rtsp://username:password@192.168.1.64/1'  # IP camera with login
        # pipe = 'rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa'  # IP traffic camera
        # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg'  # IP golf camera

        # https://answers.opencv.org/question/215996/changing-gstreamer-pipeline-to-opencv-in-pythonsolved/
        # pipe = '"rtspsrc location="rtsp://username:password@192.168.1.64/1" latency=10 ! appsink'  # GStreamer

        # https://answers.opencv.org/question/200787/video-acceleration-gstremer-pipeline-in-videocapture/
        # https://stackoverflow.com/questions/54095699/install-gstreamer-support-for-opencv-python-package  # install help
        # pipe = "rtspsrc location=rtsp://root:root@192.168.0.91:554/axis-media/media.amp?videocodec=h264&resolution=3840x2160 protocols=GST_RTSP_LOWER_TRANS_TCP ! rtph264depay ! queue ! vaapih264dec ! videoconvert ! appsink"  # GStreamer

        self.pipe = pipe
        self.cap = cv2.VideoCapture(pipe)  # video capture object
        self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)  # set buffer size

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if cv2.waitKey(1) == ord('q'):  # q to quit
            self.cap.release()
            cv2.destroyAllWindows()
            raise StopIteration

        # Read frame
        if self.pipe == 0:  # local camera
            ret_val, img0 = self.cap.read()
            img0 = cv2.flip(img0, 1)  # flip left-right
        else:  # IP camera
            n = 0
            while True:
                n += 1
                self.cap.grab()
                if n % 30 == 0:  # skip frames
                    ret_val, img0 = self.cap.retrieve()
                    if ret_val:
                        break

        # Print
        assert ret_val, 'Camera Error %s' % self.pipe
        img_path = 'webcam.jpg'
        print('webcam %g: ' % self.count, end='')

        # Padded resize
        img = letterbox(img0, new_shape=self.img_size)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return img_path, img, img0, None

    def __len__(self):
        return 0


class LoadStreams:  # multiple IP or RTSP cameras
    def __init__(self, sources='streams.txt', img_size=640):
        self.mode = 'images'
        self.img_size = img_size

        if os.path.isfile(sources):
            with open(sources, 'r') as f:
                sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs = [None] * n
        self.sources = sources
        for i, s in enumerate(sources):
            # Start the thread to read frames from the video stream
            print('%g/%g: %s... ' % (i + 1, n, s), end='')
            cap = cv2.VideoCapture(0 if s == '0' else s)
            assert cap.isOpened(), 'Failed to open %s' % s
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS) % 100
            _, self.imgs[i] = cap.read()  # guarantee first frame
            thread = Thread(target=self.update, args=([i, cap]), daemon=True)
            print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
            thread.start()
        print('')  # newline

        # check for common shapes
        s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0)  # inference shapes
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')

    def update(self, index, cap):
        # Read next stream frame in a daemon thread
        n = 0
        while cap.isOpened():
            n += 1
            # _, self.imgs[index] = cap.read()
            cap.grab()
            if n == 4:  # read every 4th frame
                _, self.imgs[index] = cap.retrieve()
                n = 0
            time.sleep(0.01)  # wait time

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        img0 = self.imgs.copy()
        if cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox
        img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None

    def __len__(self):
        return 0  # 1E12 frames = 32 streams at 30 FPS for 30 years


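# Example usage of LoadStreams (a sketch; 'streams.txt' lists one camera/RTSP
# source per line, as the constructor above expects):
#     for sources, img, img0, _ in LoadStreams('streams.txt', img_size=640):
#         # img: (num_streams, 3, h, w) batch of letterboxed RGB frames
#         break

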
class LoadImagesAndLabels(Dataset):  # for training/testing
    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1):
        try:
            f = []  # image files
            for p in path if isinstance(path, list) else [path]:
                p = str(Path(p))  # os-agnostic
                parent = str(Path(p).parent) + os.sep
                if os.path.isfile(p):  # file
                    with open(p, 'r') as t:
                        t = t.read().splitlines()
                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                elif os.path.isdir(p):  # folder
                    f += glob.iglob(p + os.sep + '*.*')
                else:
                    raise Exception('%s does not exist' % p)
            self.img_files = sorted(
                [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats])
        except Exception as e:
            raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))

        n = len(self.img_files)
        assert n > 0, 'No images found in %s. See %s' % (path, help_url)
        bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
        nb = bi[-1] + 1  # number of batches

        self.n = n  # number of images
        self.batch = bi  # batch index of image
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride

        # Define labels
        self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt') for x in
                            self.img_files]

        # Check cache
        cache_path = str(Path(self.label_files[0]).parent) + '.cache'  # cached labels
        if os.path.isfile(cache_path):
            cache = torch.load(cache_path)  # load
            if cache['hash'] != get_hash(self.label_files + self.img_files):  # dataset changed
                cache = self.cache_labels(cache_path)  # re-cache
        else:
            cache = self.cache_labels(cache_path)  # cache

        # Get labels
        labels, shapes = zip(*[cache[x] for x in self.img_files])
        self.shapes = np.array(shapes, dtype=np.float64)
        self.labels = list(labels)

        # Rectangular Training  https://github.com/ultralytics/yolov3/issues/232
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            irect = ar.argsort()
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.labels = [self.labels[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride

        # Cache labels
        create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
        nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
        pbar = enumerate(self.label_files)
        if rank in [-1, 0]:
            pbar = tqdm(pbar)
        for i, file in pbar:
            l = self.labels[i]  # label
            if l is not None and l.shape[0]:
                assert l.shape[1] == 5, '> 5 label columns: %s' % file
                assert (l >= 0).all(), 'negative labels: %s' % file
                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                    nd += 1  # print('WARNING: duplicate rows in %s' % self.label_files[i])  # duplicate rows
                if single_cls:
                    l[:, 0] = 0  # force dataset into single-class mode
                self.labels[i] = l
                nf += 1  # file found

                # Create subdataset (a smaller dataset)
                if create_datasubset and ns < 1E4:
                    if ns == 0:
                        create_folder(path='./datasubset')
                        os.makedirs('./datasubset/images')
                    exclude_classes = 43
                    if exclude_classes not in l[:, 0]:
                        ns += 1
                        # shutil.copy(src=self.img_files[i], dst='./datasubset/images/')  # copy image
                        with open('./datasubset/images.txt', 'a') as f:
                            f.write(self.img_files[i] + '\n')

                # Extract object detection boxes for a second stage classifier
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w = img.shape[:2]
                    for j, x in enumerate(l):
                        f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(f).parent):
                            os.makedirs(Path(f).parent)  # make new output folder

                        b = x[1:] * [w, h, w, h]  # box
                        b[2:] = b[2:].max()  # rectangle to square
                        b[2:] = b[2:] * 1.3 + 30  # pad
                        b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)

                        b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                        b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                        assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
            else:
                ne += 1  # print('empty labels for image %s' % self.img_files[i])  # file empty
                # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i]))  # remove

            if rank in [-1, 0]:
                pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
                    cache_path, nf, nm, ne, nd, n)
        if nf == 0:
            s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
            print(s)
            assert not augment, '%s. Can not train without labels.' % s

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        self.imgs = [None] * n
        if cache_images:
            gb = 0  # Gigabytes of cached images
            pbar = tqdm(range(len(self.img_files)), desc='Caching images')
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            for i in pbar:  # max 10k images
                self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized
                gb += self.imgs[i].nbytes
                pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)

    def cache_labels(self, path='labels.cache'):
        # Cache dataset labels, check images and read shapes
        x = {}  # dict
        pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
        for (img, label) in pbar:
            try:
                l = []
                image = Image.open(img)
                image.verify()  # PIL verify
                # _ = io.imread(img)  # skimage verify (from skimage import io)
                shape = exif_size(image)  # image size
                assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels'
                if os.path.isfile(label):
                    with open(label, 'r') as f:
                        l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)  # labels
                if len(l) == 0:
                    l = np.zeros((0, 5), dtype=np.float32)
                x[img] = [l, shape]
            except Exception as e:
                x[img] = [None, None]
                print('WARNING: %s: %s' % (img, e))

        x['hash'] = get_hash(self.label_files + self.img_files)
        torch.save(x, path)  # save for next time
        return x

    def __len__(self):
        return len(self.img_files)

    # def __iter__(self):
    #     self.count = -1
    #     print('ran dataset iter')
    #     # self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
    #     return self

    def __getitem__(self, index):
        if self.image_weights:
            index = self.indices[index]

        hyp = self.hyp
        if self.mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

            # MixUp https://arxiv.org/pdf/1710.09412.pdf
            if random.random() < hyp['mixup']:
                img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1))
                r = np.random.beta(8.0, 8.0)  # mixup ratio, alpha=beta=8.0
                img = (img * r + img2 * (1 - r)).astype(np.uint8)
                labels = np.concatenate((labels, labels2), 0)

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            # Load labels
            labels = []
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]

        if self.augment:
            # Augment imagespace
            if not self.mosaic:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

            # Augment colorspace
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Apply cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)

        nL = len(labels)  # number of labels
        if nL:
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
            labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
            labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1

        if self.augment:
            # flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

            # flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
    def collate_fn(batch):
        img, label, path, shapes = zip(*batch)  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes


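# The on-disk label format consumed by LoadImagesAndLabels: one object per line,
# '<class> <x_center> <y_center> <width> <height>' with coordinates normalized
# to [0, 1], as enforced by the assertions in __init__ above. The numbers below
# are illustrative:
#     0 0.481 0.634 0.690 0.713
#     45 0.339 0.418 0.122 0.098
# Each image such as data/images/foo.jpg pairs with data/labels/foo.txt via the
# 'images' -> 'labels' path substitution performed in __init__.

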
# Ancillary functions --------------------------------------------------------------------------------------------------
def load_image(self, index):
    # loads 1 image from dataset, returns img, original hw, resized hw
    img = self.imgs[index]
    if img is None:  # not cached
        path = self.img_files[index]
        img = cv2.imread(path)  # BGR
        assert img is not None, 'Image Not Found ' + path
        h0, w0 = img.shape[:2]  # orig hw
        r = self.img_size / max(h0, w0)  # resize image to img_size
        if r != 1:  # always resize down, only resize up if training with augmentation
            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
        return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
    else:
        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized


def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    dtype = img.dtype  # uint8

    x = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed

    # Histogram equalization
    # if random.random() < 0.2:
    #     for i in range(3):
    #         img[:, :, i] = cv2.equalizeHist(img[:, :, i])


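# Example: augment_hsv() jitters an image in place (a sketch; 'bus.jpg' is a
# hypothetical file, and the gains mirror the defaults above):
#     img = cv2.imread('bus.jpg')  # BGR uint8
#     augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5)  # img is modified in place

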
def load_mosaic(self, index):
    # loads images in a mosaic
    labels4 = []
    s = self.img_size
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img4
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        x = self.labels[index]
        labels = x.copy()
        if x.size > 0:  # Normalized xywh to pixel xyxy format
            labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
            labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
            labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
            labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
        labels4.append(labels)

    # Concat/clip labels
    if len(labels4):
        labels4 = np.concatenate(labels4, 0)
        # np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:])  # use with center crop
        np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_affine

        # Replicate
        # img4, labels4 = replicate(img4, labels4)

    # Augment
    # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)]  # center crop (WARNING, requires box pruning)
    img4, labels4 = random_perspective(img4, labels4,
                                       degrees=self.hyp['degrees'],
                                       translate=self.hyp['translate'],
                                       scale=self.hyp['scale'],
                                       shear=self.hyp['shear'],
                                       perspective=self.hyp['perspective'],
                                       border=self.mosaic_border)  # border to remove

    return img4, labels4


def replicate(img, labels):
    # Replicate labels
    h, w = img.shape[:2]
    boxes = labels[:, 1:].astype(int)
    x1, y1, x2, y2 = boxes.T
    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
        x1b, y1b, x2b, y2b = boxes[i]
        bh, bw = y2b - y1b, x2b - x1b
        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
        img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)

    return img, labels


def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
    # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)


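# Worked example for letterbox(), computed from the code above: a 720x1280 (h, w)
# image with the default new_shape=(640, 640) and auto=True gives r = 0.5, so it
# is resized to 360x640; dh = 280 shrinks to 280 % 64 = 24, split into 12 px of
# top and bottom padding:
#     img, ratio, (dw, dh) = letterbox(np.zeros((720, 1280, 3), np.uint8))
#     # img.shape == (384, 640, 3); ratio == (0.5, 0.5); (dw, dh) == (0.0, 12.0)

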
def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # targets = [cls, xyxy]

    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Center
    C = np.eye(3)
    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined rotation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine
            img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Visualize
    # import matplotlib.pyplot as plt
    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
    # ax[0].imshow(img[:, :, ::-1])  # base
    # ax[1].imshow(img2[:, :, ::-1])  # warped

    # Transform label coordinates
    n = len(targets)
    if n:
        # warp points
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = xy @ M.T  # transform
        if perspective:
            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale
        else:  # affine
            xy = xy[:, :2].reshape(n, 8)

        # create new boxes
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # # apply angle-based reduction of bounding boxes
        # radians = a * math.pi / 180
        # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
        # x = (xy[:, 2] + xy[:, 0]) / 2
        # y = (xy[:, 3] + xy[:, 1]) / 2
        # w = (xy[:, 2] - xy[:, 0]) * reduction
        # h = (xy[:, 3] - xy[:, 1]) * reduction
        # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

        # clip boxes
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)

        # filter candidates
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T)
        targets = targets[i]
        targets[:, 1:5] = xy[i]

    return img, targets


def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1):  # box1(4,n), box2(4,n)
    # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr)  # candidates


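# Worked example for box_candidates(): a box of 100x100 before augmentation that
# shrinks to 30x4 afterwards passes the wh_thr=2 and ar_thr=20 tests (ar = 7.5),
# but its area ratio 120/10000 = 0.012 falls below area_thr=0.1, so it is dropped:
#     b1 = np.array([[0.], [0.], [100.], [100.]])  # x1, y1, x2, y2 as (4, n)
#     b2 = np.array([[0.], [0.], [30.], [4.]])
#     box_candidates(b1, b2)  # -> array([False])

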
def cutout(image, labels):
    # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
    h, w = image.shape[:2]

    def bbox_ioa(box1, box2):
        # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
        box2 = box2.transpose()

        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

        # Intersection area
        inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
                     (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)

        # box2 area
        box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16

        # Intersection over box2 area
        return inter_area / box2_area

    # create random masks
    scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
    for s in scales:
        mask_h = random.randint(1, int(h * s))
        mask_w = random.randint(1, int(w * s))

        # box
        xmin = max(0, random.randint(0, w) - mask_w // 2)
        ymin = max(0, random.randint(0, h) - mask_h // 2)
        xmax = min(w, xmin + mask_w)
        ymax = min(h, ymin + mask_h)

        # apply random color mask
        image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

        # return unobscured labels
        if len(labels) and s > 0.03:
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels


def reduce_img_size(path='path/images', img_size=1024):  # from utils.datasets import *; reduce_img_size()
    # creates a new ./images_reduced folder with reduced size images of maximum size img_size
    path_new = path + '_reduced'  # reduced images path
    create_folder(path_new)
    for f in tqdm(glob.glob('%s/*.*' % path)):
        try:
            img = cv2.imread(f)
            h, w = img.shape[:2]
            r = img_size / max(h, w)  # size ratio
            if r < 1.0:
                img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA)  # _LINEAR fastest
            fnew = f.replace(path, path_new)  # .replace(Path(f).suffix, '.jpg')
            cv2.imwrite(fnew, img)
        except:
            print('WARNING: image failure %s' % f)


def recursive_dataset2bmp(dataset='path/dataset_bmp'):  # from utils.datasets import *; recursive_dataset2bmp()
    # Converts dataset to bmp (for faster training)
    formats = [x.lower() for x in img_formats] + [x.upper() for x in img_formats]
    for a, b, files in os.walk(dataset):
        for file in tqdm(files, desc=a):
            p = a + '/' + file
            s = Path(file).suffix
            if s == '.txt':  # replace text
                with open(p, 'r') as f:
                    lines = f.read()
                for f in formats:
                    lines = lines.replace(f, '.bmp')
                with open(p, 'w') as f:
                    f.write(lines)
            elif s in formats:  # replace image
                cv2.imwrite(p.replace(s, '.bmp'), cv2.imread(p))
                if s != '.bmp':
                    os.system("rm '%s'" % p)


def imagelist2folder(path='path/images.txt'):  # from utils.datasets import *; imagelist2folder()
    # Copies all the images in a text file (list of images) into a folder
    create_folder(path[:-4])
    with open(path, 'r') as f:
        for line in f.read().splitlines():
            os.system('cp "%s" %s' % (line, path[:-4]))
            print(line)


def create_folder(path='./new'):
    # Create folder
    if os.path.exists(path):
        shutil.rmtree(path)  # delete output folder
    os.makedirs(path)  # make new output folder

With the growth of artificial intelligence and big data, many fields now have a clear need for automation tools. During the current period of epidemic prevention and control, this project uses MindSpore to implement a YOLO model for object detection and semantic segmentation: it can run mask-wearing detection and pedestrian social-distancing detection on both video and still images, enabling automated epidemic-control management of public spaces.