You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

common.py 11 kB

4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. #! /usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import tensorflow as tf
  4. import colorsys, random, cv2
  5. import numpy as np
  6. from tensorlayer.visualize import save_image
  7. def decode_tf(conv_output, output_size, NUM_CLASS, STRIDES, ANCHORS, i=0, XYSCALE=[1, 1, 1]):
  8. batch_size = tf.shape(conv_output)[0]
  9. conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
  10. conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob = tf.split(conv_output, (2, 2, 1, NUM_CLASS), axis=-1)
  11. xy_grid = tf.meshgrid(tf.range(output_size), tf.range(output_size))
  12. xy_grid = tf.expand_dims(tf.stack(xy_grid, axis=-1), axis=2) # [gx, gy, 1, 2]
  13. xy_grid = tf.tile(tf.expand_dims(xy_grid, axis=0), [batch_size, 1, 1, 3, 1])
  14. xy_grid = tf.cast(xy_grid, tf.float32)
  15. pred_xy = ((tf.sigmoid(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * \
  16. STRIDES[i]
  17. pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i])
  18. pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)
  19. pred_conf = tf.sigmoid(conv_raw_conf)
  20. pred_prob = tf.sigmoid(conv_raw_prob)
  21. pred_prob = pred_conf * pred_prob
  22. pred_prob = tf.reshape(pred_prob, (batch_size, -1, NUM_CLASS))
  23. pred_xywh = tf.reshape(pred_xywh, (batch_size, -1, 4))
  24. return pred_xywh, pred_prob
  25. def decode(conv_output, output_size, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE=[1, 1, 1]):
  26. return decode_tf(conv_output, output_size, NUM_CLASS, STRIDES, ANCHORS, i=i, XYSCALE=XYSCALE)
  27. def filter_boxes(box_xywh, scores, score_threshold=0.4, input_shape=tf.constant([416, 416])):
  28. scores_max = tf.math.reduce_max(scores, axis=-1)
  29. mask = scores_max >= score_threshold
  30. class_boxes = tf.boolean_mask(box_xywh, mask)
  31. pred_conf = tf.boolean_mask(scores, mask)
  32. class_boxes = tf.reshape(class_boxes, [tf.shape(scores)[0], -1, tf.shape(class_boxes)[-1]])
  33. pred_conf = tf.reshape(pred_conf, [tf.shape(scores)[0], -1, tf.shape(pred_conf)[-1]])
  34. box_xy, box_wh = tf.split(class_boxes, (2, 2), axis=-1)
  35. input_shape = tf.cast(input_shape, dtype=tf.float32)
  36. box_yx = box_xy[..., ::-1]
  37. box_hw = box_wh[..., ::-1]
  38. box_mins = (box_yx - (box_hw / 2.)) / input_shape
  39. box_maxes = (box_yx + (box_hw / 2.)) / input_shape
  40. boxes = tf.concat(
  41. [
  42. box_mins[..., 0:1], # y_min
  43. box_mins[..., 1:2], # x_min
  44. box_maxes[..., 0:1], # y_max
  45. box_maxes[..., 1:2] # x_max
  46. ],
  47. axis=-1
  48. )
  49. # return tf.concat([boxes, pred_conf], axis=-1)
  50. return (boxes, pred_conf)
  51. def read_class_names(class_file_name):
  52. names = {}
  53. with open(class_file_name, 'r') as data:
  54. for ID, name in enumerate(data):
  55. names[ID] = name.strip('\n')
  56. return names
  57. def draw_bbox(image, bboxes, show_label=True):
  58. classes = read_class_names('model/coco.names')
  59. num_classes = len(classes)
  60. image_h, image_w, _ = image.shape
  61. hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
  62. colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
  63. colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
  64. random.seed(0)
  65. random.shuffle(colors)
  66. random.seed(None)
  67. out_boxes, out_scores, out_classes, num_boxes = bboxes
  68. for i in range(num_boxes[0]):
  69. if int(out_classes[0][i]) < 0 or int(out_classes[0][i]) > num_classes: continue
  70. coor = out_boxes[0][i]
  71. coor[0] = int(coor[0] * image_h)
  72. coor[2] = int(coor[2] * image_h)
  73. coor[1] = int(coor[1] * image_w)
  74. coor[3] = int(coor[3] * image_w)
  75. fontScale = 0.5
  76. score = out_scores[0][i]
  77. class_ind = int(out_classes[0][i])
  78. bbox_color = colors[class_ind]
  79. bbox_thick = int(0.6 * (image_h + image_w) / 600)
  80. c1, c2 = (coor[1], coor[0]), (coor[3], coor[2])
  81. cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
  82. if show_label:
  83. bbox_mess = '%s: %.2f' % (classes[class_ind], score)
  84. t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick // 2)[0]
  85. c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
  86. cv2.rectangle(image, c1, (np.float32(c3[0]), np.float32(c3[1])), bbox_color, -1) #filled
  87. cv2.putText(
  88. image, bbox_mess, (c1[0], np.float32(c1[1] - 2)), cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 0),
  89. bbox_thick // 2, lineType=cv2.LINE_AA
  90. )
  91. return image
  92. def get_anchors(anchors_path, tiny=False):
  93. anchors = np.array(anchors_path)
  94. if tiny:
  95. return anchors.reshape(2, 3, 2)
  96. else:
  97. return anchors.reshape(3, 3, 2)
  98. def decode_train(conv_output, output_size, NUM_CLASS, STRIDES, ANCHORS, i=0, XYSCALE=[1, 1, 1]):
  99. conv_output = tf.reshape(conv_output, (tf.shape(conv_output)[0], output_size, output_size, 3, 5 + NUM_CLASS))
  100. conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob = tf.split(conv_output, (2, 2, 1, NUM_CLASS), axis=-1)
  101. xy_grid = tf.meshgrid(tf.range(output_size), tf.range(output_size))
  102. xy_grid = tf.expand_dims(tf.stack(xy_grid, axis=-1), axis=2) # [gx, gy, 1, 2]
  103. xy_grid = tf.tile(tf.expand_dims(xy_grid, axis=0), [tf.shape(conv_output)[0], 1, 1, 3, 1])
  104. xy_grid = tf.cast(xy_grid, tf.float32)
  105. pred_xy = ((tf.sigmoid(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * \
  106. STRIDES[i]
  107. pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i])
  108. pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)
  109. pred_conf = tf.sigmoid(conv_raw_conf)
  110. pred_prob = tf.sigmoid(conv_raw_prob)
  111. return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
  112. def yolo4_input_processing(original_image):
  113. image_data = cv2.resize(original_image, (416, 416))
  114. image_data = image_data / 255.
  115. images_data = []
  116. for i in range(1):
  117. images_data.append(image_data)
  118. images_data = np.asarray(images_data).astype(np.float32)
  119. batch_data = tf.constant(images_data)
  120. return batch_data
  121. def yolo4_output_processing(feature_maps):
  122. STRIDES = [8, 16, 32]
  123. ANCHORS = get_anchors([12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401])
  124. NUM_CLASS = 80
  125. XYSCALE = [1.2, 1.1, 1.05]
  126. iou_threshold = 0.45
  127. score_threshold = 0.25
  128. bbox_tensors = []
  129. prob_tensors = []
  130. score_thres = 0.2
  131. for i, fm in enumerate(feature_maps):
  132. if i == 0:
  133. output_tensors = decode(fm, 416 // 8, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
  134. elif i == 1:
  135. output_tensors = decode(fm, 416 // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
  136. else:
  137. output_tensors = decode(fm, 416 // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
  138. bbox_tensors.append(output_tensors[0])
  139. prob_tensors.append(output_tensors[1])
  140. pred_bbox = tf.concat(bbox_tensors, axis=1)
  141. pred_prob = tf.concat(prob_tensors, axis=1)
  142. boxes, pred_conf = filter_boxes(
  143. pred_bbox, pred_prob, score_threshold=score_thres, input_shape=tf.constant([416, 416])
  144. )
  145. pred = {'concat': tf.concat([boxes, pred_conf], axis=-1)}
  146. for key, value in pred.items():
  147. boxes = value[:, :, 0:4]
  148. pred_conf = value[:, :, 4:]
  149. boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
  150. boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
  151. scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
  152. max_output_size_per_class=50, max_total_size=50, iou_threshold=iou_threshold, score_threshold=score_threshold
  153. )
  154. output = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
  155. return output
  156. def result_to_json(image, pred_bbox):
  157. image_h, image_w, _ = image.shape
  158. out_boxes, out_scores, out_classes, num_boxes = pred_bbox
  159. class_names = {}
  160. json_result = []
  161. with open('model/coco.names', 'r') as data:
  162. for ID, name in enumerate(data):
  163. class_names[ID] = name.strip('\n')
  164. nums_class = len(class_names)
  165. for i in range(num_boxes[0]):
  166. if int(out_classes[0][i]) < 0 or int(out_classes[0][i]) > nums_class: continue
  167. coor = out_boxes[0][i]
  168. coor[0] = int(coor[0] * image_h)
  169. coor[2] = int(coor[2] * image_h)
  170. coor[1] = int(coor[1] * image_w)
  171. coor[3] = int(coor[3] * image_w)
  172. score = float(out_scores[0][i])
  173. class_ind = int(out_classes[0][i])
  174. bbox = np.array([coor[1], coor[0], coor[3], coor[2]]).tolist() # [x1,y1,x2,y2]
  175. json_result.append({'image': None, 'category_id': class_ind, 'bbox': bbox, 'score': score})
  176. return json_result
  177. def draw_boxes_and_labels_to_image_with_json(image, json_result, class_list, save_name=None):
  178. """Draw bboxes and class labels on image. Return the image with bboxes.
  179. Parameters
  180. -----------
  181. image : numpy.array
  182. The RGB image [height, width, channel].
  183. json_result : list of dict
  184. The object detection result with json format.
  185. classes_list : list of str
  186. For converting ID to string on image.
  187. save_name : None or str
  188. The name of image file (i.e. image.png), if None, not to save image.
  189. Returns
  190. -------
  191. numpy.array
  192. The saved image.
  193. References
  194. -----------
  195. - OpenCV rectangle and putText.
  196. - `scikit-image <http://scikit-image.org/docs/dev/api/skimage.draw.html#skimage.draw.rectangle>`__.
  197. """
  198. image_h, image_w, _ = image.shape
  199. num_classes = len(class_list)
  200. hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
  201. colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
  202. colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
  203. random.seed(0)
  204. random.shuffle(colors)
  205. random.seed(None)
  206. bbox_thick = int(0.6 * (image_h + image_w) / 600)
  207. fontScale = 0.5
  208. for bbox_info in json_result:
  209. image_name = bbox_info['image']
  210. category_id = bbox_info['category_id']
  211. if category_id < 0 or category_id > num_classes: continue
  212. bbox = bbox_info['bbox'] # the order of coordinates is [x1, y2, x2, y2]
  213. score = bbox_info['score']
  214. bbox_color = colors[category_id]
  215. c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
  216. cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
  217. bbox_mess = '%s: %.2f' % (class_list[category_id], score)
  218. t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick // 2)[0]
  219. c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
  220. cv2.rectangle(image, c1, (np.float32(c3[0]), np.float32(c3[1])), bbox_color, -1)
  221. cv2.putText(
  222. image, bbox_mess, (c1[0], np.float32(c1[1] - 2)), cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 0),
  223. bbox_thick // 2, lineType=cv2.LINE_AA
  224. )
  225. if save_name is not None:
  226. save_image(image, save_name)
  227. return image

TensorLayer 3.0 是一款兼容多种深度学习框架作为计算后端的深度学习库，计划兼容 TensorFlow、PyTorch、MindSpore 和 Paddle。