yolo3_multiscale.py 29 kB

import logging
import os

import cv2
import numpy as np
import tensorflow as tf

from resnet18 import ResNet18

LOG = logging.getLogger(__name__)

flags = tf.flags.FLAGS


class Yolo3:
    """YOLOv3 multi-scale detector built on a ResNet18 backbone (TensorFlow 1.x)."""

    def __init__(self, sess, is_training, config):
        LOG.info('is_training: %s' % is_training)
        LOG.info('model dir: %s' % flags.train_url)
        LOG.info('input_shape: (%d, %d)' % (flags.input_shape[0], flags.input_shape[1]))
        LOG.info('learning rate: %f' % float(flags.learning_rate))

        self.is_training = is_training
        self.model_dir = flags.train_url
        self.norm_epsilon = config.norm_epsilon
        self.norm_decay = config.norm_decay
        self.obj_threshold = float(flags.obj_threshold)
        self.nms_threshold = float(flags.nms_threshold)
        self.anchors = np.array([float(x) for x in config.anchors]).reshape(-1, 2)
        self.class_names = flags.class_names
        self.num_classes = len(self.class_names)
        self.input_shape = flags.input_shape
        self.nas_sequence = flags.nas_sequence
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        print("anchors : ", self.anchors)
        print("class_names : ", self.class_names)

        if is_training:
            self.images = tf.placeholder(shape=[None, None, None, 3], dtype=tf.float32, name='images')
        else:
            self.images = tf.placeholder(shape=[1, self.input_shape[0], self.input_shape[1], 3], dtype=tf.float32,
                                         name='images')
        self.image_shape = tf.placeholder(dtype=tf.int32, shape=(2,), name='shapes')

        # Ground truth for the three detection scales: (batch, grid_h, grid_w, 3 anchors, 5 + num_classes).
        self.bbox_true_13 = tf.placeholder(shape=[None, None, None, 3, self.num_classes + 5], dtype=tf.float32)
        self.bbox_true_26 = tf.placeholder(shape=[None, None, None, 3, self.num_classes + 5], dtype=tf.float32)
        self.bbox_true_52 = tf.placeholder(shape=[None, None, None, 3, self.num_classes + 5], dtype=tf.float32)
        bbox_true = [self.bbox_true_13, self.bbox_true_26, self.bbox_true_52]

        features_out, filters_yolo_block, conv_index = self._resnet18(self.images, self.is_training)
        self.output = self.yolo_inference(features_out, filters_yolo_block, conv_index, len(self.anchors) // 3,
                                          self.num_classes, self.is_training)
        self.loss = self.yolo_loss(self.output, bbox_true, self.anchors, self.num_classes, config.ignore_thresh)
        self.global_step = tf.Variable(0, trainable=False)

        if self.is_training:
            learning_rate = tf.train.exponential_decay(float(flags.learning_rate), self.global_step, 1000, 0.95,
                                                       staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_op = optimizer.minimize(loss=self.loss, global_step=self.global_step)
        else:
            self.boxes, self.scores, self.classes = self.yolo_eval(self.output, self.image_shape, config.max_boxes)

        self.saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(flags.train_url)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            if not flags.label_changed:
                print('restore model', ckpt.model_checkpoint_path)
                self.saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                # The label set changed: initialize everything, then restore only the variables
                # that are unaffected (skip Adam slots and the per-class output layers).
                print('restore model', ckpt.model_checkpoint_path)
                sess.run(tf.global_variables_initializer())
                sess.run(tf.local_variables_initializer())
                variables = tf.global_variables()
                vars_restore = [var for var in variables if not ("Adam" in var.name
                                                                 or '25' in var.name
                                                                 or '33' in var.name
                                                                 or '41' in var.name)]  # or ("yolo" in var.name))]
                saver_restore = tf.train.Saver(vars_restore)
                saver_restore.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('initialize model with fresh weights...')
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

    def load_weights(self, sess, fpath):
        """Assign variables from a NumPy weight archive, skipping names not present in the file."""
        sess = tf.get_default_session()
        variables = sess.graph.get_collection("variables")
        data = np.load(fpath)
        for v in variables:
            vname = v.name.replace(':0', '')
            if vname not in data:
                print("----------skip %s----------" % vname)
                continue
            print("assigning %s" % vname)
            sess.run(v.assign(data[vname]))

    def step(self, sess, batch_data, is_training):
        """Run one batch through the network; when training, also apply one gradient update."""
        # Input feed
        input_feed = {}
        input_feed[self.images] = batch_data['images']
        input_feed[self.bbox_true_13] = batch_data['bbox_true_13']
        input_feed[self.bbox_true_26] = batch_data['bbox_true_26']
        input_feed[self.bbox_true_52] = batch_data['bbox_true_52']
        # Output feed: depends on training or test
        output_feed = [self.loss]  # Loss for this batch.
        if is_training:
            output_feed.append(self.train_op)  # Gradient updates
        outputs = sess.run(output_feed, input_feed)
        return outputs[0]  # loss
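
    # A minimal training-loop sketch showing how `step` is meant to be driven: one
    # sess.run per batch, returning the scalar loss. `batch_generator` and `num_epochs`
    # are hypothetical names, not part of this file; flag values must be set elsewhere.
    #
    #   config = YoloConfig()
    #   with tf.Session() as sess:
    #       model = Yolo3(sess, is_training=True, config=config)
    #       for epoch in range(num_epochs):
    #           for batch_data in batch_generator():
    #               loss = model.step(sess, batch_data, is_training=True)
    #           print('epoch %d, loss %.4f' % (epoch, loss))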

    def _batch_normalization_layer(self, input_layer, name=None, training=True, norm_decay=0.997, norm_epsilon=1e-5):
        """Batch normalization for a feature map produced by a convolution layer.
        :param input_layer: four-dimensional input tensor
        :param name: name of the batchnorm layer
        :param training: whether this is a training pass
        :param norm_decay: decay rate of the moving averages used at prediction time
        :param norm_epsilon: small constant added to the variance to prevent division by zero
        :return bn_layer: feature map after batch normalization
        """
        bn_layer = tf.layers.batch_normalization(inputs=input_layer,
                                                 momentum=norm_decay, epsilon=norm_epsilon, center=True,
                                                 scale=True, training=training, name=name, fused=True)
        return tf.nn.relu(bn_layer)
        # return tf.nn.leaky_relu(bn_layer, alpha=0.1)

    def _conv2d_layer(self, inputs, filters_num, kernel_size, name, use_bias=False, strides=1):
        """Use tf.layers.conv2d so weight/bias initialization and the convolution-plus-bias
        operation are handled in one call.
        :param inputs: input tensor
        :param filters_num: number of convolution kernels
        :param strides: convolution stride
        :param name: name of the convolution layer
        :param use_bias: whether to add a bias term
        :param kernel_size: kernel size
        :return conv: feature map after convolution
        """
        if strides > 1:  # modified 0327
            # Explicit top/left padding so strided convolutions downsample like Darknet.
            inputs = tf.pad(inputs, paddings=[[0, 0], [1, 0], [1, 0], [0, 0]], mode='CONSTANT')
        conv = tf.layers.conv2d(inputs=inputs, filters=filters_num,
                                kernel_size=kernel_size, strides=[strides, strides],
                                padding=('SAME' if strides == 1 else 'VALID'),  # padding = 'SAME', #
                                use_bias=use_bias,
                                name=name)  # , kernel_initializer = tf.contrib.layers.xavier_initializer()
        return conv

    def _Residual_block(self, inputs, filters_num, blocks_num, conv_index, training=True, norm_decay=0.997,
                        norm_epsilon=1e-5):
        # Downsample, then stack `blocks_num` residual blocks (1x1 bottleneck + 3x3 conv + shortcut).
        layer = self._conv2d_layer(inputs, filters_num, kernel_size=3, strides=2, name="conv2d_" + str(conv_index))
        layer = self._batch_normalization_layer(layer, name="batch_normalization_" + str(conv_index), training=training,
                                                norm_decay=norm_decay, norm_epsilon=norm_epsilon)
        conv_index += 1
        for _ in range(blocks_num):
            shortcut = layer
            layer = self._conv2d_layer(layer, filters_num // 2, kernel_size=1, strides=1,
                                       name="conv2d_" + str(conv_index))
            layer = self._batch_normalization_layer(layer, name="batch_normalization_" + str(conv_index),
                                                    training=training, norm_decay=norm_decay, norm_epsilon=norm_epsilon)
            conv_index += 1
            layer = self._conv2d_layer(layer, filters_num, kernel_size=3, strides=1, name="conv2d_" + str(conv_index))
            layer = self._batch_normalization_layer(layer, name="batch_normalization_" + str(conv_index),
                                                    training=training, norm_decay=norm_decay, norm_epsilon=norm_epsilon)
            conv_index += 1
            layer += shortcut
        return layer, conv_index

    def _resnet18(self, inputs, training=True):
        # ResNet18 backbone; the conv5/conv4/conv3 outputs (deepest first) feed the three
        # YOLO detection heads.
        cnn_model = ResNet18(inputs, training)
        for k, v in cnn_model.end_points.items():
            print(k)
            print(v)
        features_out = [cnn_model.end_points['conv5_output'], cnn_model.end_points['conv4_output'],
                        cnn_model.end_points['conv3_output']]
        filters_yolo_block = [256, 128, 64]
        conv_index = 19
        return features_out, filters_yolo_block, conv_index

    def _yolo_block(self, inputs, filters_num, out_filters, conv_index, training=True, norm_decay=0.997,
                    norm_epsilon=1e-5):
        # Standard YOLOv3 detection block: five alternating 1x1/3x3 convolutions (the last 1x1
        # output is returned as `route`), then a 3x3 conv and a final 1x1 conv producing the
        # raw per-anchor predictions.
        conv = self._conv2d_layer(inputs, filters_num=filters_num, kernel_size=1, strides=1,
                                  name="conv2d_" + str(conv_index))
        conv = self._batch_normalization_layer(conv, name="batch_normalization_" + str(conv_index), training=training,
                                               norm_decay=norm_decay, norm_epsilon=norm_epsilon)
        conv_index += 1
        conv = self._conv2d_layer(conv, filters_num=filters_num * 2, kernel_size=3, strides=1,
                                  name="conv2d_" + str(conv_index))
        conv = self._batch_normalization_layer(conv, name="batch_normalization_" + str(conv_index), training=training,
                                               norm_decay=norm_decay, norm_epsilon=norm_epsilon)
        conv_index += 1
        conv = self._conv2d_layer(conv, filters_num=filters_num, kernel_size=1, strides=1,
                                  name="conv2d_" + str(conv_index))
        conv = self._batch_normalization_layer(conv, name="batch_normalization_" + str(conv_index), training=training,
                                               norm_decay=norm_decay, norm_epsilon=norm_epsilon)
        conv_index += 1
        conv = self._conv2d_layer(conv, filters_num=filters_num * 2, kernel_size=3, strides=1,
                                  name="conv2d_" + str(conv_index))
        conv = self._batch_normalization_layer(conv, name="batch_normalization_" + str(conv_index), training=training,
                                               norm_decay=norm_decay, norm_epsilon=norm_epsilon)
        conv_index += 1
        conv = self._conv2d_layer(conv, filters_num=filters_num, kernel_size=1, strides=1,
                                  name="conv2d_" + str(conv_index))
        conv = self._batch_normalization_layer(conv, name="batch_normalization_" + str(conv_index), training=training,
                                               norm_decay=norm_decay, norm_epsilon=norm_epsilon)
        conv_index += 1
        route = conv
        conv = self._conv2d_layer(conv, filters_num=filters_num * 2, kernel_size=3, strides=1,
                                  name="conv2d_" + str(conv_index))
        conv = self._batch_normalization_layer(conv, name="batch_normalization_" + str(conv_index), training=training,
                                               norm_decay=norm_decay, norm_epsilon=norm_epsilon)
        conv_index += 1
        conv = self._conv2d_layer(conv, filters_num=out_filters, kernel_size=1, strides=1,
                                  name="conv2d_" + str(conv_index), use_bias=True)
        conv_index += 1
        return route, conv, conv_index

    def yolo_inference(self, features_out, filters_yolo_block, conv_index, num_anchors, num_classes, training=True):
        # Build the three detection heads on top of the backbone features, upsampling and
        # concatenating to form the feature pyramid.
        conv = features_out[0]
        conv2d_45 = features_out[1]
        conv2d_26 = features_out[2]
        print('conv : ', conv)
        print('conv2d_45 : ', conv2d_45)
        print('conv2d_26 : ', conv2d_26)
        with tf.variable_scope('yolo'):
            conv2d_57, conv2d_59, conv_index = self._yolo_block(conv, filters_yolo_block[0],
                                                                num_anchors * (num_classes + 5), conv_index=conv_index,
                                                                training=training, norm_decay=self.norm_decay,
                                                                norm_epsilon=self.norm_epsilon)
            print('conv2d_59 : ', conv2d_59)
            print('conv2d_57 : ', conv2d_57)
            conv2d_60 = self._conv2d_layer(conv2d_57, filters_num=filters_yolo_block[1], kernel_size=1, strides=1,
                                           name="conv2d_" + str(conv_index))
            conv2d_60 = self._batch_normalization_layer(conv2d_60, name="batch_normalization_" + str(conv_index),
                                                        training=training, norm_decay=self.norm_decay,
                                                        norm_epsilon=self.norm_epsilon)
            print('conv2d_60 : ', conv2d_60)
            conv_index += 1
            upSample_0 = tf.image.resize_nearest_neighbor(conv2d_60,
                                                          [2 * tf.shape(conv2d_60)[1], 2 * tf.shape(conv2d_60)[2]],
                                                          name='upSample_0')
            print('upSample_0 : ', upSample_0)
            route0 = tf.concat([upSample_0, conv2d_45], axis=-1, name='route_0')
            print('route0 : ', route0)
            conv2d_65, conv2d_67, conv_index = self._yolo_block(route0, filters_yolo_block[1],
                                                                num_anchors * (num_classes + 5), conv_index=conv_index,
                                                                training=training, norm_decay=self.norm_decay,
                                                                norm_epsilon=self.norm_epsilon)
            print('conv2d_67 : ', conv2d_67)
            print('conv2d_65 : ', conv2d_65)
            conv2d_68 = self._conv2d_layer(conv2d_65, filters_num=filters_yolo_block[2], kernel_size=1, strides=1,
                                           name="conv2d_" + str(conv_index))
            conv2d_68 = self._batch_normalization_layer(conv2d_68, name="batch_normalization_" + str(conv_index),
                                                        training=training, norm_decay=self.norm_decay,
                                                        norm_epsilon=self.norm_epsilon)
            print('conv2d_68 : ', conv2d_68)
            conv_index += 1
            upSample_1 = tf.image.resize_nearest_neighbor(conv2d_68,
                                                          [2 * tf.shape(conv2d_68)[1], 2 * tf.shape(conv2d_68)[2]],
                                                          name='upSample_1')
            print('upSample_1 : ', upSample_1)
            route1 = tf.concat([upSample_1, conv2d_26], axis=-1, name='route_1')
            print('route1 : ', route1)
            _, conv2d_75, _ = self._yolo_block(route1, filters_yolo_block[2], num_anchors * (num_classes + 5),
                                               conv_index=conv_index, training=training, norm_decay=self.norm_decay,
                                               norm_epsilon=self.norm_epsilon)
            print('conv2d_75 : ', conv2d_75)
        return [conv2d_59, conv2d_67, conv2d_75]

    def yolo_head(self, feats, anchors, num_classes, input_shape, training=True):
        # Decode the raw head output into box centers, sizes, objectness and class probabilities.
        num_anchors = len(anchors)
        anchors_tensor = tf.reshape(tf.constant(anchors, dtype=tf.float32), [1, 1, 1, num_anchors, 2])
        grid_size = tf.shape(feats)[1:3]
        predictions = tf.reshape(feats, [-1, grid_size[0], grid_size[1], num_anchors, num_classes + 5])
        grid_y = tf.tile(tf.reshape(tf.range(grid_size[0]), [-1, 1, 1, 1]), [1, grid_size[1], 1, 1])
        grid_x = tf.tile(tf.reshape(tf.range(grid_size[1]), [1, -1, 1, 1]), [grid_size[0], 1, 1, 1])
        grid = tf.concat([grid_x, grid_y], axis=-1)
        grid = tf.cast(grid, tf.float32)
        box_xy = (tf.sigmoid(predictions[..., :2]) + grid) / tf.cast(grid_size[::-1], tf.float32)
        box_wh = tf.exp(predictions[..., 2:4]) * anchors_tensor / input_shape[::-1]
        box_confidence = tf.sigmoid(predictions[..., 4:5])
        box_class_probs = tf.sigmoid(predictions[..., 5:])
        if training:
            return grid, predictions, box_xy, box_wh
        return box_xy, box_wh, box_confidence, box_class_probs
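
    # For reference, the decoding above follows the usual YOLOv3 formulation (a sketch of
    # what the graph computes, with t_* denoting the raw predictions and c_* the grid cell):
    #   b_x = (sigmoid(t_x) + c_x) / grid_w        b_w = anchor_w * exp(t_w) / input_w
    #   b_y = (sigmoid(t_y) + c_y) / grid_h        b_h = anchor_h * exp(t_h) / input_h
    # so box_xy and box_wh are normalized to [0, 1] relative to the network input size.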

    def yolo_boxes_scores(self, feats, anchors, num_classes, input_shape, image_shape):
        # Decode one detection head and map the boxes back to original-image coordinates,
        # undoing the letterbox padding applied during preprocessing.
        input_shape = tf.cast(input_shape, tf.float32)
        image_shape = tf.cast(image_shape, tf.float32)
        box_xy, box_wh, box_confidence, box_class_probs = self.yolo_head(feats, anchors, num_classes, input_shape,
                                                                         training=False)
        box_yx = box_xy[..., ::-1]
        box_hw = box_wh[..., ::-1]
        new_shape = tf.round(image_shape * tf.reduce_min(input_shape / image_shape))
        offset = (input_shape - new_shape) / 2. / input_shape
        scale = input_shape / new_shape
        box_yx = (box_yx - offset) * scale
        box_hw = box_hw * scale
        box_min = box_yx - box_hw / 2.
        box_max = box_yx + box_hw / 2.
        boxes = tf.concat(
            [box_min[..., 0:1],
             box_min[..., 1:2],
             box_max[..., 0:1],
             box_max[..., 1:2]],
            axis=-1
        )
        boxes *= tf.concat([image_shape, image_shape], axis=-1)
        boxes = tf.reshape(boxes, [-1, 4])
        boxes_scores = box_confidence * box_class_probs
        boxes_scores = tf.reshape(boxes_scores, [-1, num_classes])
        return boxes, boxes_scores

    def box_iou(self, box1, box2):
        # Pairwise IoU between two sets of boxes given as (x, y, w, h).
        box1 = tf.expand_dims(box1, -2)
        box1_xy = box1[..., :2]
        box1_wh = box1[..., 2:4]
        box1_mins = box1_xy - box1_wh / 2.
        box1_maxs = box1_xy + box1_wh / 2.
        box2 = tf.expand_dims(box2, 0)
        box2_xy = box2[..., :2]
        box2_wh = box2[..., 2:4]
        box2_mins = box2_xy - box2_wh / 2.
        box2_maxs = box2_xy + box2_wh / 2.
        intersect_mins = tf.maximum(box1_mins, box2_mins)
        intersect_maxs = tf.minimum(box1_maxs, box2_maxs)
        intersect_wh = tf.maximum(intersect_maxs - intersect_mins, 0.)
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        box1_area = box1_wh[..., 0] * box1_wh[..., 1]
        box2_area = box2_wh[..., 0] * box2_wh[..., 1]
        iou = intersect_area / (box1_area + box2_area - intersect_area)
        return iou
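
    # Quick sanity check for box_iou (a hypothetical snippet, evaluated in a session):
    #   b1 = tf.constant([[0.5, 0.5, 1.0, 1.0]])   # one (x, y, w, h) box covering [0,1]x[0,1]
    #   b2 = tf.constant([[0.5, 0.5, 0.5, 0.5]])   # a box with a quarter of that area
    #   sess.run(self.box_iou(b1, b2))             # -> [[0.25]]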

    def yolo_loss(self, yolo_output, y_true, anchors, num_classes, ignore_thresh=.5):
        # Sum of xy, wh, confidence and class losses over the three detection scales,
        # normalized by the batch size.
        loss = 0.0
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        input_shape = tf.shape(yolo_output[0])[1:3] * 32
        input_shape = tf.cast(input_shape, tf.float32)
        grid_shapes = [tf.cast(tf.shape(yolo_output[l])[1:3], tf.float32) for l in range(3)]
        for index in range(3):
            object_mask = y_true[index][..., 4:5]
            class_probs = y_true[index][..., 5:]
            grid, predictions, pred_xy, pred_wh = self.yolo_head(yolo_output[index], anchors[anchor_mask[index]],
                                                                 num_classes, input_shape, training=True)
            pred_box = tf.concat([pred_xy, pred_wh], axis=-1)
            raw_true_xy = y_true[index][..., :2] * grid_shapes[index][::-1] - grid
            object_mask_bool = tf.cast(object_mask, dtype=tf.bool)
            raw_true_wh = tf.log(
                tf.where(tf.equal(y_true[index][..., 2:4] / anchors[anchor_mask[index]] * input_shape[::-1], 0),
                         tf.ones_like(y_true[index][..., 2:4]),
                         y_true[index][..., 2:4] / anchors[anchor_mask[index]] * input_shape[::-1]))
            box_loss_scale = 2 - y_true[index][..., 2:3] * y_true[index][..., 3:4]
            ignore_mask = tf.TensorArray(dtype=tf.float32, size=1, dynamic_size=True)

            def loop_body(internal_index, ignore_mask):
                # Ignore predictions whose best IoU with any ground-truth box exceeds ignore_thresh.
                true_box = tf.boolean_mask(y_true[index][internal_index, ..., 0:4],
                                           object_mask_bool[internal_index, ..., 0])
                iou = self.box_iou(pred_box[internal_index], true_box)
                best_iou = tf.reduce_max(iou, axis=-1)
                ignore_mask = ignore_mask.write(internal_index, tf.cast(best_iou < ignore_thresh, tf.float32))
                return internal_index + 1, ignore_mask

            _, ignore_mask = tf.while_loop(
                lambda internal_index, ignore_mask: internal_index < tf.shape(yolo_output[0])[0], loop_body,
                [0, ignore_mask])
            ignore_mask = ignore_mask.stack()
            ignore_mask = tf.expand_dims(ignore_mask, axis=-1)
            xy_loss = object_mask * box_loss_scale * tf.nn.sigmoid_cross_entropy_with_logits(
                labels=raw_true_xy,
                logits=predictions[..., 0:2])
            wh_loss = object_mask * box_loss_scale * 0.5 * tf.square(raw_true_wh - predictions[..., 2:4])
            confidence_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(
                labels=object_mask,
                logits=predictions[..., 4:5]) + (1 - object_mask) * tf.nn.sigmoid_cross_entropy_with_logits(
                labels=object_mask,
                logits=predictions[..., 4:5]) * ignore_mask
            class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=class_probs,
                                                                               logits=predictions[..., 5:])
            xy_loss = tf.reduce_sum(xy_loss) / tf.cast(tf.shape(yolo_output[0])[0], tf.float32)
            wh_loss = tf.reduce_sum(wh_loss) / tf.cast(tf.shape(yolo_output[0])[0], tf.float32)
            confidence_loss = tf.reduce_sum(confidence_loss) / tf.cast(tf.shape(yolo_output[0])[0], tf.float32)
            class_loss = tf.reduce_sum(class_loss) / tf.cast(tf.shape(yolo_output[0])[0], tf.float32)
            loss += xy_loss + wh_loss + confidence_loss + class_loss
        return loss

    def yolo_eval(self, yolo_outputs, image_shape, max_boxes=20):
        # Decode all three heads, filter by objectness score and run per-class NMS.
        with tf.variable_scope('boxes_scores'):
            anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
            boxes = []
            box_scores = []
            input_shape = tf.shape(yolo_outputs[0])[1:3] * 32
            for i in range(len(yolo_outputs)):
                _boxes, _box_scores = self.yolo_boxes_scores(yolo_outputs[i], self.anchors[anchor_mask[i]],
                                                             len(self.class_names), input_shape, image_shape)
                boxes.append(_boxes)
                box_scores.append(_box_scores)
            boxes = tf.concat(boxes, axis=0)
            box_scores = tf.concat(box_scores, axis=0)
        with tf.variable_scope('nms'):
            mask = box_scores >= self.obj_threshold
            max_boxes_tensor = tf.constant(max_boxes, dtype=tf.int32)
            boxes_ = []
            scores_ = []
            classes_ = []
            for c in range(len(self.class_names)):
                class_boxes = tf.boolean_mask(boxes, mask[:, c])
                class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
                nms_index = tf.image.non_max_suppression(class_boxes, class_box_scores, max_boxes_tensor,
                                                         iou_threshold=self.nms_threshold)
                class_boxes = tf.gather(class_boxes, nms_index)
                class_box_scores = tf.gather(class_box_scores, nms_index)
                classes = tf.ones_like(class_box_scores, 'int32') * c
                boxes_.append(class_boxes)
                scores_.append(class_box_scores)
                classes_.append(classes)
        with tf.variable_scope('output'):
            boxes_ = tf.concat(boxes_, axis=0, name='boxes')
            scores_ = tf.concat(scores_, axis=0, name='scores')
            classes_ = tf.concat(classes_, axis=0, name='classes')
        return boxes_, scores_, classes_
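
    # Note: the tensors returned here are named 'output/boxes', 'output/scores' and
    # 'output/classes', which is what YOLOInference.create_output_fetch() looks up once the
    # graph has been frozen to a pb file. A hedged freezing sketch (the output file name and
    # the use of graph_util are assumptions, not taken from this repository's scripts):
    #   from tensorflow.python.framework import graph_util
    #   frozen = graph_util.convert_variables_to_constants(
    #       sess, sess.graph_def, ['output/boxes', 'output/scores', 'output/classes'])
    #   with tf.gfile.GFile('model.pb', 'wb') as f:
    #       f.write(frozen.SerializeToString())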

class YoloConfig:
    gpu_index = "3"
    net_type = 'resnet18'
    anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 163, 326]
    max_boxes = 50
    jitter = 0.3
    hue = 0.1
    sat = 1.0
    cont = 0.8
    bri = 0.1
    norm_decay = 0.99
    norm_epsilon = 1e-5
    ignore_thresh = 0.5


class YOLOInference(object):
    # pylint: disable=too-many-arguments, too-many-instance-attributes
    def __init__(self, sess, pb_model_path, input_shape):
        """
        initialization
        """
        self.load_model(sess, pb_model_path)
        self.input_shape = input_shape

    def load_model(self, sess, pb_model_path):
        """
        import model and load parameters from pb file
        """
        logging.info("Import yolo model from pb start .......")
        with sess.as_default():
            with sess.graph.as_default():
                with tf.gfile.FastGFile(pb_model_path, 'rb') as f_handle:
                    logging.info("ParseFromString start .......")
                    graph_def = tf.GraphDef()
                    graph_def.ParseFromString(f_handle.read())
                    logging.info("ParseFromString end .......")
                    tf.import_graph_def(graph_def, name='')
                    logging.info("Import_graph_def end .......")
        logging.info("Import yolo model from pb end .......")

    # pylint: disable=too-many-locals
    # pylint: disable=invalid-name
    def predict(self, sess, img_data):
        """
        prediction for image rectangle by input_feed and output_feed
        """
        with sess.as_default():
            new_image = self.preprocess(img_data, self.input_shape)
            input_feed = self.create_input_feed(sess, new_image, img_data)
            output_fetch = self.create_output_fetch(sess)
            all_classes, all_scores, all_bboxes = sess.run(output_fetch, input_feed)
        return all_classes, all_scores, all_bboxes

    def create_input_feed(self, sess, new_image, img_data):
        """
        create input feed data
        """
        input_feed = {}
        input_img_data = sess.graph.get_tensor_by_name('images:0')
        input_feed[input_img_data] = new_image
        input_img_shape = sess.graph.get_tensor_by_name('shapes:0')
        input_feed[input_img_shape] = [img_data.shape[0], img_data.shape[1]]
        return input_feed

    def create_output_fetch(self, sess):
        """
        create output fetch tensors
        """
        output_classes = sess.graph.get_tensor_by_name('output/classes:0')
        output_scores = sess.graph.get_tensor_by_name('output/scores:0')
        output_boxes = sess.graph.get_tensor_by_name('output/boxes:0')
        output_fetch = [output_classes, output_scores, output_boxes]
        return output_fetch

    def preprocess(self, image, input_shape):
        """
        resize image with unchanged aspect ratio using padding by opencv
        """
        # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        h, w, _ = image.shape
        input_h, input_w = input_shape
        scale = min(float(input_w) / float(w), float(input_h) / float(h))
        nw = int(w * scale)
        nh = int(h * scale)
        image = cv2.resize(image, (nw, nh))
        new_image = np.zeros((input_h, input_w, 3), np.float32)
        new_image.fill(128)
        bh, bw, _ = new_image.shape
        new_image[int((bh - nh) / 2):(nh + int((bh - nh) / 2)), int((bw - nw) / 2):(nw + int((bw - nw) / 2)), :] = image
        new_image /= 255.
        new_image = np.expand_dims(new_image, 0)  # Add batch dimension.
        return new_image
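

# A minimal inference sketch, assuming a frozen graph at 'model.pb' and a test image at
# 'test.jpg' (both paths and the (height, width) input shape are placeholders, not part of
# this repository).
if __name__ == '__main__':
    with tf.Session() as session:
        detector = YOLOInference(session, 'model.pb', input_shape=(352, 640))
        img = cv2.imread('test.jpg')
        classes, scores, bboxes = detector.predict(session, img)
        for cls, score, box in zip(classes, scores, bboxes):
            # Boxes come back as (ymin, xmin, ymax, xmax) in original-image pixels.
            print('class %d score %.2f box %s' % (cls, score, box))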