You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

yolo3_multiscale.py 33 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919
  1. # Copyright 2021 The KubeEdge Authors.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import logging
  16. import cv2
  17. import numpy as np
  18. import tensorflow as tf
  19. from resnet18 import ResNet18
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# Global TensorFlow 1.x flags object. Yolo3.__init__ reads train_url,
# input_shape, learning_rate, obj_threshold, nms_threshold, class_names,
# nas_sequence and label_changed from it.
# NOTE(review): the flag definitions are not visible in this file --
# presumably they are declared by the entry-point script; confirm.
flags = tf.flags.FLAGS
  22. class Yolo3:
  23. def __init__(self, sess, is_training, config):
  24. LOG.info('is_training: %s' % is_training)
  25. LOG.info('model dir: %s' % flags.train_url)
  26. LOG.info('input_shape: (%d, %d)' %
  27. (flags.input_shape[0], flags.input_shape[1]))
  28. LOG.info('learning rate: %f' % float(flags.learning_rate))
  29. self.is_training = is_training
  30. self.model_dir = flags.train_url
  31. self.norm_epsilon = config.norm_epsilon
  32. self.norm_decay = config.norm_decay
  33. self.obj_threshold = float(flags.obj_threshold)
  34. self.nms_threshold = float(flags.nms_threshold)
  35. self.anchors = np.array([float(x)
  36. for x in config.anchors]).reshape(-1, 2)
  37. self.class_names = flags.class_names
  38. self.num_classes = len(self.class_names)
  39. self.input_shape = flags.input_shape
  40. self.nas_sequence = flags.nas_sequence
  41. if not os.path.exists(self.model_dir):
  42. os.makedirs(self.model_dir)
  43. print("anchors : ", self.anchors)
  44. print("class_names : ", self.class_names)
  45. if is_training:
  46. self.images = tf.placeholder(
  47. shape=[
  48. None,
  49. None,
  50. None,
  51. 3],
  52. dtype=tf.float32,
  53. name='images')
  54. else:
  55. self.images = tf.placeholder(
  56. shape=[
  57. 1,
  58. self.input_shape[0],
  59. self.input_shape[1],
  60. 3],
  61. dtype=tf.float32,
  62. name='images')
  63. self.image_shape = tf.placeholder(
  64. dtype=tf.int32, shape=(2,), name='shapes')
  65. self.bbox_true_13 = tf.placeholder(
  66. shape=[
  67. None,
  68. None,
  69. None,
  70. 3,
  71. self.num_classes +
  72. 5],
  73. dtype=tf.float32)
  74. self.bbox_true_26 = tf.placeholder(
  75. shape=[
  76. None,
  77. None,
  78. None,
  79. 3,
  80. self.num_classes +
  81. 5],
  82. dtype=tf.float32)
  83. self.bbox_true_52 = tf.placeholder(
  84. shape=[
  85. None,
  86. None,
  87. None,
  88. 3,
  89. self.num_classes +
  90. 5],
  91. dtype=tf.float32)
  92. bbox_true = [self.bbox_true_13, self.bbox_true_26, self.bbox_true_52]
  93. features_out, filters_yolo_block, conv_index = self._resnet18(
  94. self.images, self.is_training)
  95. self.output = self.yolo_inference(
  96. features_out, filters_yolo_block, conv_index,
  97. len(self.anchors) / 3, self.num_classes, self.is_training)
  98. self.loss = self.yolo_loss(
  99. self.output,
  100. bbox_true,
  101. self.anchors,
  102. self.num_classes,
  103. config.ignore_thresh)
  104. self.global_step = tf.Variable(0, trainable=False)
  105. if self.is_training:
  106. learning_rate = tf.train.exponential_decay(
  107. float(
  108. flags.learning_rate),
  109. self.global_step,
  110. 1000,
  111. 0.95,
  112. staircase=True)
  113. optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
  114. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  115. with tf.control_dependencies(update_ops):
  116. self.train_op = optimizer.minimize(
  117. loss=self.loss, global_step=self.global_step)
  118. else:
  119. self.boxes, self.scores, self.classes = self.yolo_eval(
  120. self.output, self.image_shape, config.max_boxes)
  121. self.saver = tf.train.Saver()
  122. ckpt = tf.train.get_checkpoint_state(flags.train_url)
  123. if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
  124. if not flags.label_changed:
  125. print('restore model', ckpt.model_checkpoint_path)
  126. self.saver.restore(sess, ckpt.model_checkpoint_path)
  127. else:
  128. print('restore model', ckpt.model_checkpoint_path)
  129. sess.run(tf.global_variables_initializer())
  130. sess.run(tf.local_variables_initializer())
  131. variables = tf.global_variables()
  132. vars_restore = [var for var in variables
  133. if not ("Adam" in var.name
  134. or '25' in var.name
  135. or '33' in var.name
  136. or '41' in var.name)]
  137. saver_restore = tf.train.Saver(vars_restore)
  138. saver_restore.restore(sess, ckpt.model_checkpoint_path)
  139. else:
  140. print('initialize model with fresh weights...')
  141. sess.run(tf.global_variables_initializer())
  142. sess.run(tf.local_variables_initializer())
  143. def load_weights(self, sess, fpath):
  144. sess = tf.get_default_session()
  145. variables = sess.graph.get_collection("variables")
  146. data = np.load(fpath)
  147. for v in variables:
  148. vname = v.name.replace(':0', '')
  149. if vname not in data:
  150. print("----------skip %s----------" % vname)
  151. continue
  152. print("assigning %s" % vname)
  153. sess.run(v.assign(data[vname]))
  154. def step(self, sess, batch_data, is_training):
  155. """step, read one batch, generate gradients
  156. """
  157. # Input feed
  158. input_feed = {}
  159. input_feed[self.images] = batch_data['images']
  160. input_feed[self.bbox_true_13] = batch_data['bbox_true_13']
  161. input_feed[self.bbox_true_26] = batch_data['bbox_true_26']
  162. input_feed[self.bbox_true_52] = batch_data['bbox_true_52']
  163. # Output feed: depends on training or test
  164. output_feed = [self.loss] # Loss for this batch.
  165. if is_training:
  166. output_feed.append(self.train_op) # Gradient updates
  167. outputs = sess.run(output_feed, input_feed)
  168. return outputs[0] # loss
  169. def _batch_normalization_layer(
  170. self,
  171. input_layer,
  172. name=None,
  173. training=True,
  174. norm_decay=0.997,
  175. norm_epsilon=1e-5):
  176. """Batch normalization is used for feature map extracted from
  177. convolution layer
  178. :param input_layer: four dimensional tensor of input
  179. :param name: the name of batchnorm layer
  180. :param training: is training or not
  181. :param norm_decay: The decay rate of moving average is calculated
  182. during prediction
  183. :param norm_epsilon: Variance plus a minimal number to prevent
  184. division by 0
  185. :return bn_layer: batch normalization处理之后的feature map
  186. """
  187. bn_layer = tf.layers.batch_normalization(
  188. inputs=input_layer,
  189. momentum=norm_decay,
  190. epsilon=norm_epsilon,
  191. center=True,
  192. scale=True,
  193. training=training,
  194. name=name,
  195. fused=True)
  196. return tf.nn.relu(bn_layer)
  197. # return tf.nn.leaky_relu(bn_layer, alpha = 0.1)
  198. def _conv2d_layer(
  199. self,
  200. inputs,
  201. filters_num,
  202. kernel_size,
  203. name,
  204. use_bias=False,
  205. strides=1):
  206. """Use tf.layers.conv2d Reduce the weight and bias matrix
  207. initialization process, as well as convolution plus bias operation
  208. :param inputs: Input variables
  209. :param filters_num: Number of convolution kernels
  210. :param strides: Convolution step
  211. :param name: Convolution layer name
  212. :param training: is a training process or not
  213. :param use_bias: use bias or not
  214. :param kernel_size: the kernels size
  215. :return conv: Feature map after convolution
  216. """
  217. if strides > 1: # modified 0327
  218. inputs = tf.pad(inputs, paddings=[[0, 0], [1, 0], [
  219. 1, 0], [0, 0]], mode='CONSTANT')
  220. conv = tf.layers.conv2d(inputs=inputs, filters=filters_num,
  221. kernel_size=kernel_size,
  222. strides=[strides, strides],
  223. padding=('SAME' if strides == 1 else 'VALID'),
  224. # padding = 'SAME', #
  225. use_bias=use_bias,
  226. name=name)
  227. return conv
  228. def _Residual_block(
  229. self,
  230. inputs,
  231. filters_num,
  232. blocks_num,
  233. conv_index,
  234. training=True,
  235. norm_decay=0.997,
  236. norm_epsilon=1e-5):
  237. layer = self._conv2d_layer(
  238. inputs,
  239. filters_num,
  240. kernel_size=3,
  241. strides=2,
  242. name="conv2d_" +
  243. str(conv_index))
  244. layer = self._batch_normalization_layer(
  245. layer,
  246. name="batch_normalization_" +
  247. str(conv_index),
  248. training=training,
  249. norm_decay=norm_decay,
  250. norm_epsilon=norm_epsilon)
  251. conv_index += 1
  252. for _ in range(blocks_num):
  253. shortcut = layer
  254. layer = self._conv2d_layer(
  255. layer,
  256. filters_num //
  257. 2,
  258. kernel_size=1,
  259. strides=1,
  260. name="conv2d_" +
  261. str(conv_index))
  262. layer = self._batch_normalization_layer(
  263. layer,
  264. name="batch_normalization_" +
  265. str(conv_index),
  266. training=training,
  267. norm_decay=norm_decay,
  268. norm_epsilon=norm_epsilon)
  269. conv_index += 1
  270. layer = self._conv2d_layer(
  271. layer,
  272. filters_num,
  273. kernel_size=3,
  274. strides=1,
  275. name="conv2d_" +
  276. str(conv_index))
  277. layer = self._batch_normalization_layer(
  278. layer,
  279. name="batch_normalization_" +
  280. str(conv_index),
  281. training=training,
  282. norm_decay=norm_decay,
  283. norm_epsilon=norm_epsilon)
  284. conv_index += 1
  285. layer += shortcut
  286. return layer, conv_index
  287. def _resnet18(self, inputs, training=True):
  288. cnn_model = ResNet18(inputs, training)
  289. for k, v in cnn_model.end_points.items():
  290. print(k)
  291. print(v)
  292. features_out = [
  293. cnn_model.end_points['conv5_output'],
  294. cnn_model.end_points['conv4_output'],
  295. cnn_model.end_points['conv3_output']]
  296. filters_yolo_block = [256, 128, 64]
  297. conv_index = 19
  298. return features_out, filters_yolo_block, conv_index
  299. def _yolo_block(
  300. self,
  301. inputs,
  302. filters_num,
  303. out_filters,
  304. conv_index,
  305. training=True,
  306. norm_decay=0.997,
  307. norm_epsilon=1e-5):
  308. conv = self._conv2d_layer(
  309. inputs,
  310. filters_num=filters_num,
  311. kernel_size=1,
  312. strides=1,
  313. name="conv2d_" +
  314. str(conv_index))
  315. conv = self._batch_normalization_layer(
  316. conv,
  317. name="batch_normalization_" +
  318. str(conv_index),
  319. training=training,
  320. norm_decay=norm_decay,
  321. norm_epsilon=norm_epsilon)
  322. conv_index += 1
  323. conv = self._conv2d_layer(
  324. conv,
  325. filters_num=filters_num * 2,
  326. kernel_size=3,
  327. strides=1,
  328. name="conv2d_" +
  329. str(conv_index))
  330. conv = self._batch_normalization_layer(
  331. conv,
  332. name="batch_normalization_" +
  333. str(conv_index),
  334. training=training,
  335. norm_decay=norm_decay,
  336. norm_epsilon=norm_epsilon)
  337. conv_index += 1
  338. conv = self._conv2d_layer(
  339. conv,
  340. filters_num=filters_num,
  341. kernel_size=1,
  342. strides=1,
  343. name="conv2d_" + str(conv_index))
  344. conv = self._batch_normalization_layer(
  345. conv,
  346. name="batch_normalization_" + str(conv_index),
  347. training=training,
  348. norm_decay=norm_decay,
  349. norm_epsilon=norm_epsilon)
  350. conv_index += 1
  351. conv = self._conv2d_layer(
  352. conv,
  353. filters_num=filters_num * 2,
  354. kernel_size=3,
  355. strides=1,
  356. name="conv2d_" + str(conv_index))
  357. conv = self._batch_normalization_layer(
  358. conv,
  359. name="batch_normalization_" + str(conv_index),
  360. training=training,
  361. norm_decay=norm_decay,
  362. norm_epsilon=norm_epsilon)
  363. conv_index += 1
  364. conv = self._conv2d_layer(
  365. conv,
  366. filters_num=filters_num,
  367. kernel_size=1,
  368. strides=1,
  369. name="conv2d_" + str(conv_index))
  370. conv = self._batch_normalization_layer(
  371. conv,
  372. name="batch_normalization_" + str(conv_index),
  373. training=training,
  374. norm_decay=norm_decay,
  375. norm_epsilon=norm_epsilon)
  376. conv_index += 1
  377. route = conv
  378. conv = self._conv2d_layer(
  379. conv,
  380. filters_num=filters_num * 2,
  381. kernel_size=3,
  382. strides=1,
  383. name="conv2d_" +
  384. str(conv_index))
  385. conv = self._batch_normalization_layer(
  386. conv,
  387. name="batch_normalization_" + str(conv_index),
  388. training=training,
  389. norm_decay=norm_decay,
  390. norm_epsilon=norm_epsilon)
  391. conv_index += 1
  392. conv = self._conv2d_layer(
  393. conv,
  394. filters_num=out_filters,
  395. kernel_size=1,
  396. strides=1,
  397. name="conv2d_" +
  398. str(conv_index),
  399. use_bias=True)
  400. conv_index += 1
  401. return route, conv, conv_index
  402. def yolo_inference(
  403. self,
  404. features_out,
  405. filters_yolo_block,
  406. conv_index,
  407. num_anchors,
  408. num_classes,
  409. training=True
  410. ):
  411. conv = features_out[0]
  412. conv2d_45 = features_out[1]
  413. conv2d_26 = features_out[2]
  414. print('conv : ', conv)
  415. print('conv2d_45 : ', conv2d_45)
  416. print('conv2d_26 : ', conv2d_26)
  417. with tf.variable_scope('yolo'):
  418. conv2d_57, conv2d_59, conv_index = (
  419. self._yolo_block(conv,
  420. filters_yolo_block[0],
  421. num_anchors * (num_classes + 5),
  422. conv_index=conv_index,
  423. training=training,
  424. norm_decay=self.norm_decay,
  425. norm_epsilon=self.norm_epsilon)
  426. )
  427. print('conv2d_59 : ', conv2d_59)
  428. print('conv2d_57 : ', conv2d_57)
  429. conv2d_60 = self._conv2d_layer(
  430. conv2d_57,
  431. filters_num=filters_yolo_block[1],
  432. kernel_size=1,
  433. strides=1,
  434. name="conv2d_" +
  435. str(conv_index))
  436. conv2d_60 = self._batch_normalization_layer(
  437. conv2d_60,
  438. name="batch_normalization_" +
  439. str(conv_index),
  440. training=training,
  441. norm_decay=self.norm_decay,
  442. norm_epsilon=self.norm_epsilon)
  443. print('conv2d_60 : ', conv2d_60)
  444. conv_index += 1
  445. upSample_0 = tf.image.resize_nearest_neighbor(
  446. conv2d_60, [
  447. 2 * tf.shape(conv2d_60)[1], 2 * tf.shape(conv2d_60)[2]],
  448. name='upSample_0')
  449. print('upSample_0 : ', upSample_0)
  450. route0 = tf.concat([upSample_0, conv2d_45],
  451. axis=-1, name='route_0')
  452. print('route0 : ', route0)
  453. conv2d_65, conv2d_67, conv_index = (
  454. self._yolo_block(route0,
  455. filters_yolo_block[1],
  456. num_anchors * (num_classes + 5),
  457. conv_index=conv_index,
  458. training=training,
  459. norm_decay=self.norm_decay,
  460. norm_epsilon=self.norm_epsilon)
  461. )
  462. print('conv2d_67 : ', conv2d_67)
  463. print('conv2d_65 : ', conv2d_65)
  464. conv2d_68 = self._conv2d_layer(
  465. conv2d_65,
  466. filters_num=filters_yolo_block[2],
  467. kernel_size=1,
  468. strides=1,
  469. name="conv2d_" +
  470. str(conv_index))
  471. conv2d_68 = self._batch_normalization_layer(
  472. conv2d_68,
  473. name="batch_normalization_" +
  474. str(conv_index),
  475. training=training,
  476. norm_decay=self.norm_decay,
  477. norm_epsilon=self.norm_epsilon)
  478. print('conv2d_68 : ', conv2d_68)
  479. conv_index += 1
  480. upSample_1 = tf.image.resize_nearest_neighbor(
  481. conv2d_68, [
  482. 2 * tf.shape(conv2d_68)[1], 2 * tf.shape(conv2d_68)[2]],
  483. name='upSample_1')
  484. print('upSample_1 : ', upSample_1)
  485. route1 = tf.concat([upSample_1, conv2d_26],
  486. axis=-1, name='route_1')
  487. print('route1 : ', route1)
  488. _, conv2d_75, _ = self._yolo_block(route1, filters_yolo_block[2],
  489. num_anchors * (num_classes + 5),
  490. conv_index=conv_index,
  491. training=training,
  492. norm_decay=self.norm_decay,
  493. norm_epsilon=self.norm_epsilon)
  494. print('conv2d_75 : ', conv2d_75)
  495. return [conv2d_59, conv2d_67, conv2d_75]
  496. def yolo_head(
  497. self,
  498. feats,
  499. anchors,
  500. num_classes,
  501. input_shape,
  502. training=True
  503. ):
  504. num_anchors = len(anchors)
  505. anchors_tensor = tf.reshape(
  506. tf.constant(
  507. anchors, dtype=tf.float32), [
  508. 1, 1, 1, num_anchors, 2])
  509. grid_size = tf.shape(feats)[1:3]
  510. predictions = tf.reshape(
  511. feats,
  512. [-1, grid_size[0], grid_size[1], num_anchors, num_classes + 5])
  513. grid_y = tf.tile(tf.reshape(
  514. tf.range(grid_size[0]), [-1, 1, 1, 1]), [1, grid_size[1], 1, 1])
  515. grid_x = tf.tile(tf.reshape(tf.range(grid_size[1]), [
  516. 1, -1, 1, 1]), [grid_size[0], 1, 1, 1])
  517. grid = tf.concat([grid_x, grid_y], axis=-1)
  518. grid = tf.cast(grid, tf.float32)
  519. box_xy = (tf.sigmoid(predictions[..., :2]) +
  520. grid) / tf.cast(grid_size[::-1], tf.float32)
  521. box_wh = tf.exp(
  522. predictions[..., 2:4]) * anchors_tensor / input_shape[::-1]
  523. box_confidence = tf.sigmoid(predictions[..., 4:5])
  524. box_class_probs = tf.sigmoid(predictions[..., 5:])
  525. if training:
  526. return grid, predictions, box_xy, box_wh
  527. return box_xy, box_wh, box_confidence, box_class_probs
  528. def yolo_boxes_scores(
  529. self,
  530. feats,
  531. anchors,
  532. num_classes,
  533. input_shape,
  534. image_shape):
  535. input_shape = tf.cast(input_shape, tf.float32)
  536. image_shape = tf.cast(image_shape, tf.float32)
  537. box_xy, box_wh, box_confidence, box_class_probs = self.yolo_head(
  538. feats, anchors, num_classes, input_shape, training=False)
  539. box_yx = box_xy[..., ::-1]
  540. box_hw = box_wh[..., ::-1]
  541. new_shape = tf.round(
  542. image_shape *
  543. tf.reduce_min(
  544. input_shape /
  545. image_shape))
  546. offset = (input_shape - new_shape) / 2. / input_shape
  547. scale = input_shape / new_shape
  548. box_yx = (box_yx - offset) * scale
  549. box_hw = box_hw * scale
  550. box_min = box_yx - box_hw / 2.
  551. box_max = box_yx + box_hw / 2.
  552. boxes = tf.concat(
  553. [box_min[..., 0:1],
  554. box_min[..., 1:2],
  555. box_max[..., 0:1],
  556. box_max[..., 1:2]],
  557. axis=-1
  558. )
  559. boxes *= tf.concat([image_shape, image_shape], axis=-1)
  560. boxes = tf.reshape(boxes, [-1, 4])
  561. boxes_scores = box_confidence * box_class_probs
  562. boxes_scores = tf.reshape(boxes_scores, [-1, num_classes])
  563. return boxes, boxes_scores
  564. def box_iou(self, box1, box2):
  565. box1 = tf.expand_dims(box1, -2)
  566. box1_xy = box1[..., :2]
  567. box1_wh = box1[..., 2:4]
  568. box1_mins = box1_xy - box1_wh / 2.
  569. box1_maxs = box1_xy + box1_wh / 2.
  570. box2 = tf.expand_dims(box2, 0)
  571. box2_xy = box2[..., :2]
  572. box2_wh = box2[..., 2:4]
  573. box2_mins = box2_xy - box2_wh / 2.
  574. box2_maxs = box2_xy + box2_wh / 2.
  575. intersect_mins = tf.maximum(box1_mins, box2_mins)
  576. intersect_maxs = tf.minimum(box1_maxs, box2_maxs)
  577. intersect_wh = tf.maximum(intersect_maxs - intersect_mins, 0.)
  578. intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
  579. box1_area = box1_wh[..., 0] * box1_wh[..., 1]
  580. box2_area = box2_wh[..., 0] * box2_wh[..., 1]
  581. iou = intersect_area / (box1_area + box2_area - intersect_area)
  582. return iou
  583. def yolo_loss(
  584. self,
  585. yolo_output,
  586. y_true,
  587. anchors,
  588. num_classes,
  589. ignore_thresh=.5):
  590. loss = 0.0
  591. anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
  592. input_shape = tf.shape(yolo_output[0])[1: 3] * 32
  593. input_shape = tf.cast(input_shape, tf.float32)
  594. grid_shapes = [
  595. tf.cast(
  596. tf.shape(
  597. yolo_output[layer])[1:3],
  598. tf.float32) for layer in range(3)]
  599. for index in range(3):
  600. object_mask = y_true[index][..., 4:5]
  601. class_probs = y_true[index][..., 5:]
  602. grid, predictions, pred_xy, pred_wh = self.yolo_head(
  603. yolo_output[index], anchors[anchor_mask[index]], num_classes,
  604. input_shape, training=True)
  605. pred_box = tf.concat([pred_xy, pred_wh], axis=-1)
  606. raw_true_xy = y_true[
  607. index][..., :2] * grid_shapes[index][::-1] - grid
  608. object_mask_bool = tf.cast(object_mask, dtype=tf.bool)
  609. raw_true_wh = tf.log(
  610. tf.where(tf.equal(y_true[index][..., 2:4] / anchors[
  611. anchor_mask[index]] * input_shape[::-1], 0),
  612. tf.ones_like(y_true[index][..., 2:4]),
  613. y_true[index][..., 2:4] / anchors[
  614. anchor_mask[index]] * input_shape[::-1]))
  615. box_loss_scale = 2 - y_true[
  616. index][..., 2:3] * y_true[index][..., 3:4]
  617. ignore_mask = tf.TensorArray(
  618. dtype=tf.float32, size=1, dynamic_size=True)
  619. def loop_body(internal_index, ignore_mask):
  620. true_box = tf.boolean_mask(
  621. y_true[index][internal_index, ..., 0:4],
  622. object_mask_bool[internal_index, ..., 0])
  623. iou = self.box_iou(pred_box[internal_index], true_box)
  624. best_iou = tf.reduce_max(iou, axis=-1)
  625. ignore_mask = ignore_mask.write(
  626. internal_index, tf.cast(
  627. best_iou < ignore_thresh, tf.float32))
  628. return internal_index + 1, ignore_mask
  629. _, ignore_mask = tf.while_loop(
  630. lambda internal_index, ignore_mask: internal_index < tf.shape(
  631. yolo_output[0])[0], loop_body, [
  632. 0, ignore_mask])
  633. ignore_mask = ignore_mask.stack()
  634. ignore_mask = tf.expand_dims(ignore_mask, axis=-1)
  635. xy_loss = (
  636. object_mask
  637. * box_loss_scale
  638. * tf.nn.sigmoid_cross_entropy_with_logits(
  639. labels=raw_true_xy,
  640. logits=predictions[..., 0:2])
  641. )
  642. wh_loss = (
  643. object_mask
  644. * box_loss_scale
  645. * 0.5
  646. * tf.square(raw_true_wh - predictions[..., 2:4]))
  647. confidence_loss = (
  648. object_mask
  649. * tf.nn.sigmoid_cross_entropy_with_logits(
  650. labels=object_mask,
  651. logits=predictions[..., 4:5])
  652. + (1 - object_mask)
  653. * tf.nn.sigmoid_cross_entropy_with_logits(
  654. labels=object_mask,
  655. logits=predictions[..., 4:5])
  656. * ignore_mask
  657. )
  658. class_loss = (
  659. object_mask
  660. * tf.nn.sigmoid_cross_entropy_with_logits(
  661. labels=class_probs, logits=predictions[..., 5:])
  662. )
  663. xy_loss = tf.reduce_sum(
  664. xy_loss) / tf.cast(tf.shape(yolo_output[0])[0], tf.float32)
  665. wh_loss = tf.reduce_sum(
  666. wh_loss) / tf.cast(tf.shape(yolo_output[0])[0], tf.float32)
  667. confidence_loss = tf.reduce_sum(
  668. confidence_loss) / tf.cast(tf.shape(yolo_output[0])[0],
  669. tf.float32)
  670. class_loss = tf.reduce_sum(
  671. class_loss) / tf.cast(tf.shape(yolo_output[0])[0], tf.float32)
  672. loss += xy_loss + wh_loss + confidence_loss + class_loss
  673. return loss
  674. def yolo_eval(self, yolo_outputs, image_shape, max_boxes=20):
  675. with tf.variable_scope('boxes_scores'):
  676. anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
  677. boxes = []
  678. box_scores = []
  679. input_shape = tf.shape(yolo_outputs[0])[1: 3] * 32
  680. for i in range(len(yolo_outputs)):
  681. _boxes, _box_scores = (
  682. self.yolo_boxes_scores(yolo_outputs[i],
  683. self.anchors[
  684. anchor_mask[i]],
  685. len(self.class_names),
  686. input_shape,
  687. image_shape)
  688. )
  689. boxes.append(_boxes)
  690. box_scores.append(_box_scores)
  691. boxes = tf.concat(boxes, axis=0)
  692. box_scores = tf.concat(box_scores, axis=0)
  693. with tf.variable_scope('nms'):
  694. mask = box_scores >= self.obj_threshold
  695. max_boxes_tensor = tf.constant(max_boxes, dtype=tf.int32)
  696. boxes_ = []
  697. scores_ = []
  698. classes_ = []
  699. for c in range(len(self.class_names)):
  700. class_boxes = tf.boolean_mask(boxes, mask[:, c])
  701. class_box_scores = tf.boolean_mask(
  702. box_scores[:, c], mask[:, c])
  703. nms_index = tf.image.non_max_suppression(
  704. class_boxes,
  705. class_box_scores,
  706. max_boxes_tensor,
  707. iou_threshold=self.nms_threshold)
  708. class_boxes = tf.gather(class_boxes, nms_index)
  709. class_box_scores = tf.gather(class_box_scores, nms_index)
  710. classes = tf.ones_like(class_box_scores, 'int32') * c
  711. boxes_.append(class_boxes)
  712. scores_.append(class_box_scores)
  713. classes_.append(classes)
  714. with tf.variable_scope('output'):
  715. boxes_ = tf.concat(boxes_, axis=0, name='boxes')
  716. scores_ = tf.concat(scores_, axis=0, name='scores')
  717. classes_ = tf.concat(classes_, axis=0, name='classes')
  718. return boxes_, scores_, classes_
  719. class YoloConfig:
  720. gpu_index = "3"
  721. net_type = 'resnet18'
  722. anchors = [
  723. 10,
  724. 13,
  725. 16,
  726. 30,
  727. 33,
  728. 23,
  729. 30,
  730. 61,
  731. 62,
  732. 45,
  733. 59,
  734. 119,
  735. 116,
  736. 90,
  737. 156,
  738. 198,
  739. 163,
  740. 326]
  741. max_boxes = 50
  742. jitter = 0.3
  743. hue = 0.1
  744. sat = 1.0
  745. cont = 0.8
  746. bri = 0.1
  747. norm_decay = 0.99
  748. norm_epsilon = 1e-5
  749. ignore_thresh = 0.5
  750. class YOLOInference(object):
  751. # pylint: disable=too-many-arguments, too-many-instance-attributes
  752. def __init__(self, sess, pb_model_path, input_shape):
  753. """
  754. initialization
  755. """
  756. self.load_model(sess, pb_model_path)
  757. self.input_shape = input_shape
  758. def load_model(self, sess, pb_model_path):
  759. """
  760. import model and load parameters from pb file
  761. """
  762. logging.info("Import yolo model from pb start .......")
  763. with sess.as_default():
  764. with sess.graph.as_default():
  765. with tf.gfile.FastGFile(pb_model_path, 'rb') as f_handle:
  766. logging.info("ParseFromString start .......")
  767. graph_def = tf.GraphDef()
  768. graph_def.ParseFromString(f_handle.read())
  769. logging.info("ParseFromString end .......")
  770. tf.import_graph_def(graph_def, name='')
  771. logging.info("Import_graph_def end .......")
  772. logging.info("Import yolo model from pb end .......")
  773. # pylint: disable=too-many-locals
  774. # pylint: disable=invalid-name
  775. def predict(self, sess, img_data):
  776. """
  777. prediction for image rectangle by input_feed and output_feed
  778. """
  779. with sess.as_default():
  780. new_image = self.preprocess(img_data, self.input_shape)
  781. input_feed = self.create_input_feed(sess, new_image, img_data)
  782. output_fetch = self.create_output_fetch(sess)
  783. all_classes, all_scores, all_bboxes = sess.run(
  784. output_fetch, input_feed)
  785. return all_classes, all_scores, all_bboxes
  786. def create_input_feed(self, sess, new_image, img_data):
  787. """
  788. create input feed data
  789. """
  790. input_feed = {}
  791. input_img_data = sess.graph.get_tensor_by_name('images:0')
  792. input_feed[input_img_data] = new_image
  793. input_img_shape = sess.graph.get_tensor_by_name('shapes:0')
  794. input_feed[input_img_shape] = [img_data.shape[0], img_data.shape[1]]
  795. return input_feed
  796. def create_output_fetch(self, sess):
  797. """
  798. create output fetch tensors
  799. """
  800. output_classes = sess.graph.get_tensor_by_name('output/classes:0')
  801. output_scores = sess.graph.get_tensor_by_name('output/scores:0')
  802. output_boxes = sess.graph.get_tensor_by_name('output/boxes:0')
  803. output_fetch = [output_classes, output_scores, output_boxes]
  804. return output_fetch
  805. def preprocess(self, image, input_shape):
  806. """
  807. resize image with unchanged aspect ratio using padding by opencv
  808. """
  809. # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  810. h, w, _ = image.shape
  811. input_h, input_w = input_shape
  812. scale = min(float(input_w) / float(w), float(input_h) / float(h))
  813. nw = int(w * scale)
  814. nh = int(h * scale)
  815. image = cv2.resize(image, (nw, nh))
  816. new_image = np.zeros((input_h, input_w, 3), np.float32)
  817. new_image.fill(128)
  818. bh, bw, _ = new_image.shape
  819. _inx_1 = int((bh - nh) / 2)
  820. _inx_2 = nh + int((bh - nh) / 2)
  821. _inx_3 = int((bw - nw) / 2)
  822. _inx_4 = nw + int((bw - nw) / 2)
  823. new_image[_inx_1: _inx_2, _inx_3:_inx_4:] = image
  824. new_image /= 255.
  825. new_image = np.expand_dims(new_image, 0) # Add batch dimension.
  826. return new_image