You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

YOLOv3.cs 15 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. using NumSharp;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Text;
  5. using Tensorflow;
  6. using static Tensorflow.Binding;
  7. namespace TensorFlowNET.Examples.ImageProcessing.YOLO
  8. {
  /// <summary>
  /// Assembles the YOLOv3 graph on top of <c>backbone.darknet53</c>: three
  /// convolutional detection heads (large / medium / small), their decoded
  /// predictions, and the GIoU / confidence / class-probability losses.
  /// </summary>
  public class YOLOv3
  {
      // Values read from the configuration once, in the constructor.
      private readonly Config cfg;
      private readonly Tensor trainable;
      private readonly Tensor input_data;
      private readonly Dictionary<int, string> classes;
      private readonly int num_class;
      private readonly NDArray strides;
      private readonly NDArray anchors;
      private readonly int anchor_per_scale;
      private readonly float iou_loss_thresh;
      private readonly string upsample_method;

      // Raw (un-decoded) outputs of the three detection heads.
      private Tensor conv_lbbox;
      private Tensor conv_mbbox;
      private Tensor conv_sbbox;

      // Decoded predictions; assigned inside variable-scope callbacks,
      // which is why they cannot be readonly.
      private Tensor pred_sbbox;
      private Tensor pred_mbbox;
      private Tensor pred_lbbox;

      /// <summary>
      /// Reads the YOLO settings from <paramref name="cfg_"/>, builds the network
      /// on <paramref name="input_data_"/> and decodes each of the three heads
      /// under its own variable scope.
      /// </summary>
      /// <param name="cfg_">Configuration; its <c>YOLO</c> section supplies classes, strides, anchors, thresholds and the upsample method.</param>
      /// <param name="input_data_">Input tensor fed to the backbone.</param>
      /// <param name="trainable_">Tensor forwarded to every layer as its <c>trainable</c> argument.</param>
      public YOLOv3(Config cfg_, Tensor input_data_, Tensor trainable_)
      {
          cfg = cfg_;
          input_data = input_data_;
          trainable = trainable_;

          classes = Utils.read_class_names(cfg.YOLO.CLASSES);
          num_class = len(classes);
          strides = np.array(cfg.YOLO.STRIDES);
          anchors = Utils.get_anchors(cfg.YOLO.ANCHORS);
          anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE;
          iou_loss_thresh = cfg.YOLO.IOU_LOSS_THRESH;
          upsample_method = cfg.YOLO.UPSAMPLE_METHOD;

          (conv_lbbox, conv_mbbox, conv_sbbox) = __build_nework(input_data);

          // Index 0 / 1 / 2 of anchors and strides pair with the
          // small / medium / large head respectively.
          tf_with(tf.variable_scope("pred_sbbox"), scope =>
          {
              pred_sbbox = decode(conv_sbbox, anchors[0], strides[0]);
          });

          tf_with(tf.variable_scope("pred_mbbox"), scope =>
          {
              pred_mbbox = decode(conv_mbbox, anchors[1], strides[1]);
          });

          tf_with(tf.variable_scope("pred_lbbox"), scope =>
          {
              pred_lbbox = decode(conv_lbbox, anchors[2], strides[2]);
          });
      }

      /// <summary>
      /// Builds the three detection heads on top of the darknet53 backbone.
      /// </summary>
      /// <param name="input_data">Network input tensor.</param>
      /// <returns>The raw head outputs in the order (large, medium, small).</returns>
      private (Tensor, Tensor, Tensor) __build_nework(Tensor input_data)
      {
          Tensor route_1, route_2;
          (route_1, route_2, input_data) = backbone.darknet53(input_data, trainable);

          // ---- large head ----
          input_data = common.convolutional(input_data, new[] { 1, 1, 1024, 512 }, trainable, "conv52");
          input_data = common.convolutional(input_data, new[] { 3, 3, 512, 1024 }, trainable, "conv53");
          input_data = common.convolutional(input_data, new[] { 1, 1, 1024, 512 }, trainable, "conv54");
          input_data = common.convolutional(input_data, new[] { 3, 3, 512, 1024 }, trainable, "conv55");
          input_data = common.convolutional(input_data, new[] { 1, 1, 1024, 512 }, trainable, "conv56");

          var conv_lobj_branch = common.convolutional(input_data, new[] { 3, 3, 512, 1024 }, trainable, name: "conv_lobj_branch");
          // Output channels: 3 * (num_class + 5); activation and batch norm are disabled on the head's final layer.
          var lbbox = common.convolutional(conv_lobj_branch, new[] { 1, 1, 1024, 3 * (num_class + 5) },
              trainable: trainable, name: "conv_lbbox", activate: false, bn: false);

          // ---- medium head: upsample and concatenate with backbone route_2 ----
          input_data = common.convolutional(input_data, new[] { 1, 1, 512, 256 }, trainable, "conv57");
          input_data = common.upsample(input_data, name: "upsample0", method: upsample_method);

          tf_with(tf.variable_scope("route_1"), delegate
          {
              input_data = tf.concat(new[] { input_data, route_2 }, axis: -1);
          });

          input_data = common.convolutional(input_data, new[] { 1, 1, 768, 256 }, trainable, "conv58");
          input_data = common.convolutional(input_data, new[] { 3, 3, 256, 512 }, trainable, "conv59");
          input_data = common.convolutional(input_data, new[] { 1, 1, 512, 256 }, trainable, "conv60");
          input_data = common.convolutional(input_data, new[] { 3, 3, 256, 512 }, trainable, "conv61");
          input_data = common.convolutional(input_data, new[] { 1, 1, 512, 256 }, trainable, "conv62");

          var conv_mobj_branch = common.convolutional(input_data, new[] { 3, 3, 256, 512 }, trainable, name: "conv_mobj_branch");
          var mbbox = common.convolutional(conv_mobj_branch, new[] { 1, 1, 512, 3 * (num_class + 5) },
              trainable: trainable, name: "conv_mbbox", activate: false, bn: false);

          // ---- small head: upsample and concatenate with backbone route_1 ----
          input_data = common.convolutional(input_data, new[] { 1, 1, 256, 128 }, trainable, "conv63");
          input_data = common.upsample(input_data, name: "upsample1", method: upsample_method);

          tf_with(tf.variable_scope("route_2"), delegate
          {
              input_data = tf.concat(new[] { input_data, route_1 }, axis: -1);
          });

          input_data = common.convolutional(input_data, new[] { 1, 1, 384, 128 }, trainable, "conv64");
          input_data = common.convolutional(input_data, new[] { 3, 3, 128, 256 }, trainable, "conv65");
          input_data = common.convolutional(input_data, new[] { 1, 1, 256, 128 }, trainable, "conv66");
          input_data = common.convolutional(input_data, new[] { 3, 3, 128, 256 }, trainable, "conv67");
          input_data = common.convolutional(input_data, new[] { 1, 1, 256, 128 }, trainable, "conv68");

          var conv_sobj_branch = common.convolutional(input_data, new[] { 3, 3, 128, 256 }, trainable, name: "conv_sobj_branch");
          var sbbox = common.convolutional(conv_sobj_branch, new[] { 1, 1, 256, 3 * (num_class + 5) },
              trainable: trainable, name: "conv_sbbox", activate: false, bn: false);

          // Only locals are written here; the constructor stores the tuple into the fields.
          return (lbbox, mbbox, sbbox);
      }

      /// <summary>
      /// Converts a raw head output into predictions laid out on the last axis as
      /// [xywh (4), confidence (1), class probabilities (num_class)].
      /// </summary>
      /// <param name="conv_output">Raw head output; reshaped to (batch, size, size, anchors, 5 + num_class).</param>
      /// <param name="anchors">Anchors of this scale; their count sets the anchor axis length.</param>
      /// <param name="stride">Multiplier applied to both the xy and the wh predictions.</param>
      /// <returns>Concatenation of decoded xywh, sigmoid confidence and sigmoid class probabilities.</returns>
      private Tensor decode(Tensor conv_output, NDArray anchors, int stride)
      {
          var conv_shape = tf.shape(conv_output);
          var batch_size = conv_shape[0];
          var output_size = conv_shape[1];

          // Kept local on purpose: assigning the anchor_per_scale field here would
          // overwrite the configured value that loss_layer reads afterwards.
          var anchor_count = len(anchors);

          conv_output = tf.reshape(conv_output, new object[] { batch_size, output_size, output_size, anchor_count, 5 + num_class });

          var conv_raw_dxdy = conv_output[":", ":", ":", ":", "0:2"];
          var conv_raw_dwdh = conv_output[":", ":", ":", ":", "2:4"];
          var conv_raw_conf = conv_output[":", ":", ":", ":", "4:5"];
          var conv_raw_prob = conv_output[":", ":", ":", ":", "5:"];

          // Grid of (x, y) cell indices, tiled across batch and anchors.
          var y = tf.tile(tf.range(output_size, dtype: tf.int32)[":", tf.newaxis], new object[] { 1, output_size });
          var x = tf.tile(tf.range(output_size, dtype: tf.int32)[tf.newaxis, ":"], new object[] { output_size, 1 });

          var xy_grid = tf.concat(new[] { x[":", ":", tf.newaxis], y[":", ":", tf.newaxis] }, axis: -1);
          xy_grid = tf.tile(xy_grid[tf.newaxis, ":", ":", tf.newaxis, ":"], new object[] { batch_size, 1, 1, anchor_count, 1 });
          xy_grid = tf.cast(xy_grid, tf.float32);

          var pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * stride;
          var pred_wh = (tf.exp(conv_raw_dwdh) * anchors) * stride;
          var pred_xywh = tf.concat(new[] { pred_xy, pred_wh }, axis: -1);

          var pred_conf = tf.sigmoid(conv_raw_conf);
          var pred_prob = tf.sigmoid(conv_raw_prob);

          return tf.concat(new[] { pred_xywh, pred_conf, pred_prob }, axis: -1);
      }

      /// <summary>
      /// Sums the per-scale losses produced by <see cref="loss_layer"/> for the three heads.
      /// </summary>
      /// <param name="label_sbbox">Label tensor for the small head.</param>
      /// <param name="label_mbbox">Label tensor for the medium head.</param>
      /// <param name="label_lbbox">Label tensor for the large head.</param>
      /// <param name="true_sbbox">Boxes passed to the IoU comparison for the small head.</param>
      /// <param name="true_mbbox">Boxes passed to the IoU comparison for the medium head.</param>
      /// <param name="true_lbbox">Boxes passed to the IoU comparison for the large head.</param>
      /// <returns>(giou_loss, conf_loss, prob_loss), each summed over the three scales.</returns>
      public (Tensor, Tensor, Tensor) compute_loss(Tensor label_sbbox, Tensor label_mbbox, Tensor label_lbbox,
          Tensor true_sbbox, Tensor true_mbbox, Tensor true_lbbox)
      {
          Tensor giou_loss = null, conf_loss = null, prob_loss = null;
          (Tensor, Tensor, Tensor) loss_sbbox = (null, null, null);
          (Tensor, Tensor, Tensor) loss_mbbox = (null, null, null);
          (Tensor, Tensor, Tensor) loss_lbbox = (null, null, null);

          tf_with(tf.name_scope("smaller_box_loss"), delegate
          {
              loss_sbbox = loss_layer(conv_sbbox, pred_sbbox, label_sbbox, true_sbbox,
                  anchors: anchors[0], stride: strides[0]);
          });

          tf_with(tf.name_scope("medium_box_loss"), delegate
          {
              loss_mbbox = loss_layer(conv_mbbox, pred_mbbox, label_mbbox, true_mbbox,
                  anchors: anchors[1], stride: strides[1]);
          });

          tf_with(tf.name_scope("bigger_box_loss"), delegate
          {
              loss_lbbox = loss_layer(conv_lbbox, pred_lbbox, label_lbbox, true_lbbox,
                  anchors: anchors[2], stride: strides[2]);
          });

          tf_with(tf.name_scope("giou_loss"), delegate
          {
              giou_loss = loss_sbbox.Item1 + loss_mbbox.Item1 + loss_lbbox.Item1;
          });

          tf_with(tf.name_scope("conf_loss"), delegate
          {
              conf_loss = loss_sbbox.Item2 + loss_mbbox.Item2 + loss_lbbox.Item2;
          });

          tf_with(tf.name_scope("prob_loss"), delegate
          {
              prob_loss = loss_sbbox.Item3 + loss_mbbox.Item3 + loss_lbbox.Item3;
          });

          return (giou_loss, conf_loss, prob_loss);
      }

      /// <summary>
      /// Computes the GIoU, confidence and class-probability losses for one scale.
      /// </summary>
      /// <param name="conv">Raw head output; reshaped to (batch, size, size, anchor_per_scale, 5 + num_class).</param>
      /// <param name="pred">Decoded predictions for the same head; indexed with five axes here.</param>
      /// <param name="label">Label tensor; last axis is read as [xywh (4), respond flag (1), class probabilities].</param>
      /// <param name="bboxes">Boxes compared against every prediction to find its best IoU.</param>
      /// <param name="anchors">Not used by the current implementation.</param>
      /// <param name="stride">Multiplied by the head's grid size to obtain the input size.</param>
      /// <returns>(giou_loss, conf_loss, prob_loss); each is summed over axes 1-4 and averaged over the batch.</returns>
      public (Tensor, Tensor, Tensor) loss_layer(Tensor conv, Tensor pred, Tensor label, Tensor bboxes, NDArray anchors, int stride)
      {
          var conv_shape = tf.shape(conv);
          var batch_size = conv_shape[0];
          var output_size = conv_shape[1];
          var input_size = stride * output_size;

          conv = tf.reshape(conv, new object[] {batch_size, output_size, output_size,
              anchor_per_scale, 5 + num_class });

          var conv_raw_conf = conv[":", ":", ":", ":", "4:5"];
          var conv_raw_prob = conv[":", ":", ":", ":", "5:"];

          var pred_xywh = pred[":", ":", ":", ":", "0:4"];
          var pred_conf = pred[":", ":", ":", ":", "4:5"];

          var label_xywh = label[":", ":", ":", ":", "0:4"];
          var respond_bbox = label[":", ":", ":", ":", "4:5"];
          var label_prob = label[":", ":", ":", ":", "5:"];

          var giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis: -1);
          input_size = tf.cast(input_size, tf.float32);

          // The label box area (w * h) is normalised by the input AREA (input_size squared),
          // as in the reference YOLOv3 loss. Dividing by sqrt(input_size) instead lets the
          // scale go strongly negative and flips the sign of the GIoU term.
          // NOTE(review): assumes label w/h share the units of input_size - confirm against the data pipeline.
          var input_area = input_size * input_size;
          var bbox_loss_scale = 2.0 - 1.0 * label_xywh[":", ":", ":", ":", "2:3"] * label_xywh[":", ":", ":", ":", "3:4"] / input_area;
          var giou_loss = respond_bbox * bbox_loss_scale * (1 - giou);

          // IoU of every prediction against every supplied box, then the best match per prediction.
          var iou = bbox_iou(pred_xywh[":", ":", ":", ":", tf.newaxis, ":"], bboxes[":", tf.newaxis, tf.newaxis, tf.newaxis, ":", ":"]);
          var max_iou = tf.expand_dims(tf.reduce_max(iou, axis: new[] { -1 }), axis: -1);

          // Weight for non-responding cells whose best IoU is below iou_loss_thresh.
          var respond_bgd = (1.0 - respond_bbox) * tf.cast(max_iou < iou_loss_thresh, tf.float32);

          var conf_focal = focal(respond_bbox, pred_conf);

          var conf_loss = conf_focal * (
              respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels: respond_bbox, logits: conv_raw_conf) +
              respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels: respond_bbox, logits: conv_raw_conf));

          var prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels: label_prob, logits: conv_raw_prob);

          giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis: new[] { 1, 2, 3, 4 }));
          conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis: new[] { 1, 2, 3, 4 }));
          prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis: new[] { 1, 2, 3, 4 }));

          return (giou_loss, conf_loss, prob_loss);
      }

      /// <summary>
      /// Focal weighting factor: <c>alpha * |target - actual| ^ gamma</c>.
      /// </summary>
      /// <param name="target">Target values.</param>
      /// <param name="actual">Predicted values.</param>
      /// <param name="alpha">Multiplicative factor (default 1).</param>
      /// <param name="gamma">Exponent applied to the absolute difference (default 2).</param>
      /// <returns>The element-wise weighting tensor.</returns>
      public Tensor focal(Tensor target, Tensor actual, int alpha = 1, int gamma = 2)
      {
          var focal_loss = alpha * tf.pow(tf.abs(target - actual), gamma);
          return focal_loss;
      }

      /// <summary>
      /// Generalised IoU between two sets of boxes whose last axis is (x, y, w, h),
      /// where the first two components are treated as the box centre.
      /// </summary>
      /// <param name="boxes1">First set of boxes.</param>
      /// <param name="boxes2">Second set of boxes.</param>
      /// <returns><c>iou - (enclose_area - union_area) / enclose_area</c>, with the last axis removed.</returns>
      public Tensor bbox_giou(Tensor boxes1, Tensor boxes2)
      {
          // Centre/size -> (min corner, max corner).
          boxes1 = tf.concat(new[] { boxes1["...", ":2"] - boxes1["...", "2:"] * 0.5,
              boxes1["...", ":2"] + boxes1["...", "2:"] * 0.5}, axis: -1);
          boxes2 = tf.concat(new[] { boxes2["...", ":2"] - boxes2["...", "2:"] * 0.5,
              boxes2["...", ":2"] + boxes2["...", "2:"] * 0.5}, axis: -1);

          // Re-order the corners so the first pair is never larger than the second.
          boxes1 = tf.concat(new[] { tf.minimum(boxes1["...", ":2"], boxes1["...", "2:"]),
              tf.maximum(boxes1["...", ":2"], boxes1["...", "2:"])}, axis: -1);
          boxes2 = tf.concat(new[] { tf.minimum(boxes2["...", ":2"], boxes2["...", "2:"]),
              tf.maximum(boxes2["...", ":2"], boxes2["...", "2:"])}, axis: -1);

          var boxes1_area = (boxes1["...", "2"] - boxes1["...", "0"]) * (boxes1["...", "3"] - boxes1["...", "1"]);
          var boxes2_area = (boxes2["...", "2"] - boxes2["...", "0"]) * (boxes2["...", "3"] - boxes2["...", "1"]);

          var left_up = tf.maximum(boxes1["...", ":2"], boxes2["...", ":2"]);
          var right_down = tf.minimum(boxes1["...", "2:"], boxes2["...", "2:"]);

          // Clamp at zero so disjoint boxes yield an empty intersection.
          var inter_section = tf.maximum(right_down - left_up, 0.0f);
          var inter_area = inter_section["...", "0"] * inter_section["...", "1"];
          var union_area = boxes1_area + boxes2_area - inter_area;
          var iou = inter_area / union_area;

          // Smallest axis-aligned box that contains both inputs.
          var enclose_left_up = tf.minimum(boxes1["...", ":2"], boxes2["...", ":2"]);
          var enclose_right_down = tf.maximum(boxes1["...", "2:"], boxes2["...", "2:"]);
          var enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0);
          var enclose_area = enclose["...", "0"] * enclose["...", "1"];
          var giou = iou - 1.0 * (enclose_area - union_area) / enclose_area;

          return giou;
      }

      /// <summary>
      /// Plain IoU between two sets of boxes whose last axis is (x, y, w, h),
      /// where the first two components are treated as the box centre.
      /// </summary>
      /// <param name="boxes1">First set of boxes.</param>
      /// <param name="boxes2">Second set of boxes.</param>
      /// <returns>Intersection area divided by union area, with the last axis removed.</returns>
      public Tensor bbox_iou(Tensor boxes1, Tensor boxes2)
      {
          // Areas are taken from w * h before the boxes are converted to corners.
          var boxes1_area = boxes1["...", "2"] * boxes1["...", "3"];
          var boxes2_area = boxes2["...", "2"] * boxes2["...", "3"];

          boxes1 = tf.concat(new[] { boxes1["...", ":2"] - boxes1["...", "2:"] * 0.5,
              boxes1["...", ":2"] + boxes1["...", "2:"] * 0.5}, axis: -1);
          boxes2 = tf.concat(new[] { boxes2["...", ":2"] - boxes2["...", "2:"] * 0.5,
              boxes2["...", ":2"] + boxes2["...", "2:"] * 0.5}, axis: -1);

          var left_up = tf.maximum(boxes1["...", ":2"], boxes2["...", ":2"]);
          var right_down = tf.minimum(boxes1["...", "2:"], boxes2["...", "2:"]);

          var inter_section = tf.maximum(right_down - left_up, 0.0);
          var inter_area = inter_section["...", "0"] * inter_section["...", "1"];
          var union_area = boxes1_area + boxes2_area - inter_area;
          var iou = 1.0 * inter_area / union_area;

          return iou;
      }
  }
  233. }