# Copyright 2021 The KubeEdge Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
- import logging
-
- import numpy as np
- import random
- import tensorflow as tf
- from PIL import Image
- from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
-
- LOG = logging.getLogger(__name__)
-
- flags = tf.flags.FLAGS
-
-
class DataGen(object):
    """Mini-batch generator for YOLOv3 training and validation data."""

    def __init__(self, config, train_data, valid_data):
        """Cache flag/config values and the two annotation datasets.

        :param config: object exposing ``anchors`` (flat number sequence)
            and ``max_boxes``
        :param train_data: list of annotation lines used for training
        :param valid_data: list of annotation lines used for validation
        """
        LOG.info("DataGen build start .......")

        self.input_shape = flags.input_shape
        self.batch_size = flags.batch_size
        self.class_names = flags.class_names
        self.num_classes = len(self.class_names)

        # Anchors arrive as a flat sequence; reshape into (N, 2) w/h pairs.
        anchor_values = [float(v) for v in config.anchors]
        self.anchors = np.array(anchor_values).reshape(-1, 2)
        self.max_boxes = config.max_boxes

        self.train_data = train_data
        self.train_curr_index = 0
        self.train_data_size = len(self.train_data)
        LOG.info('size of train data is : %d' % self.train_data_size)

        self.val_data = valid_data
        self.val_curr_index = 0
        self.val_data_size = len(self.val_data)
        LOG.info('size of validation data is : %d' % self.val_data_size)

        # Multi-scale training state: next_batch_train redraws cur_shape
        # every 25 batches.
        self.batch_index = 0
        self.cur_shape = flags.input_shape

        LOG.info("DataGen build end .......")
-
- def next_batch_train(self):
- multi_scales = [self.input_shape]
- for i in range(1, 3):
- multi_scales.append((self.input_shape[0] - 32 * i, self.input_shape[1] - 32 * i))
- multi_scales.append((self.input_shape[0] + 32 * i, self.input_shape[1] + 32 * i))
-
- if self.batch_index % 25 == 0:
- self.cur_shape = random.choice(multi_scales)
-
- self.batch_index += 1
- count, batch_data = self.next_batch(self.train_curr_index, self.train_data, self.train_data_size,
- self.cur_shape, True)
-
- if not count:
- self.train_curr_index = 0
- random.shuffle(self.train_data)
- return None
- else:
- self.train_curr_index += count
- batch_data['input_shape'] = self.cur_shape
- return batch_data
-
- def next_batch_validate(self):
- count, batch_data = self.next_batch(self.val_curr_index, self.val_data, self.val_data_size, self.input_shape,
- False)
- if not count:
- self.val_curr_index = 0
- return None
- else:
- self.val_curr_index += count
- return batch_data
-
- def next_batch(self, curr_index, dataset, data_size, input_shape, is_training):
-
- count = 0
- img_data_list = []
- box_data_list = []
- while curr_index < data_size:
- if curr_index % 10000 == 0:
- LOG.info("processing label line %d" % curr_index)
-
- curr_line = dataset[curr_index]
- count += 1
- curr_index += 1
-
- if len(curr_line.strip()) <= 0:
- LOG.info("current line length less than 0......")
- continue
-
- image_data, box_data = self.read_data(curr_line, input_shape, is_training, self.max_boxes)
- if image_data is None or box_data is None:
- continue
-
- img_data_list.append(image_data)
- box_data_list.append(box_data)
-
- if len(img_data_list) >= self.batch_size:
- batch_data = dict()
- batch_data['images'] = np.array(img_data_list)
- bbox_true_13, bbox_true_26, bbox_true_52 = self.preprocess_true_boxes(np.array(box_data_list),
- input_shape)
- batch_data['bbox_true_13'] = bbox_true_13 # np.array(bbox_13_list)
- batch_data['bbox_true_26'] = bbox_true_26 # np.array(bbox_26_list)
- batch_data['bbox_true_52'] = bbox_true_52 # np.array(bbox_52_list)
- return count, batch_data
-
- LOG.info('reaching the last line of data ~~~')
- return None, None
-
- def rand(self, a=0., b=1.):
- return np.random.rand() * (b - a) + a
-
    def read_data(self, annotation_line, input_shape=416, random=True, max_boxes=50, jitter=.3, hue=.1, sat=1.5,
                  val=1.5, proc_img=True):
        """
        random preprocessing for real-time data augmentation

        :param annotation_line: "img_path x1,y1,x2,y2,cls ..." whitespace-separated
        :param input_shape: target (h, w) network input size.
            NOTE(review): the default 416 is a bare int, but ``h, w = input_shape``
            below requires a 2-element shape — callers in this file always pass
            a tuple; confirm no caller relies on the default.
        :param random: True -> augmented training path; False -> deterministic
            letterbox resize (validation).
            NOTE(review): this parameter shadows the module-level ``random``
            import (unused inside this method, so behavior is unaffected).
        :param max_boxes: ground-truth boxes are padded/truncated to this count
        :return: (image_data, box_data) with image_data HxWx3 scaled to [0, 1]
            and box_data of shape (max_boxes, 5) [x1, y1, x2, y2, class_id];
            (None, None) when the sample yields no usable boxes.
        """

        line = annotation_line.split()
        image = Image.open(line[0])
        iw, ih = image.size
        h, w = input_shape
        # Each remaining token is "x_min,y_min,x_max,y_max,class_id".
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

        if not random:
            # Deterministic path: aspect-preserving resize centered on a gray
            # (128, 128, 128) canvas of the exact network input size.
            scale = min(float(w) / float(iw), float(h) / float(ih))
            nw = int(iw * scale)
            nh = int(ih * scale)
            dx = (w - nw) // 2
            dy = (h - nh) // 2
            image_data = 0
            if proc_img:
                image = image.resize((nw, nh), Image.BICUBIC)
                new_image = Image.new('RGB', (w, h), (128, 128, 128))
                new_image.paste(image, (dx, dy))
                image_data = np.array(new_image) / 255.

            # correct boxes: map coordinates into the letterboxed frame.
            # (box is an int array, so the scaled float values are truncated
            # on assignment.)
            box_data = np.zeros((max_boxes, 5))
            if len(box) > 0:
                np.random.shuffle(box)
                if len(box) > max_boxes: box = box[:max_boxes]
                box[:, [0, 2]] = box[:, [0, 2]] * scale + dx
                box[:, [1, 3]] = box[:, [1, 3]] * scale + dy
                box_data[:len(box)] = box
                return image_data, box_data
            else:
                # No ground-truth boxes at all: drop the sample.
                return None, None

        # Augmented (training) path.
        # resize image: jitter the aspect ratio and the overall scale.
        new_ar = float(w) / float(h) * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)

        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # place image at a random offset on the gray canvas (random crop/pad)
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

        # flip image or not (horizontal flip with probability 0.5)
        flip = self.rand() < .5
        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # convert image to gray or not (probability 0.25)
        gray = self.rand() < .25
        if gray: image = image.convert('L').convert('RGB')

        # distort image: random hue shift and saturation/value scaling in HSV
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
        x = rgb_to_hsv(np.array(image) / 255.)
        x[..., 0] += hue
        # Hue wraps around the [0, 1] circle.
        x[..., 0][x[..., 0] > 1] -= 1
        x[..., 0][x[..., 0] < 0] += 1
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x > 1] = 1
        x[x < 0] = 0
        image_data = hsv_to_rgb(x)  # numpy array, 0 to 1

        # correct boxes: apply the same scale/offset/flip to the coordinates,
        # then clip to the canvas and discard degenerate boxes.
        box_data = np.zeros((max_boxes, 5))
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip: box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box

            if len(box) > max_boxes:
                box = box[:max_boxes]
            # Augmentation may have clipped away every box: drop the sample.
            if len(box) == 0:
                return None, None

            box_data[:len(box)] = box

        return image_data, box_data
-
    def preprocess_true_boxes(self, true_boxes, in_shape=416):
        """Preprocesses the ground truth box of the training data

        Builds the three YOLOv3 target tensors, one per detection scale
        (grid strides 32, 16 and 8). Each box is assigned to the single
        anchor with the highest IOU against it, on the layer owning that
        anchor.

        :param true_boxes: ground truth box shape is [boxes, 5], x_min, y_min,
            x_max, y_max, class_id
        :param in_shape: network input size; NOTE(review): the default 416 is
            an int but the code below indexes it as a 2-element (h, w) array —
            callers in this file always pass a shape pair; confirm.
        :return: tuple of three float32 arrays shaped
            (batch, grid_h, grid_w, 3, 5 + num_classes)
        """

        num_layers = self.anchors.shape[0] // 3
        # Anchor indices per output layer: largest anchors on the coarsest grid.
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        true_boxes = np.array(true_boxes, dtype='float32')
        input_shape = np.array(in_shape, dtype='int32')
        # Convert corner boxes to center/size form (note the floor division
        # on the center), then normalize by (w, h) — input_shape is (h, w),
        # hence the [::-1].
        boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2.
        boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
        true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
        true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]

        # m = batch size; one zeroed target tensor per detection layer.
        m = true_boxes.shape[0]
        grid_shapes = [input_shape // 32, input_shape // 16, input_shape // 8]
        y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + self.num_classes),
                           dtype='float32') for l in range(num_layers)]
        # The dimension is expanded to calculate the IOU between the
        # anchors of all boxes in each graph by broadcasting
        anchors = np.expand_dims(self.anchors, 0)
        anchors_max = anchors / 2.
        anchors_min = -anchors_max
        # Because we padded the box before, we need to remove all 0 lines
        valid_mask = boxes_wh[..., 0] > 0

        for b in range(m):
            wh = boxes_wh[b, valid_mask[b]]
            if len(wh) == 0: continue

            # Expanding dimensions for broadcasting applications
            wh = np.expand_dims(wh, -2)
            # wh shape is [box_num, 1, 2]
            boxes_max = wh / 2.
            boxes_min = -boxes_max

            # IOU computed as if box and anchor share the same center.
            intersect_min = np.maximum(boxes_min, anchors_min)
            intersect_max = np.minimum(boxes_max, anchors_max)
            intersect_wh = np.maximum(intersect_max - intersect_min, 0.)
            intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
            box_area = wh[..., 0] * wh[..., 1]
            anchor_area = anchors[..., 0] * anchors[..., 1]
            iou = intersect_area / (box_area + anchor_area - intersect_area)

            # Find out the largest anchor box with the IOU of the ground truth
            # box, and then set the corresponding positions of different
            # proportions responsible for the ground turn box as the
            # coordinates of the ground truth box
            best_anchor = np.argmax(iou, axis=-1)
            for t, n in enumerate(best_anchor):
                for l in range(num_layers):
                    if n in anchor_mask[l]:
                        # (i, j) = grid cell containing the box center.
                        i = np.floor(true_boxes[b, t, 0] * grid_shapes[l][1]).astype('int32')
                        j = np.floor(true_boxes[b, t, 1] * grid_shapes[l][0]).astype('int32')
                        # k = anchor slot within this layer.
                        k = anchor_mask[l].index(n)

                        c = true_boxes[b, t, 4].astype('int32')
                        y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]
                        y_true[l][b, j, i, k, 4] = 1.  # objectness
                        y_true[l][b, j, i, k, 5 + c] = 1.  # one-hot class
        return y_true[0], y_true[1], y_true[2]