
Add lib code for incremental learning feature

Signed-off-by: khalid-davis <huangqinkai1@huawei.com>
Author: khalid-davis, committed by llhuii, 4 years ago
Commit: 6072a66a11 (tags/v0.1.0)
9 changed files with 210 additions and 7 deletions
  1. build/worker/base_images/tensorflow/tensorflow-2.3.Dockerfile (+13, -0)
  2. examples/surface_defect_detection/training_worker/inference.py (+2, -2)
  3. examples/surface_defect_detection/training_worker/train.py (+1, -1)
  4. lib/neptune/__init__.py (+1, -1)
  5. lib/neptune/common/constant.py (+1, -0)
  6. lib/neptune/dataset/dataset.py (+15, -3)
  7. lib/neptune/incremental_learning/__init__.py (+1, -0)
  8. lib/neptune/incremental_learning/incremental_learning.py (+174, -0)
  9. lib/requirements.txt (+2, -0)

build/worker/base_images/tensorflow/tensorflow-2.3.Dockerfile (+13, -0)

@@ -0,0 +1,13 @@
+FROM tensorflow/tensorflow:2.3.0
+
+RUN apt update \
+  && apt install -y libgl1-mesa-glx
+COPY ./lib/requirements.txt /home
+RUN pip install -r /home/requirements.txt
+
+ENV PYTHONPATH "/home/lib"
+
+WORKDIR /home/work
+COPY ./lib /home/lib
+
+ENTRYPOINT ["python"]
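
How the image is meant to be used: the entrypoint is plain `python` and the copied lib is on PYTHONPATH, so a worker container passes its script path as the command. A minimal, purely illustrative sketch (worker.py and the run command are assumptions for illustration, not part of this commit):

# worker.py -- illustrative sketch; would run as:
#   docker run <image> /home/work/worker.py
# The Dockerfile sets PYTHONPATH=/home/lib, so the copied lib imports cleanly.
import neptune

# load_train_dataset is re-exported from lib/neptune/dataset/dataset.py;
# the dataset location is resolved via the environment-driven BaseConfig.
train_data = neptune.load_train_dataset(data_format="txt", with_image=True)
print(f"loaded {len(train_data)} training samples")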

examples/surface_defect_detection/training_worker/inference.py (+2, -2)

@@ -2,13 +2,13 @@ import logging
 
 import numpy as np
 
-from neptune.ml_model import load_model
+import neptune.ml_model
 from neptune.ml_model import load_model
 
 LOG = logging.getLogger(__name__)
 
 if __name__ == '__main__':
-    valid_data = neptune.load_test_dataset(data_format="txt")
+    valid_data = neptune.load_test_dataset(data_format="txt", with_image=True)
 
     x_valid = np.array([tup[0] for tup in valid_data])
     y_valid = np.array([tup[1] for tup in valid_data])


examples/surface_defect_detection/training_worker/train.py (+1, -1)

@@ -8,7 +8,7 @@ from network import GlobalModelInspectionCNN
 
 def main():
     # load dataset.
-    train_data = neptune.load_train_dataset(data_format="txt")
+    train_data = neptune.load_train_dataset(data_format="txt", with_image=True)
 
     x = np.array([tup[0] for tup in train_data])
     y = np.array([tup[1] for tup in train_data])


lib/neptune/__init__.py (+1, -1)

@@ -1,6 +1,6 @@
 import logging
 
-from . import joint_inference, federated_learning
+from . import joint_inference, federated_learning, incremental_learning
 from .context import context
 from .dataset.dataset import load_train_dataset, load_test_dataset



lib/neptune/common/constant.py (+1, -0)

@@ -18,6 +18,7 @@ class Framework(Enum):
 class K8sResourceKind(Enum):
     JOINT_INFERENCE_SERVICE = "jointinferenceservice"
     FEDERATED_LEARNING_JOB = "federatedlearningjob"
+    INCREMENTAL_JOB = "incrementallearningjob"
 
 
 class K8sResourceKindStatus(Enum):


lib/neptune/dataset/dataset.py (+15, -3)

@@ -21,9 +21,12 @@ def _load_dataset(dataset_url, format, **kwargs):
         LOG.warning(f'dataset_url is None, please check the url.')
         return None
     if format == 'txt':
-        LOG.info("dataset format is txt, now loading txt from "
-                 f"[{dataset_url}]")
-        return _load_txt_dataset(dataset_url)
+        LOG.info(
+            f"dataset format is txt, now loading txt from [{dataset_url}]")
+        if kwargs.get('with_image'):
+            return _load_txt_dataset_with_image(dataset_url)
+        else:
+            return _load_txt_dataset(dataset_url)


def load_train_dataset(data_format, **kwargs):
@@ -45,6 +48,15 @@ def load_test_dataset(data_format, **kwargs):
 
 
 def _load_txt_dataset(dataset_url):
+    LOG.info(f'dataset_url is {dataset_url}, now reading dataset_url')
     root_path = BaseConfig.data_path_prefix
     with open(dataset_url) as f:
         lines = f.readlines()
     new_lines = [root_path + os.path.sep + l for l in lines]
     return new_lines
+
+
+def _load_txt_dataset_with_image(dataset_url):
+    import keras.preprocessing.image as img_preprocessing
+    root_path = os.path.dirname(dataset_url)
+    img_data = []
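
The rendered view cuts off inside the new helper. A plausible completion, purely a sketch (it assumes each line of the index file is "<relative image path> <label>" and reuses the module's existing os import and the keras alias from the hunk above; the real commit may differ):

def _load_txt_dataset_with_image(dataset_url):
    import keras.preprocessing.image as img_preprocessing
    root_path = os.path.dirname(dataset_url)
    img_data = []
    with open(dataset_url) as f:
        for line in f:
            # assumed index format: "<relative image path> <label>"
            img_path, label = line.strip().split()
            img = img_preprocessing.load_img(os.path.join(root_path, img_path))
            # pair the decoded image array with its label, matching the
            # (tup[0], tup[1]) access pattern in the example workers
            img_data.append((img_preprocessing.img_to_array(img), label))
    return img_data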


lib/neptune/incremental_learning/__init__.py (+1, -0)

@@ -0,0 +1 @@
+from .incremental_learning import *

lib/neptune/incremental_learning/incremental_learning.py (+174, -0)

@@ -0,0 +1,174 @@
+import logging
+
+import os
+import tensorflow as tf
+
+import neptune
+from neptune.common.config import BaseConfig
+from neptune.common.constant import K8sResourceKindStatus, K8sResourceKind
+from neptune.common.utils import clean_folder, remove_path_prefix
+from neptune.hard_example_mining import CrossEntropyFilter, IBTFilter, \
+    ThresholdFilter
+from neptune.joint_inference import TSLittleModel
+from neptune.lc_client import LCClient
+
+LOG = logging.getLogger(__name__)
+
+
+class IncrementalConfig(BaseConfig):
+    def __init__(self):
+        BaseConfig.__init__(self)
+        self.model_urls = os.getenv("MODEL_URLS")
+        self.base_model_url = os.getenv("BASE_MODEL_URL")
+
+
+def train(model, train_data, epochs, batch_size, class_names, input_shape,
+          obj_threshold, nms_threshold):
+    """The train endpoint of incremental learning.
+
+    :param model: the train model
+    :param train_data: the data used for training
+    :param epochs: the number of epochs for training the model
+    :param batch_size: the number of samples in one training batch
+    :param class_names: the class names of the dataset
+    :param input_shape: the input shape of the model
+    :param obj_threshold: the confidence threshold for detected objects
+    :param nms_threshold: the IoU threshold for non-maximum suppression
+    """
+    il_config = IncrementalConfig()
+
+    clean_folder(il_config.model_url)
+    model.train(train_data, [])  # validation data is empty.
+    tf.reset_default_graph()
+    model.save_model_pb()
+
+    ckpt_model_url = remove_path_prefix(il_config.model_url,
+                                        il_config.data_path_prefix)
+    pb_model_url = remove_path_prefix(
+        os.path.join(il_config.model_url, 'model.pb'),
+        il_config.data_path_prefix)
+
+    # TODO: check whether removing the metrics affects the LC
+    ckpt_result = {
+        "format": "ckpt",
+        "url": ckpt_model_url,
+    }
+
+    pb_result = {
+        "format": "pb",
+        "url": pb_model_url,
+    }
+
+    results = [ckpt_result, pb_result]
+
+    message = {
+        "name": il_config.worker_name,
+        "namespace": il_config.namespace,
+        "ownerName": il_config.job_name,
+        "ownerKind": K8sResourceKind.INCREMENTAL_JOB.value,
+        "kind": "train",
+        "status": K8sResourceKindStatus.COMPLETED.value,
+        "results": results
+    }
+    LCClient.send(il_config.worker_name, message)
+
+
+def evaluate(model, test_data, class_names, input_shape):
+    """The evaluation endpoint of the incremental job.
+
+    :param model: the model used for evaluation
+    :param test_data: the data used for evaluation
+    :param class_names: the class names of the dataset
+    :param input_shape: the input shape of the model
+    """
+    il_config = IncrementalConfig()
+
+    results = []
+    for model_url in il_config.model_urls.split(';'):
+        precision, recall, all_precisions, all_recalls = model(
+            model_path=model_url,
+            test_dataset=test_data,
+            class_names=class_names,
+            input_shape=input_shape)
+
+        result = {
+            "format": "pb",
+            "url": remove_path_prefix(model_url, il_config.data_path_prefix),
+            "metrics": {
+                "recall": recall,
+                "precision": precision
+            }
+        }
+        results.append(result)
+
+    message = {
+        "name": il_config.worker_name,
+        "namespace": il_config.namespace,
+        "ownerName": il_config.job_name,
+        "ownerKind": K8sResourceKind.INCREMENTAL_JOB.value,
+        "kind": "eval",
+        "status": K8sResourceKindStatus.COMPLETED.value,
+        "results": results
+    }
+
+    LCClient.send(il_config.worker_name, message)
+
+
+class TSModel(TSLittleModel):
+    def __init__(self, preprocess=None, postprocess=None, input_shape=(0, 0),
+                 create_input_feed=None, create_output_fetch=None):
+        TSLittleModel.__init__(self, preprocess, postprocess, input_shape,
+                               create_input_feed, create_output_fetch)
+
+
+class InferenceResult:
+    def __init__(self, is_hard_example, infer_result):
+        self.is_hard_example = is_hard_example
+        self.infer_result = infer_result
+
+
+class Inference:
+    def __init__(self, model: TSModel, hard_example_mining_algorithm=None):
+        if hard_example_mining_algorithm is None:
+            hem_name = BaseConfig.hem_name
+
+            if hem_name == "IBT":
+                threshold_box = float(neptune.context.get_hem_parameters(
+                    "threshold_box", 0.8
+                ))
+                threshold_img = float(neptune.context.get_hem_parameters(
+                    "threshold_img", 0.8
+                ))
+                hard_example_mining_algorithm = IBTFilter(threshold_img,
+                                                          threshold_box)
+            elif hem_name == "CrossEntropy":
+                threshold_cross_entropy = float(
+                    neptune.context.get_hem_parameters(
+                        "threshold_cross_entropy", 0.5
+                    )
+                )
+                hard_example_mining_algorithm = CrossEntropyFilter(
+                    threshold_cross_entropy)
+            else:
+                hard_example_mining_algorithm = ThresholdFilter()
+        self.hard_example_mining_algorithm = hard_example_mining_algorithm
+        self.model = model
+
+    def inference(self, img_data) -> InferenceResult:
+        result = self.model.inference(img_data)
+        bboxes = deal_infer_rsl(result)
+        is_hard_example = self.hard_example_mining_algorithm.hard_judge(bboxes)
+        if is_hard_example:
+            return InferenceResult(True, result)
+        else:
+            return InferenceResult(False, result)
+
+
+def deal_infer_rsl(model_output):
+    # reorder each box's coordinates (swap x/y) and append score and class,
+    # producing the flat box format the hard-example filters expect
+    all_classes, all_scores, all_bboxes = model_output
+    bboxes = []
+    for c, s, bbox in zip(all_classes, all_scores, all_bboxes):
+        bbox[0], bbox[1], bbox[2], bbox[3] = bbox[1], bbox[0], bbox[3], bbox[2]
+        bboxes.append(bbox.tolist() + [s, c])
+
+    return bboxes
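
Taken together, the new module wires hard-example mining into the inference path. A hedged usage sketch of the Inference wrapper (the model construction arguments and the img_data frame below are placeholders, not defined by this commit; TSModel's real preprocess/postprocess and feed/fetch builders depend on the actual model graph):

# Sketch only: how a worker might drive the new Inference wrapper.
from neptune.incremental_learning import Inference, InferenceResult, TSModel

model = TSModel()  # placeholder: real callers pass preprocess/postprocess,
                   # input_shape, create_input_feed, create_output_fetch

# With no algorithm passed, Inference picks the filter from the HEM config:
# "IBT", "CrossEntropy", or the default ThresholdFilter.
inferer = Inference(model)

result: InferenceResult = inferer.inference(img_data)  # img_data: one frame
if result.is_hard_example:
    # hard examples would be kept to feed the next incremental training round
    print("hard example:", result.infer_result)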

lib/requirements.txt (+2, -0)

@@ -4,3 +4,5 @@ opencv-python==4.4.0.44
 websockets==8.1
 Pillow==8.0.1
 requests==2.24.0
+tqdm==4.56.0
+matplotlib==3.3.3
