
!666 add lineage writer and st for optimizer

Merge pull request !666 from luopengting/optimizer_tests
tags/v1.0.0
mindspore-ci-bot committed 5 years ago
parent commit 48054f90e8
7 changed files with 449 additions and 2 deletions
  1. +2 -1     tests/st/func/lineagemgr/conftest.py
  2. +14 -0    tests/st/func/optimizer/__init__.py
  3. +66 -0    tests/st/func/optimizer/conftest.py
  4. +14 -0    tests/st/func/optimizer/targets/__init__.py
  5. +89 -0    tests/st/func/optimizer/targets/test_targets_restful_api.py
  6. +2 -1     tests/utils/lineage_writer/__init__.py
  7. +262 -0   tests/utils/lineage_writer/_lineage_writer.py

+2 -1  tests/st/func/lineagemgr/conftest.py

@@ -26,7 +26,7 @@ from ....utils.mindspore.dataset.engine.serializer_deserializer import SERIALIZE

sys.modules['mindspore'] = mindspore

-BASE_SUMMARY_DIR = tempfile.mkdtemp(prefix='test_lineage_summary_dir_base_')
+BASE_SUMMARY_DIR = tempfile.NamedTemporaryFile(prefix='test_lineage_summary_dir_base_').name
SUMMARY_DIR = os.path.join(BASE_SUMMARY_DIR, 'run1')
SUMMARY_DIR_2 = os.path.join(BASE_SUMMARY_DIR, 'run2')
SUMMARY_DIR_3 = os.path.join(BASE_SUMMARY_DIR, 'except_run')
@@ -35,6 +35,7 @@ COLLECTION_MODULE = 'TestModelLineage'
API_MODULE = 'TestModelApi'
DATASET_GRAPH = SERIALIZED_PIPELINE


def get_module_name(nodeid):
    """Get the module name from nodeid."""
    _, module_name, _ = nodeid.split("::")


+14 -0  tests/st/func/optimizer/__init__.py

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+66 -0  tests/st/func/optimizer/conftest.py

@@ -0,0 +1,66 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The st config for optimizer."""

import os
import shutil
import tempfile
from unittest.mock import Mock

import pytest
from flask import Response

from mindinsight.lineagemgr.cache_item_updater import LineageCacheItemUpdater
from mindinsight.datavisual.data_transform import data_manager
from mindinsight.datavisual.data_transform.data_manager import DataManager
from mindinsight.datavisual.utils import tools

SUMMARY_BASE_DIR = tempfile.NamedTemporaryFile(prefix='test_optimizer_summary_dir_base_').name
MOCK_DATA_MANAGER = DataManager(SUMMARY_BASE_DIR)
MOCK_DATA_MANAGER.register_brief_cache_item_updater(LineageCacheItemUpdater())
MOCK_DATA_MANAGER.start_load_data().join()


@pytest.fixture(scope="session")
def init_summary_logs():
    """Create summary directory."""
    try:
        if os.path.exists(SUMMARY_BASE_DIR):
            shutil.rmtree(SUMMARY_BASE_DIR)
        permissions = os.R_OK | os.W_OK | os.X_OK
        mode = permissions << 6
        if not os.path.exists(SUMMARY_BASE_DIR):
            os.mkdir(SUMMARY_BASE_DIR, mode=mode)
        yield
    finally:
        if os.path.exists(SUMMARY_BASE_DIR):
            shutil.rmtree(SUMMARY_BASE_DIR)


@pytest.fixture
def client():
    """Provide a Flask test client for the optimizer backend."""
    data_manager.DATA_MANAGER = MOCK_DATA_MANAGER

    # Register only the optimizer backend package when building the app.
    packages = ["mindinsight.backend.optimizer"]

    mock_obj = Mock(return_value=packages)
    tools.find_app_package = mock_obj

    from mindinsight.backend.application import APP
    APP.response_class = Response
    app_client = APP.test_client()

    yield app_client

+14 -0  tests/st/func/optimizer/targets/__init__.py

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+89 -0  tests/st/func/optimizer/targets/test_targets_restful_api.py

@@ -0,0 +1,89 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Test targets restful api."""
import json
import pytest

from ..conftest import MOCK_DATA_MANAGER, SUMMARY_BASE_DIR
from .....utils.lineage_writer import LineageWriter
from .....utils.lineage_writer.base import Metadata

BASE_URL = '/v1/mindinsight/optimizer/targets/search'


class TestTargets:
    """Test Targets."""

    def setup_class(self):
        """Setup class."""
        learning_rate = [0.01, 0.001, 0.02, 0.04, 0.05]
        acc = [0.8, 0.9, 0.8, 0.7, 0.6]
        self._train_ids = []
        params = {}
        for i, lr in enumerate(learning_rate):
            train_id = f'./train_{i + 1}'
            self._train_ids.append(train_id)
            params.update({
                train_id: {
                    'train': {Metadata.learning_rate: lr},
                    'eval': {Metadata.metrics: json.dumps({'acc': acc[i]})}
                }
            })

        lineage_writer = LineageWriter(SUMMARY_BASE_DIR)
        lineage_writer.create_summaries(train_job_num=5, params=params)

        MOCK_DATA_MANAGER.start_load_data().join()

    @pytest.mark.level0
    @pytest.mark.env_single
    @pytest.mark.platform_x86_cpu
    @pytest.mark.platform_arm_ascend_training
    @pytest.mark.platform_x86_gpu_training
    @pytest.mark.platform_x86_ascend_training
    @pytest.mark.usefixtures("init_summary_logs")
    def test_targets_search_success(self, client):
        """Test searching targets successfully."""
        search_conditions = {
            'summary_dir': {
                'in': self._train_ids
            }
        }
        response = client.post(BASE_URL, data=json.dumps(search_conditions))
        result = json.loads(response.data)

        # test metric name
        metric_names = [target.get('name') for target in result.get('targets')]
        acc_name = '[M]acc'
        assert acc_name in metric_names

        # test target bucket
        acc_index = metric_names.index(acc_name)
        acc_info = result.get('targets')[acc_index]
        buckets = acc_info.get('buckets')
        test_bucket_index = 3
        expected_bucket = [0.78, 0.06, 2]

        for index, exp_value in enumerate(expected_bucket):
            assert abs(buckets[test_bucket_index][index] - exp_value) < 1e-5

        # test importance
        hyper_params_info = acc_info.get('hyper_parameters')
        hyper_names = [param_info.get('name') for param_info in hyper_params_info]
        assert Metadata.learning_rate in hyper_names

        exp_value = 0.9714143567416961
        lr_index = hyper_names.index(Metadata.learning_rate)
        assert abs(hyper_params_info[lr_index].get('importance') - exp_value) < 1e-5
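
For orientation, the assertions above imply a response payload shaped roughly as follows. Field names come from the test; only the checked values are shown, and the bucket layout of [left edge, width, sample count] is an inference from the test data rather than documented API.

# Sketch of the /v1/mindinsight/optimizer/targets/search response shape,
# inferred from test_targets_search_success; not part of the change itself.
inferred_response = {
    'targets': [
        {
            'name': '[M]acc',  # metric targets appear with an '[M]' prefix
            'buckets': [
                # ... index 3 is the bucket the test checks; the triple looks
                # like [left edge, width, sample count]
                [0.78, 0.06, 2],
                # ...
            ],
            'hyper_parameters': [
                # assuming Metadata.learning_rate resolves to 'learning_rate'
                {'name': 'learning_rate', 'importance': 0.9714143567416961},
            ],
        },
    ],
}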

+2 -1  tests/utils/lineage_writer/__init__.py

@@ -15,5 +15,6 @@
"""Lineage writer module."""

from ._summary_record import LineageSummary
+from ._lineage_writer import LineageWriter

-__all__ = ["LineageSummary"]
+__all__ = ["LineageSummary", "LineageWriter"]

+262 -0  tests/utils/lineage_writer/_lineage_writer.py

@@ -0,0 +1,262 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Lineage writer to record lineage to summary log."""
import json
import os
import random
import shutil

from ._summary_record import LineageSummary
from .base import Metadata

CHILDREN_0 = {
    'dataset_dir': '/home/anthony/MindData/tests/dataset/data/testMnistData',
    'op_module': 'minddata.dataengine.datasets',
    'num_shards': None,
    'num_parallel_workers': None,
    'shuffle': None,
    'op_type': 'MnistDataset',
    'shard_id': None,
    'num_samples': 100,
    'sampler': {
        'sampler_module': 'minddata.dataengine.samplers',
        'sampler_name': 'RandomSampler',
        'replacement': True,
        'num_samples': 100
    },
    'children': []
}

CHILDREN_1 = {
    'op_type': 'MapDataset',
    'op_module': 'minddata.dataengine.datasets',
    'num_parallel_workers': None,
    'input_columns': ['image'],
    'operations': [],
    'children': []
}

CHILDREN_2 = {
    'op_type': 'MapDataset',
    'op_module': 'minddata.dataengine.datasets',
    'num_parallel_workers': None,
    'output_columns': [None],
    'input_columns': ['label'],
    'operations': [{
        'tensor_op_module': 'minddata.transforms.c_transforms',
        'tensor_op_name': 'OneHot',
        'num_classes': 10
    }],
    'children': []
}

CHILDREN_3 = {
    'op_type': 'ShuffleDataset',
    'op_module': 'minddata.dataengine.datasets',
    'num_parallel_workers': None,
    'buffer_size': 10,
    'children': []
}


def _get_operations(rescale=0.003921, normalize_weight=0.48):
    """Get operations."""
    operation_0 = {
        'tensor_op_module': 'minddata.transforms.c_transforms',
        'tensor_op_name': 'RandomCrop',
        'weight': [32, 32, 4, 4, 4, 4],
        'padding_mode': "constant",
        'pad_if_needed': False,
        'fill_value': 0
    }
    operation_1 = {
        'tensor_op_module': 'minddata.transforms.c_transforms',
        'tensor_op_name': 'Rescale',
        'rescale': rescale,
        'shift': 0,
        'num_classes': 10
    }
    operation_2 = {
        'tensor_op_module': 'minddata.transforms.c_transforms',
        'tensor_op_name': 'Normalize',
        'weights': [normalize_weight]
    }

    return [operation_0, operation_1, operation_2]


def generate_graph(dataset_name='MnistDataset', batch_size=16, buffer_size=10,
                   rescale=0.003921, num_samples=100, normalize_weight=0.48):
    """Generate dataset graph."""
    children_0 = dict(CHILDREN_0)
    children_0['op_type'] = dataset_name
    children_0['num_samples'] = num_samples
    children_0['sampler']['num_samples'] = num_samples

    children_1 = dict(CHILDREN_1)
    children_1['operations'] = _get_operations(rescale, normalize_weight)
    children_1['children'] = [children_0]

    children_2 = dict(CHILDREN_2)
    children_2['buffer_size'] = buffer_size
    children_2['children'] = [children_1]

    children_3 = dict(CHILDREN_3)
    children_3['children'] = [children_2]

    dataset_graph = {
        'num_parallel_workers': None,
        'op_type': 'BatchDataset',
        'op_module': 'minddata.dataengine.datasets',
        'drop_remainder': True,
        'batch_size': batch_size,
        'children': [children_3]
    }
    return dataset_graph


def get_train_args():
    """Get default train args."""
    train_args = dict()
    train_args[Metadata.train_network] = "LeNet5"
    train_args[Metadata.loss] = 0.01
    train_args[Metadata.learning_rate] = 0.01
    train_args[Metadata.optimizer] = "Momentum"
    train_args[Metadata.loss_function] = "SoftmaxCrossEntropyWithLogits"
    train_args[Metadata.epoch] = 500
    train_args[Metadata.parallel_mode] = ""
    train_args[Metadata.device_num] = 1
    train_args[Metadata.batch_size] = 32
    train_args[Metadata.train_dataset_path] = "/home/data/train"
    train_args[Metadata.train_dataset_size] = 301234
    train_args[Metadata.model_path] = "/home/demo/demo_model.pkl"
    train_args[Metadata.model_size] = 100 * 1024 * 1024
    train_args["user_defined_info"] = {"Version_train": "v1"}
    train_args["dataset_graph"] = generate_graph()

    return train_args


def get_eval_args():
    """Get default eval args."""
    eval_args = dict()
    eval_args[Metadata.metrics] = json.dumps({"acc": 0.88})
    eval_args[Metadata.valid_dataset_path] = "/home/data/test"
    eval_args[Metadata.valid_dataset_size] = 5000
    eval_args["user_defined_info"] = {"Version_eval": "v1"}

    return eval_args


class LineageWriter:
    """Lineage writer."""

    def __init__(self, base_dir, summary_type=None):
        # Write both train and eval lineage by default.
        if summary_type is None:
            summary_type = ['train', 'eval']
        self._summary_type = summary_type

        self.base_dir = base_dir
        self._init_summary_base_dir()

    def _init_summary_base_dir(self, clean_base_dir=False):
        """Init summary base dir."""
        if clean_base_dir and os.path.exists(self.base_dir):
            shutil.rmtree(self.base_dir)
        if not os.path.exists(self.base_dir):
            os.makedirs(self.base_dir)

    def _create_event(self, lineage_summary, args, mode='train'):
        """Create event."""
        if mode == 'train':
            lineage_summary.record_train_lineage(args)
            lineage_summary.record_user_defined_info(args["user_defined_info"])
            lineage_summary.record_dataset_graph(args["dataset_graph"])
        else:
            lineage_summary.record_evaluation_lineage(args)
            lineage_summary.record_user_defined_info(args["user_defined_info"])

    def _get_random_train_args(self):
        """Get random train args."""
        network = ['ResNet', 'LeNet5', 'AlexNet']
        optimizer = ['SGD', 'Adam', 'Momentum']
        loss_function = ["SoftmaxCrossEntropyWithLogits", "CrossEntropyLoss"]
        dataset = ['MindDataset', 'MnistDataset', 'Cifar10Dataset']

        train_args = dict()
        train_args[Metadata.learning_rate] = random.uniform(0.001, 0.005)
        train_args[Metadata.loss] = random.uniform(0.001, 0.005)
        train_args[Metadata.epoch] = random.choice([100, 200, 300])
        train_args[Metadata.batch_size] = random.choice([16, 32, 64])
        train_args[Metadata.model_size] = random.randint(350, 450) * 1024 * 1024
        train_args[Metadata.train_network] = random.choice(network)
        train_args[Metadata.optimizer] = random.choice(optimizer)
        train_args[Metadata.device_num] = random.choice([1, 2, 4, 6, 8])
        train_args[Metadata.loss_function] = random.choice(loss_function)
        train_args[Metadata.train_dataset_size] = random.choice([56, 67, 78]) * 10000

        dataset_graph = generate_graph(
            dataset_name=random.choice(dataset),
            batch_size=random.choice([8, 16, 32, 64]),
            buffer_size=random.choice([10, 20, 30]),
            rescale=random.choice([0.003921, 0.005632, 0.0078, 0.005678]),
            num_samples=random.choice([100, 200, 300]),
            normalize_weight=random.choice([0.20, 0.50])  # random.uniform(0.2, 0.5)
        )
        train_args["dataset_graph"] = dataset_graph

        return train_args

    def _get_random_eval_args(self):
        """Get random eval args."""
        eval_args = dict()
        eval_args[Metadata.valid_dataset_size] = random.choice([13, 24, 28]) * 100
        eval_args[Metadata.metrics] = json.dumps({'Accuracy': random.uniform(0.85, 0.96)})

        return eval_args

    def create_summary_for_one_train(self, train_id, mode='train', random_mode=True, user_defined_params=None):
        """Create summary for one train."""
        summary_dir = os.path.join(self.base_dir, train_id)

        if not os.path.exists(summary_dir):
            os.makedirs(summary_dir)

        lineage_summary = LineageSummary(summary_dir)
        args = {}

        if mode == 'train':
            args = get_train_args()
            params = self._get_random_train_args() if random_mode else {}
            args.update(params)
        elif mode == 'eval':
            args = get_eval_args()
            params = self._get_random_eval_args() if random_mode else {}
            args.update(params)

        if user_defined_params is not None:
            args.update(user_defined_params)

        self._create_event(lineage_summary, args, mode)

    def create_summaries(self, train_id_prefix='train_', start_id=1, train_job_num=1, random_mode=True, params=None):
        """Create summaries for several trains."""
        if params is None:
            params = {}

        train_ids = [f'./{train_id_prefix}{i}' for i in range(start_id, start_id + train_job_num)]

        for train_id in train_ids:
            user_defined_params = params.get(train_id, {})
            for mode in self._summary_type:
                self.create_summary_for_one_train(train_id, mode, random_mode, user_defined_params.get(mode))
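
As a usage reference, here is a minimal sketch of driving the new writer outside the ST fixtures. The absolute import path and the temporary directory are assumptions; the keyword arguments mirror the call in test_targets_restful_api.py above.

# Minimal usage sketch (not part of this change). The import path below is an
# assumption based on the tests/utils/ layout.
import json
import tempfile

from tests.utils.lineage_writer import LineageWriter
from tests.utils.lineage_writer.base import Metadata

summary_base_dir = tempfile.mkdtemp(prefix='lineage_writer_demo_')
writer = LineageWriter(summary_base_dir)

# Five train jobs named ./train_1 .. ./train_5; the first gets a fixed
# learning rate and accuracy, the rest keep their randomized defaults.
writer.create_summaries(
    train_job_num=5,
    params={
        './train_1': {
            'train': {Metadata.learning_rate: 0.01},
            'eval': {Metadata.metrics: json.dumps({'acc': 0.8})},
        }
    },
)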
