initial version

Tag: tags/v0.2.0-alpha
Author: gaocongli, 5 years ago
Commit: e7a0496e87
100 changed files with 13954 additions and 0 deletions
1. .gitignore (+89, -0)
2. LICENSE (+201, -0)
3. MANIFEST.in (+7, -0)
4. NOTICE (+2, -0)
5. README.md (+115, -0)
6. RELEASE.md (+9, -0)
7. SECURITY.md (+6, -0)
8. build/build.sh (+121, -0)
9. build/scripts/crc32.sh (+90, -0)
10. build/scripts/ui.sh (+49, -0)
11. docs/README.md (+3, -0)
12. docs/arch.png (BIN)
13. mindinsight/__init__.py (+26, -0)
14. mindinsight/__main__.py (+19, -0)
15. mindinsight/_version.py (+17, -0)
16. mindinsight/backend/__init__.py (+14, -0)
17. mindinsight/backend/application.py (+130, -0)
18. mindinsight/backend/config/__init__.py (+18, -0)
19. mindinsight/backend/config/gunicorn_conf.py (+45, -0)
20. mindinsight/backend/datavisual/__init__.py (+38, -0)
21. mindinsight/backend/datavisual/static_resource_api.py (+46, -0)
22. mindinsight/backend/datavisual/task_manager_api.py (+94, -0)
23. mindinsight/backend/datavisual/train_visual_api.py (+156, -0)
24. mindinsight/backend/lineagemgr/__init__.py (+31, -0)
25. mindinsight/backend/lineagemgr/lineage_api.py (+191, -0)
26. mindinsight/backend/run.py (+256, -0)
27. mindinsight/common/__init__.py (+14, -0)
28. mindinsight/common/hook/__init__.py (+14, -0)
29. mindinsight/common/hook/datavisual.py (+89, -0)
30. mindinsight/conf/__init__.py (+150, -0)
31. mindinsight/conf/constants.py (+58, -0)
32. mindinsight/conf/defaults.py (+32, -0)
33. mindinsight/datavisual/__init__.py (+14, -0)
34. mindinsight/datavisual/common/__init__.py (+14, -0)
35. mindinsight/datavisual/common/enums.py (+39, -0)
36. mindinsight/datavisual/common/error_handler.py (+63, -0)
37. mindinsight/datavisual/common/exceptions.py (+83, -0)
38. mindinsight/datavisual/common/log.py (+19, -0)
39. mindinsight/datavisual/common/validation.py (+102, -0)
40. mindinsight/datavisual/data_access/__init__.py (+14, -0)
41. mindinsight/datavisual/data_access/base_file_system.py (+68, -0)
42. mindinsight/datavisual/data_access/file_handler.py (+290, -0)
43. mindinsight/datavisual/data_access/local_file_system.py (+143, -0)
44. mindinsight/datavisual/data_transform/__init__.py (+14, -0)
45. mindinsight/datavisual/data_transform/data_loader.py (+70, -0)
46. mindinsight/datavisual/data_transform/data_manager.py (+514, -0)
47. mindinsight/datavisual/data_transform/events_data.py (+216, -0)
48. mindinsight/datavisual/data_transform/graph/__init__.py (+20, -0)
49. mindinsight/datavisual/data_transform/graph/graph.py (+455, -0)
50. mindinsight/datavisual/data_transform/graph/msgraph.py (+274, -0)
51. mindinsight/datavisual/data_transform/graph/node.py (+211, -0)
52. mindinsight/datavisual/data_transform/loader_generators/__init__.py (+14, -0)
53. mindinsight/datavisual/data_transform/loader_generators/data_loader_generator.py (+246, -0)
54. mindinsight/datavisual/data_transform/loader_generators/loader_generator.py (+60, -0)
55. mindinsight/datavisual/data_transform/loader_generators/loader_struct.py (+64, -0)
56. mindinsight/datavisual/data_transform/ms_data_loader.py (+373, -0)
57. mindinsight/datavisual/data_transform/reservoir.py (+108, -0)
58. mindinsight/datavisual/data_transform/summary_watcher.py (+344, -0)
59. mindinsight/datavisual/processors/__init__.py (+14, -0)
60. mindinsight/datavisual/processors/base_processor.py (+28, -0)
61. mindinsight/datavisual/processors/graph_processor.py (+145, -0)
62. mindinsight/datavisual/processors/images_processor.py (+92, -0)
63. mindinsight/datavisual/processors/scalars_processor.py (+43, -0)
64. mindinsight/datavisual/processors/train_task_manager.py (+65, -0)
65. mindinsight/datavisual/proto_files/__init__.py (+14, -0)
66. mindinsight/datavisual/proto_files/mindinsight_anf_ir.proto (+328, -0)
67. mindinsight/datavisual/proto_files/mindinsight_anf_ir_pb2.py (+1381, -0)
68. mindinsight/datavisual/proto_files/mindinsight_summary.proto (+155, -0)
69. mindinsight/datavisual/proto_files/mindinsight_summary_pb2.py (+1161, -0)
70. mindinsight/datavisual/utils/__init__.py (+16, -0)
71. mindinsight/datavisual/utils/crc32/base.h (+57, -0)
72. mindinsight/datavisual/utils/crc32/crc32.cc (+316, -0)
73. mindinsight/datavisual/utils/crc32/crc32.h (+54, -0)
74. mindinsight/datavisual/utils/tools.py (+155, -0)
75. mindinsight/lineagemgr/__init__.py (+33, -0)
76. mindinsight/lineagemgr/api/__init__.py (+14, -0)
77. mindinsight/lineagemgr/api/model.py (+292, -0)
78. mindinsight/lineagemgr/collection/__init__.py (+14, -0)
79. mindinsight/lineagemgr/collection/model/__init__.py (+14, -0)
80. mindinsight/lineagemgr/collection/model/base.py (+37, -0)
81. mindinsight/lineagemgr/collection/model/model_lineage.py (+621, -0)
82. mindinsight/lineagemgr/common/__init__.py (+14, -0)
83. mindinsight/lineagemgr/common/exceptions/__init__.py (+14, -0)
84. mindinsight/lineagemgr/common/exceptions/error_code.py (+207, -0)
85. mindinsight/lineagemgr/common/exceptions/exceptions.py (+191, -0)
86. mindinsight/lineagemgr/common/log.py (+20, -0)
87. mindinsight/lineagemgr/common/path_parser.py (+149, -0)
88. mindinsight/lineagemgr/common/utils.py (+56, -0)
89. mindinsight/lineagemgr/common/validator/__init__.py (+14, -0)
90. mindinsight/lineagemgr/common/validator/model_parameter.py (+253, -0)
91. mindinsight/lineagemgr/common/validator/validate.py (+395, -0)
92. mindinsight/lineagemgr/common/validator/validate_path.py (+120, -0)
93. mindinsight/lineagemgr/querier/__init__.py (+14, -0)
94. mindinsight/lineagemgr/querier/querier.py (+446, -0)
95. mindinsight/lineagemgr/querier/query_model.py (+344, -0)
96. mindinsight/lineagemgr/summary/__init__.py (+14, -0)
97. mindinsight/lineagemgr/summary/_summary_adapter.py (+293, -0)
98. mindinsight/lineagemgr/summary/event_writer.py (+95, -0)
99. mindinsight/lineagemgr/summary/file_handler.py (+95, -0)
100. mindinsight/lineagemgr/summary/lineage_summary_analyzer.py (+209, -0)

.gitignore (+89, -0)

@@ -0,0 +1,89 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
htmlcov
.trash

.pytest_cache/
# Distribution / packaging
bin/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Editors/IDEs
.idea/
*.sublime-*
*.swp
*.save
# test file
.coverage

.cache


# project wide git ignore

# Compiled artifacts
*.so
*.whl

# Python backup files
*.pyc

# Emacs backup files
*~
*#
.#*

# Vim file artifacts
.*.sw*

# Makefile dummy artifacts
.*-dummy

# log files
*.log

# code coverage
*.cov

# Test result xml files
report.xml
*.pprof
results.xml
TESTS*.xml

# local project settings
.settings
.project
.gradle
.idea

# tox
.tox/

# vscode settings
.vscode

package-lock.json

build/lib
build/bdist.*

output/
!output/README.md

third_party/securec/build

LICENSE (+201, -0)

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

MANIFEST.in (+7, -0)

@@ -0,0 +1,7 @@
recursive-include mindinsight *
recursive-exclude * .git
recursive-exclude * .gitignore
recursive-exclude * __pycache__
recursive-exclude * *.py[co] *.swp
recursive-exclude mindinsight/ui *
recursive-include mindinsight/ui/dist *

NOTICE (+2, -0)

@@ -0,0 +1,2 @@
MindSpore MindInsight
Copyright 2019-2020 Huawei Technologies Co., Ltd

README.md (+115, -0)

@@ -0,0 +1,115 @@
MindInsight provides MindSpore with easy-to-use debugging and tuning capabilities. It
enables users to visualize their experiments. The features of MindInsight are as follows.

- Visualization of the training process:

    Provides visualization of training process information,
    such as the computation graph and training metrics.

- Traceability of training results:

    Provides visualization of model parameter information,
    such as training data and model accuracy.


# Index

- [More about MindInsight](#more-about-mindinsight)
- [Installation](#installation)
- [QuickStart](#quickstart)
- [Docs](#docs)
- [Community](#community)
- [Contributing](#contributing)
- [Release Notes](#release-notes)
- [License](#license)

# More about MindInsight

The architecture diagram of MindInsight is illustrated as follows:


![MindInsight Architecture](docs/arch.png)


## Summary log file

The summary log file consists of a series of operation events. Each event contains
the necessary data for visualization.

MindSpore uses the Callback mechanism to record graph, scalar, image and model
information into the summary log file.

- Scalars and images are recorded by the Summary operator.

- The computation graph is recorded by SummaryRecord after it is compiled.

- Model parameters are recorded by TrainLineage or EvalLineage.

MindInsight provides the capability to analyze summary log files and visualize
the relevant information.
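
The sketch below shows roughly how such a summary log file is produced on the
MindSpore side. It is a minimal, hedged example: the import paths and signatures
(`SummaryRecord`, `SummaryStep`, `TrainLineage`) follow the alpha-era tutorials
and may differ between versions, and `net`, `model` and `ds_train` stand in for
a user-defined network, Model and dataset.

```python
# Minimal sketch of producing a summary log for MindInsight (alpha-era APIs).
# Import paths and signatures are assumptions based on the tutorials of that time.
from mindspore.train.summary import SummaryRecord
from mindspore.train.callback import SummaryStep
from mindinsight.lineagemgr import TrainLineage  # assumed export path

summary_record = SummaryRecord(log_dir='./summary_dir', network=net)
callbacks = [
    SummaryStep(summary_record, flush_step=10),  # scalars/images from Summary ops
    TrainLineage(summary_record),                # model parameters for lineage
]
model.train(epoch=2, train_dataset=ds_train, callbacks=callbacks)
summary_record.close()
```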

## Visualization

MindInsight provides a full-process visual GUI for AI development,
helping model developers improve model precision efficiently.

MindInsight has the following visualization capabilities:

### Graph visualization

The GUI of MindInsight displays the structure of the neural network, and the data flow
and control flow of each operator during the entire training process.

### Scalar visualization

The GUI of MindInsight displays how a specific scalar, such as the loss value or the
accuracy of each iteration, changes over the course of training.

Two scalar curves can be combined and displayed in one chart.

### Image visualization

The GUI of MindInsight displays both original images and enhanced images during the entire
training process.

### Model lineage visualization

The GUI of MindInsight displays the parameters and metrics of all models, such as the
learning rate, the number of samples and the loss function of each model.

### Dataset Graph visualization

The GUI of MindInsight displays the pipeline of dataset processing and augmentation.

### Dataset Lineage visualization

The GUI of MindInsight displays the parameters and operations of the dataset processing and augmentation.

# Installation

See [Install MindInsight](https://www.mindspore.cn/install/en).

# QuickStart

See [guidance](https://www.mindspore.cn/tutorial/en/0.1.0-alpha/advanced_use/visualization_tutorials.html)

# Docs

See [API Reference](https://www.mindspore.cn/api/en/master/index.html)

# Community

- [MindSpore Slack](https://join.slack.com/t/mindspore/shared_invite/enQtOTcwMTIxMDI3NjM0LTNkMWM2MzI5NjIyZWU5ZWQ5M2EwMTQ5MWNiYzMxOGM4OWFhZjI4M2E5OGI2YTg3ODU1ODE2Njg1MThiNWI3YmQ) - Communication platform for developers.

# Contributing

Contributions are welcome. See our [Contributor Wiki](https://gitee.com/mindspore/mindspore/blob/master/CONTRIBUTING.md) for more details.

# Release Notes

For release notes, see our [RELEASE](RELEASE.md).

# License

[Apache License 2.0](LICENSE)

RELEASE.md (+9, -0)

@@ -0,0 +1,9 @@
# MindInsight

## Release 0.1.0-alpha

* Training process observation
    * Provides and displays training process information, including computational graphs and training process indicators.

* Training result tracing
    * Provides tracing and visualization of model training parameter information, including filtering and sorting of training data, model accuracy and training hyperparameters.

SECURITY.md (+6, -0)

@@ -0,0 +1,6 @@
# MindInsight Application Scenarios and Security Risks
1. MindInsight is a local tool that communicates over the HTTP protocol, which is insecure. You are not advised to use it in cloud services or other scenarios with security requirements; otherwise, data may be stolen.
2. The MindInsight source code restricts access to localhost. If you modify the source code to remove the localhost binding restriction, data leakage may occur.

# MindInsight Security Usage Suggestions
- You are advised to create a dedicated OS user to install and run the MindInsight service. Permissions among OS users are isolated, which helps prevent data theft. In addition, you are advised to set a proper log directory size to prevent logging failures caused by insufficient disk space.

build/build.sh (+121, -0)

@@ -0,0 +1,121 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

SCRIPT_BASEDIR=$(
    cd "$(dirname "$0")" || exit
    pwd
)

rename_wheel() {
    VERSION="$1"
    PACKAGE_LIST=$(ls mindinsight-*-any.whl) || exit
    for PACKAGE_ORIG in ${PACKAGE_LIST}; do
        MINDINSIGHT_VERSION=$(echo "${PACKAGE_ORIG}" | awk -F"-" '{print $2}')
        PYTHON_VERSION_NUM=$(echo "${VERSION}" | awk -F"." '{print $1$2}')
        PYTHON_VERSION_TAG="cp${PYTHON_VERSION_NUM}"
        PYTHON_ABI_TAG="cp${PYTHON_VERSION_NUM}m"
        OS_NAME=$(uname | tr '[:upper:]' '[:lower:]')
        MACHINE_TAG="${OS_NAME}_$(uname -i)"
        PACKAGE_NEW="mindinsight-${MINDINSIGHT_VERSION}-${PYTHON_VERSION_TAG}-${PYTHON_ABI_TAG}-${MACHINE_TAG}.whl"
        mv "${PACKAGE_ORIG}" "${PACKAGE_NEW}"
    done
}

build_wheel() {
    PROJECT_BASEDIR=$(cd "$(dirname "$SCRIPT_BASEDIR")" || exit; pwd)
    cd "${PROJECT_BASEDIR}" || exit

    if [ $# -gt 0 ]; then
        if [ "$1" = "clean" ]; then
            echo "start cleaning mindinsight"
            clean_files
            echo "clean mindinsight done"
        else
            echo "unknown command: $1"
        fi
        exit
    fi

    echo "start building mindinsight"
    clean_files

    PYTHON=$(command -v python3 || command -v python)
    if [ -z "${PYTHON}" ]; then
        echo "Could not find python3 or python command"
        exit 1
    fi
    PYTHON_VERSION=$(${PYTHON} -c "import platform; print(platform.python_version())" | grep '^3.*')
    if [ -z "${PYTHON_VERSION}" ]; then
        echo "Could not find Python 3"
        exit 1
    fi

    rm -rf output
    mkdir output

    ${PYTHON} setup.py bdist_wheel
    if [ ! -x "dist" ]; then
        echo "Build failed"
        exit 1
    fi

    mv dist/mindinsight-*-any.whl output/

    cd output || exit
    rename_wheel "${PYTHON_VERSION}"
    cd - >/dev/null 2>&1 || exit

    clean_files

    echo "Build success, output directory is: ${PROJECT_BASEDIR}/output"
}

clean_files() {
    rm -rf third_party/build
    rm -rf build/lib
    rm -rf build/bdist.*
    rm -rf mindinsight.egg-info
    rm -rf dist
}

show_usage() {
    echo "Build mindinsight"
    echo ""
    echo "usage: build.sh [-h] [clean]"
    echo ""
    echo "options:"
    echo "  -h     show usage info"
    echo "  clean  clean build files"
}

check_opts() {
    while getopts ':h' OPT; do
        case "$OPT" in
        h)
            show_usage
            exit 0
            ;;
        \?)
            show_usage
            exit 1
            ;;
        esac
    done
}

check_opts "$@"

cd "${SCRIPT_BASEDIR}" || exit
build_wheel "$@"

build/scripts/crc32.sh (+90, -0)

@@ -0,0 +1,90 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

SCRIPT_BASEDIR=$(
    cd "$(dirname "$0")" || exit
    pwd
)

THIRD_PARTY_DIR=$(realpath "${SCRIPT_BASEDIR}/../../third_party")
SECUREC_SOURCE_DIR="${THIRD_PARTY_DIR}/securec"

build_securec() {
    CMAKE=$(command -v cmake)
    if [ -z "${CMAKE}" ]; then
        echo "Could not find cmake command"
        exit 1
    fi

    cd "${SECUREC_SOURCE_DIR}" || exit
    rm -rf build
    mkdir build
    cd build || exit
    ${CMAKE} ..
    make
    cd - >/dev/null 2>&1 || exit
}

build_crc32() {
    CPP=$(command -v c++)
    if [ -z "${CPP}" ]; then
        echo "Could not find c++ command"
        exit 1
    fi

    PYTHON=$(command -v python3 || command -v python)
    if [ -z "${PYTHON}" ]; then
        echo "Could not find python3 or python command"
        exit 1
    fi
    PYTHON_VERSION=$(${PYTHON} -c "import platform; print(platform.python_version())" | grep '^3.*')
    if [ -z "${PYTHON_VERSION}" ]; then
        echo "Could not find Python 3"
        exit 1
    fi

    DATAVISUAL_DIR=$(realpath "${SCRIPT_BASEDIR}/../../mindinsight/datavisual")
    CRC32_SOURCE_DIR="${DATAVISUAL_DIR}/utils/crc32"
    CRC32_OUTPUT_DIR="${DATAVISUAL_DIR}/utils"
    CRC32_SO_FILE="crc32$(python3-config --extension-suffix)"

    rm -f "${CRC32_SOURCE_DIR}/${CRC32_SO_FILE}"
    rm -f "${CRC32_OUTPUT_DIR}/${CRC32_SO_FILE}"
    cd "${CRC32_SOURCE_DIR}" || exit
    PYBIND11_INCLUDES=$(${PYTHON} -m pybind11 --includes)
    if [ -z "${PYBIND11_INCLUDES}" ]; then
        echo "Could not find pybind11 module"
        exit 1
    fi

    PYTHON_INCLUDE=$(echo "${PYBIND11_INCLUDES}" | awk '{print $1}' | sed "s/^-I//g")
    PYTHON_HEADERS=$(echo "${PYBIND11_INCLUDES}" | awk '{print $2}' | sed "s/^-I//g")
    ${CPP} -O2 -O3 -shared -std=c++11 -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 \
        -Wno-maybe-uninitialized -Wno-unused-parameter -Wall -Wl,-z,relro,-z,now,-z,noexecstack \
        -I"${THIRD_PARTY_DIR}" -I"${DATAVISUAL_DIR}/utils" -I"${PYTHON_INCLUDE}" -I"${PYTHON_HEADERS}" \
        -o "${CRC32_SO_FILE}" crc32.cc "${SECUREC_SOURCE_DIR}/build/src/libsecurec.a"

    if [ ! -f "${CRC32_SO_FILE}" ]; then
        echo "crc so file does not exist, build failed"
        exit 1
    fi
    mv "${CRC32_SO_FILE}" "${CRC32_OUTPUT_DIR}"
}

cd "${SCRIPT_BASEDIR}" || exit
build_securec

cd "${SCRIPT_BASEDIR}" || exit
build_crc32
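
A note on the pybind11 flags consumed above: `${PYTHON} -m pybind11 --includes`
prints the `-I` flags for the Python and pybind11 headers, which the script then
splits apart with `awk`/`sed`. A small Python check (the paths in the comment are
illustrative) shows what that command produces:

```python
# Inspect the pybind11 include flags that build_crc32 parses.
import subprocess
import sys

flags = subprocess.run(
    [sys.executable, "-m", "pybind11", "--includes"],
    capture_output=True, text=True, check=True,
).stdout.split()
print(flags)  # e.g. ['-I/usr/include/python3.7m', '-I/.../pybind11/include']
```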

build/scripts/ui.sh (+49, -0)

@@ -0,0 +1,49 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

SCRIPT_BASEDIR=$(
    cd "$(dirname "$0")" || exit
    pwd
)

build_ui() {
    NPM=$(command -v npm)
    if [ -z "${NPM}" ]; then
        echo "Could not find npm command"
        exit 1
    fi

    UI_SOURCE_DIR=$(realpath "${SCRIPT_BASEDIR}/../../mindinsight/ui")

    cd "${UI_SOURCE_DIR}" || exit
    rm -rf dist

    ${NPM} config set strict-ssl false
    ${NPM} config set unsafe-perm true
    ${NPM} config set user 0

    ${NPM} install
    ${NPM} run build

    if [ ! -f "dist/index.html" ]; then
        echo "dist does not have file index.html, build failed"
        exit 1
    fi

    rm -rf node_modules
}

cd "${SCRIPT_BASEDIR}" || exit
build_ui

docs/README.md (+3, -0)

@@ -0,0 +1,3 @@
# MindInsight Documentation

The MindInsight documentation is in the [MindSpore Docs](https://gitee.com/mindspore/docs) repository.

docs/arch.png (BIN)

Width: 652 | Height: 439 | Size: 12 kB

mindinsight/__init__.py (+26, -0)

@@ -0,0 +1,26 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Mindinsight init module."""

from mindinsight._version import VERSION


__version__ = VERSION
__version_info__ = tuple(VERSION.split('.'))

__all__ = [
    '__version__',
    '__version_info__'
]

mindinsight/__main__.py (+19, -0)

@@ -0,0 +1,19 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Mindinsight main module."""
from mindinsight.utils.command import main
main()

mindinsight/_version.py (+17, -0)

@@ -0,0 +1,17 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Mindinsight version module."""

VERSION = '0.1.0'

mindinsight/backend/__init__.py (+14, -0)

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

mindinsight/backend/application.py (+130, -0)

@@ -0,0 +1,130 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Web application module."""
import os
from importlib import import_module
from werkzeug.datastructures import Headers
from werkzeug.exceptions import HTTPException

from flask import Flask
from flask import request
from flask import Response
from flask_cors import CORS

from mindinsight.conf import settings
from mindinsight.utils.hook import HookUtils
from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.common.exceptions import RequestMethodNotAllowed
from mindinsight.datavisual.common import error_handler
from mindinsight.datavisual.utils.tools import find_app_package
from mindinsight.datavisual.utils.tools import get_img_mimetype
from mindinsight.utils.exceptions import MindInsightException


def get_security_headers():
    """Get security headers."""
    domain_white_list = []
    for hook in HookUtils.instance().hooks():
        domain_white_list += hook.register_secure_domains()

    content_security_policy = {
        'img-src': ["'self'", 'data:'],
        'style-src': ["'self'", "'unsafe-inline'"],
        'frame-src': ["'self'"] + domain_white_list,
        'frame-ancestors': ["'self'"] + domain_white_list,
        'default-src': ["'self'"],
    }

    headers = {
        'X-Frame-Options': 'SAMEORIGIN',
        'X-XSS-Protection': '1; mode=block',
        'X-Content-Type-Options': 'nosniff',
        'Access-Control-Allow-Methods': ', '.join(settings.SUPPORT_REQUEST_METHODS),
        'Content-Security-Policy': '; '.join([
            f"{k} {' '.join(v)}" for k, v in content_security_policy.items()
        ]),
        'X-Download-Options': 'noopen',
        'Cache-Control': 'no-store',
        'Pragma': 'no-cache'
    }

    return list(headers.items())


SECURITY_HEADERS = get_security_headers()


class CustomResponse(Response):
    """Define custom response."""
    def __init__(self, response=None, **kwargs):
        headers = kwargs.get("headers")
        if isinstance(response, bytes):
            mimetype = get_img_mimetype(response)
            SECURITY_HEADERS.append(('Content-Type', mimetype))
        if headers is None:
            headers = Headers(SECURITY_HEADERS)
        else:
            for header in SECURITY_HEADERS:
                headers.add(*header)
        kwargs['headers'] = headers
        super(CustomResponse, self).__init__(response, **kwargs)


def _init_app_module(app):
    """
    Init app module.

    Args:
        app (Flask): An instance of Flask.
    """
    packages = find_app_package()
    for package in packages:
        try:
            app_module = import_module(package)
            app_module.init_module(app)
        except AttributeError:
            logger.debug('[%s].init_module not exists.', package)


def before_request():
    """A function to run before each request."""
    if request.method not in settings.SUPPORT_REQUEST_METHODS:
        raise RequestMethodNotAllowed()


def create_app():
    """Set flask APP config, and start the data manager."""
    static_url_path = "/static"
    static_folder_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.pardir, 'ui', 'dist', 'static'))

    app = Flask(__name__, static_url_path=static_url_path, static_folder=static_folder_path)

    if settings.ENABLE_CORS:
        CORS(app, supports_credentials=True)

    app.before_request(before_request)

    app.register_error_handler(HTTPException, error_handler.handle_http_exception_error)
    app.register_error_handler(MindInsightException, error_handler.handle_mindinsight_error)
    app.register_error_handler(Exception, error_handler.handle_unknown_error)

    app.response_class = CustomResponse

    _init_app_module(app)

    return app


APP = create_app()
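
As a quick aside, the `Content-Security-Policy` value assembled above is just a
"; "-joined rendering of the `content_security_policy` dict. A standalone sketch
(local names only, with a hypothetical whitelisted domain) makes the resulting
header value visible:

```python
# Standalone illustration of the CSP join logic used in get_security_headers.
content_security_policy = {
    'img-src': ["'self'", 'data:'],
    'frame-src': ["'self'", 'https://trusted.example.com'],  # hypothetical domain
    'default-src': ["'self'"],
}
header_value = '; '.join(
    f"{key} {' '.join(sources)}" for key, sources in content_security_policy.items()
)
print(header_value)
# img-src 'self' data:; frame-src 'self' https://trusted.example.com; default-src 'self'
```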

mindinsight/backend/config/__init__.py (+18, -0)

@@ -0,0 +1,18 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Config."""
import os

WEB_CONFIG_DIR = os.path.dirname(__file__)

mindinsight/backend/config/gunicorn_conf.py (+45, -0)

@@ -0,0 +1,45 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Config file for gunicorn."""

import os
import threading
from importlib import import_module

import gunicorn


gunicorn.SERVER_SOFTWARE = 'unknown'

worker_class = 'sync'
workers = 1
threads = min(30, os.cpu_count() * 2 + 1)
worker_connections = 1000

timeout = 30
graceful_timeout = 30
daemon = True

captureoutput = True

# Write gunicorn's default log to the stream; the mindinsight logger writes gunicorn logs to file.
accesslog = '-'


def on_starting(server):
    """Hook function on starting gunicorn process."""
    hook_module = import_module('mindinsight.utils.hook')
    for hook in hook_module.HookUtils.instance().hooks():
        threading.Thread(target=hook.on_startup, args=(server.log,)).start()
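
For a sense of the sizing above: with one sync worker, the thread count scales
with the host CPU count but is capped at 30. A quick check of the formula:

```python
# Evaluate the gunicorn thread-count formula from this config.
import os

print(min(30, os.cpu_count() * 2 + 1))  # e.g. 17 on an 8-core machine
```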

mindinsight/backend/datavisual/__init__.py (+38, -0)

@@ -0,0 +1,38 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Datavisual."""

from mindinsight.backend.datavisual.static_resource_api import init_module as static_init_module
from mindinsight.backend.datavisual.task_manager_api import init_module as task_init_module
from mindinsight.backend.datavisual.train_visual_api import init_module as train_init_module

from mindinsight.conf import settings
from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER


def init_module(app):
    """
    Interface to init module.

    Args:
        app (Flask): An instance of Flask.
    """
    static_init_module(app)
    task_init_module(app)
    train_init_module(app)

    DATA_MANAGER.start_load_data(reload_interval=int(settings.RELOAD_INTERVAL),
                                 max_threads_count=int(settings.MAX_THREADS_COUNT))

mindinsight/backend/datavisual/static_resource_api.py (+46, -0)

@@ -0,0 +1,46 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Static resource api."""
import os
import sys

from flask import current_app
from flask import send_from_directory
from flask import Blueprint


APP_PATH = os.path.realpath(os.path.dirname(sys.argv[0]))
BLUEPRINT = Blueprint("static_resource", __name__)


@BLUEPRINT.route("/", methods=["GET"])
def index():
"""Interface to return static index.html."""
return send_from_directory(get_index_resource_dir(), "index.html")


def get_index_resource_dir():
"""Interface to return index.html resource directory."""
return os.path.realpath(os.path.join(APP_PATH, current_app.static_folder, os.pardir))


def init_module(app):
"""
Init module entry.

Args:
app: the application obj.
"""
app.register_blueprint(BLUEPRINT)

mindinsight/backend/datavisual/task_manager_api.py (+94, -0)

@@ -0,0 +1,94 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Task manager api.

This module provides the interfaces to task manage functions.
"""

import os

from flask import Blueprint
from flask import request
from flask import jsonify

from mindinsight.conf import settings
from mindinsight.datavisual.utils.tools import str_to_bool
from mindinsight.datavisual.utils.tools import get_train_id
from mindinsight.datavisual.processors.train_task_manager import TrainTaskManager
from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER


BLUEPRINT = Blueprint("task_manager", __name__, url_prefix=settings.URL_PREFIX)


@BLUEPRINT.route("/datavisual/single-job", methods=["GET"])
def query_single_train_task():
"""Query single train task"""
plugin_name = request.args.get('plugin_name')
train_id = get_train_id(request)

processor = TrainTaskManager(DATA_MANAGER)
tasks = processor.get_single_train_task(train_id=train_id, plugin_name=plugin_name)
return jsonify(tasks)


@BLUEPRINT.route("/datavisual/plugins", methods=["GET"])
def query_plugins():
"""Query plugins."""
train_id = get_train_id(request)

manual_update = request.args.get('manual_update', default='false')
manual_update = str_to_bool(manual_update, "manual_update")

processor = TrainTaskManager(DATA_MANAGER)
plugins = processor.get_plugins(train_id, manual_update)
return jsonify(plugins)


@BLUEPRINT.route("/datavisual/train-jobs", methods=["GET"])
def query_train_jobs():
"""Query train jobs."""
offset = request.args.get("offset", default=0)
limit = request.args.get("limit", default=10)

summary_watcher = SummaryWatcher()
total, directories = summary_watcher.list_summary_directories_by_pagination(
settings.SUMMARY_BASE_DIR, offset, limit)

train_jobs = [{
'train_id': directory['relative_path'],
'relative_path': directory['relative_path'],
'create_time': directory['create_time'].strftime('%Y-%m-%d %H:%M:%S'),
'update_time': directory['update_time'].strftime('%Y-%m-%d %H:%M:%S'),
} for directory in directories]

return jsonify({
'name': os.path.basename(os.path.realpath(settings.SUMMARY_BASE_DIR)),
'total': total,
'train_jobs': train_jobs,
})


def init_module(app):
"""
Init module entry.

Args:
app: the application obj.

"""
app.register_blueprint(BLUEPRINT)
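
For reference, a client query against the train-jobs route above might look like
the following sketch. The host, port and `/v1/mindinsight` prefix are assumptions
inferred from the URL examples elsewhere in this commit; adjust them to your
settings:

```python
# Hypothetical client call; host, port and URL prefix are assumptions.
import requests

resp = requests.get(
    "http://127.0.0.1:8080/v1/mindinsight/datavisual/train-jobs",
    params={"offset": 0, "limit": 10},
)
resp.raise_for_status()
body = resp.json()
print(body["total"])  # number of summary directories found
for job in body["train_jobs"]:
    print(job["train_id"], job["update_time"])
```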

mindinsight/backend/datavisual/train_visual_api.py (+156, -0)

@@ -0,0 +1,156 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Backend interface module.

This module provides the interfaces to train processors functions.
"""
from flask import Blueprint
from flask import request
from flask import jsonify

from mindinsight.conf import settings
from mindinsight.datavisual.utils.tools import get_train_id
from mindinsight.datavisual.utils.tools import if_nan_inf_to_none
from mindinsight.datavisual.processors.images_processor import ImageProcessor
from mindinsight.datavisual.processors.scalars_processor import ScalarsProcessor
from mindinsight.datavisual.processors.graph_processor import GraphProcessor
from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER


BLUEPRINT = Blueprint("train_visual", __name__, url_prefix=settings.URL_PREFIX)


@BLUEPRINT.route("/datavisual/image/metadata", methods=["GET"])
def image_metadata():
"""
Interface to fetch metadata about the images for the particular run,tag, and zero-indexed sample.

Returns:
Response, which contains a list in JSON containing image events, each
one of which is an object containing items wall_time, step, width,
height, and query.
"""
tag = request.args.get("tag")
train_id = get_train_id(request)

processor = ImageProcessor(DATA_MANAGER)
response = processor.get_metadata_list(train_id, tag)
return jsonify(response)


@BLUEPRINT.route("/datavisual/image/single-image", methods=["GET"])
def single_image():
"""
Interface to fetch raw image data for a particular image.

Returns:
Response, which contains a byte string of image.
"""
tag = request.args.get("tag")
step = request.args.get("step")
train_id = get_train_id(request)

processor = ImageProcessor(DATA_MANAGER)
img_data = processor.get_single_image(train_id, tag, step)
return img_data


@BLUEPRINT.route("/datavisual/scalar/metadata", methods=["GET"])
def scalar_metadata():
"""
Interface to fetch metadata about the scalars for the particular run and tag.

Returns:
Response, which contains a list in JSON containing scalar events, each
one of which is an object containing items' wall_time, step and value.
"""
tag = request.args.get("tag")
train_id = request.args.get("train_id")

processor = ScalarsProcessor(DATA_MANAGER)
response = processor.get_metadata_list(train_id, tag)

metadatas = response['metadatas']
for metadata in metadatas:
value = metadata.get("value")
metadata["value"] = if_nan_inf_to_none('scalar_value', value)

return jsonify(response)


@BLUEPRINT.route("/datavisual/graphs/nodes", methods=["GET"])
def graph_nodes():
"""
Interface to get graph nodes.

Returns:
Response, which contains a JSON object.

"""
name = request.args.get('name', default=None)
node_type = request.args.get('type', default='name_scope')
tag = request.args.get("tag", default=None)
train_id = get_train_id(request)

graph_process = GraphProcessor(train_id, DATA_MANAGER, tag)
response = graph_process.get_nodes(name=name, node_type=node_type)
return jsonify(response)


@BLUEPRINT.route("/datavisual/graphs/nodes/names", methods=["GET"])
def graph_node_names():
"""
Interface to query node names.

Returns:
Response, which contains a JSON object.
"""
search_content = request.args.get("search")
offset = request.args.get("offset", default=0)
limit = request.args.get("limit", default=100)
tag = request.args.get("tag", default=None)
train_id = get_train_id(request)

graph_process = GraphProcessor(train_id, DATA_MANAGER, tag)
resp = graph_process.search_node_names(search_content, offset, limit)
return jsonify(resp)


@BLUEPRINT.route("/datavisual/graphs/single-node", methods=["GET"])
def graph_search_single_node():
"""
Interface to search single node.

Returns:
Response, which contains a JSON object.
"""
name = request.args.get("name")
tag = request.args.get("tag", default=None)
train_id = get_train_id(request)

graph_process = GraphProcessor(train_id, DATA_MANAGER, tag)
resp = graph_process.search_single_node(name)
return jsonify(resp)


def init_module(app):
"""
Init module entry.

Args:
app (Flask): The application obj.
"""
app.register_blueprint(BLUEPRINT)

mindinsight/backend/lineagemgr/__init__.py (+31, -0)

@@ -0,0 +1,31 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
module init file.
"""
from mindinsight.backend.lineagemgr.lineage_api import init_module as init_query_module


def init_module(app):
"""
Init module entry.

Args:
app: Flask. A Flask instance.

Returns:

"""
init_query_module(app)

mindinsight/backend/lineagemgr/lineage_api.py (+191, -0)

@@ -0,0 +1,191 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Lineage restful api."""
import json
import os

from flask import Blueprint, jsonify, request

from mindinsight.conf import settings
from mindinsight.datavisual.utils.tools import get_train_id
from mindinsight.lineagemgr import filter_summary_lineage, get_summary_lineage
from mindinsight.lineagemgr.common.validator.validate import validate_path
from mindinsight.utils.exceptions import MindInsightException, ParamValueError

BLUEPRINT = Blueprint("lineage", __name__, url_prefix=settings.URL_PREFIX.rstrip("/"))


@BLUEPRINT.route("/models/model_lineage", methods=["POST"])
def search_model():
"""
Get model lineage info.

Get model info by summary base dir return a model lineage information list of dict
contains model's all kinds of param and count of summary log.

Returns:
str, the model lineage information.

Raises:
MindInsightException: If method fails to be called.
ParamValueError: If parsing json data search_condition fails.

Examples:
>>> POST http://xxxx/v1/mindinsight/models/model_lineage
"""
search_condition = request.stream.read()
try:
search_condition = json.loads(search_condition if search_condition else "{}")
except Exception:
raise ParamValueError("Json data parse failed.")

model_lineage_info = _get_lineage_info(
lineage_type="model",
search_condition=search_condition
)

return jsonify(model_lineage_info)


@BLUEPRINT.route("/datasets/dataset_lineage", methods=["POST"])
def get_datasets_lineage():
"""
Get dataset lineage.

Returns:
str, the dataset lineage information.

Raises:
MindInsightException: If method fails to be called.
ParamValueError: If parsing json data search_condition fails.

Examples:
>>> POST http://xxxx/v1/minddata/datasets/dataset_lineage
"""
search_condition = request.stream.read()
try:
search_condition = json.loads(search_condition if search_condition else "{}")
except Exception:
raise ParamValueError("Json data parse failed.")

dataset_lineage_info = _get_lineage_info(
lineage_type="dataset",
search_condition=search_condition
)

return jsonify(dataset_lineage_info)


def _get_lineage_info(lineage_type, search_condition):
    """
    Get lineage info for dataset or model.

    Args:
        lineage_type (str): Lineage type, 'dataset' or 'model'.
        search_condition (dict): Search condition.

    Returns:
        dict, lineage info.

    Raises:
        MindInsightException: If method fails to be called.
    """
    if 'lineage_type' in search_condition:
        raise ParamValueError("Lineage type does not need to be assigned in a specific interface.")
    if lineage_type == 'dataset':
        search_condition.update({'lineage_type': 'dataset'})
    summary_base_dir = str(settings.SUMMARY_BASE_DIR)
    try:
        lineage_info = filter_summary_lineage(
            summary_base_dir, search_condition)

        lineages = lineage_info['object']

        summary_base_dir = os.path.realpath(summary_base_dir)
        length = len(summary_base_dir)

        for lineage in lineages:
            summary_dir = lineage['summary_dir']
            summary_dir = os.path.realpath(summary_dir)
            if summary_base_dir == summary_dir:
                relative_dir = './'
            else:
                relative_dir = os.path.join(os.curdir, summary_dir[length+1:])
            lineage['summary_dir'] = relative_dir

    except MindInsightException as exception:
        raise MindInsightException(exception.error, exception.message, http_code=400)

    return lineage_info


@BLUEPRINT.route("/datasets/dataset_graph", methods=["GET"])
def get_dataset_graph():
"""
Get dataset graph.

Returns:
str, the dataset graph information.

Raises:
MindInsightException: If method fails to be called.
ParamValueError: If summary_dir is invalid.

Examples:
>>> GET http://xxxx/v1/mindinsight/datasets/dataset_graph?train_id=xxx
"""

summary_base_dir = str(settings.SUMMARY_BASE_DIR)
summary_dir = get_train_id(request)
if summary_dir.startswith('/'):
validate_path(summary_dir)
elif summary_dir.startswith('./'):
summary_dir = os.path.join(summary_base_dir, summary_dir[2:])
summary_dir = validate_path(summary_dir)
else:
raise ParamValueError(
"Summary dir should be absolute path or "
"relative path that relate to summary base dir."
)
try:
dataset_graph = get_summary_lineage(
summary_dir=summary_dir,
keys=['dataset_graph']
)
except MindInsightException as exception:
raise MindInsightException(exception.error, exception.message, http_code=400)

if dataset_graph:
summary_dir_result = dataset_graph.get('summary_dir')
base_dir_len = len(summary_base_dir)
if summary_base_dir == summary_dir_result:
relative_dir = './'
else:
relative_dir = os.path.join(
os.curdir, summary_dir[base_dir_len + 1:]
)
dataset_graph['summary_dir'] = relative_dir

return jsonify(dataset_graph)


def init_module(app):
    """
    Init module entry.

    Args:
        app (Flask): The application obj.
    """
    app.register_blueprint(BLUEPRINT)
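
A model-lineage query against the POST route above could be issued as follows.
The `search_condition` keys shown are illustrative only, since the accepted
schema is enforced by the lineagemgr validators rather than shown in this file,
and the host/port are assumptions:

```python
# Hypothetical lineage query; condition keys and endpoint address are assumptions.
import json
import requests

search_condition = {"limit": 10, "offset": 0}  # assumed keys, validated server-side
resp = requests.post(
    "http://127.0.0.1:8080/v1/mindinsight/models/model_lineage",
    data=json.dumps(search_condition),
    headers={"Content-Type": "application/json"},
)
print(resp.json())  # lineage objects, with summary_dir rewritten to relative paths
```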

mindinsight/backend/run.py (+256, -0)

@@ -0,0 +1,256 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Web service entrance."""
import os
import stat
import re
import subprocess
import time
import shlex

from gunicorn.glogging import Logger

from mindinsight.backend.config import gunicorn_conf
from mindinsight.backend.config import WEB_CONFIG_DIR
from mindinsight.conf import settings
from mindinsight.utils.log import setup_logger


MINDBOARD_APP_MODULE = "mindinsight.backend.application:APP"
GUNICORN_LOGGER = "mindinsight.backend.run.GunicornLogger"

_MIN_PORT = 1
_MAX_PORT = 65535


def _get_file_size(file_path):
"""
Get the file size.

Args:
file_path (str): The file path.

Returns:
        int, the file size. If the file does not exist, return 0.
"""
try:
file_size = os.path.getsize(file_path)
except FileNotFoundError:
file_size = 0
return file_size


def _is_match_one(sub_string_list, src_string):
"""
    Check whether any sub-string in the list is contained in the source string.

Args:
sub_string_list (list): The sub-string list.
src_string (str): The source string.

Returns:
bool, if matched return True, else return False.
"""
for match_info in sub_string_list:
if match_info in src_string:
return True
return False


def _check_stat_from_log(log_info):
"""
Determine the service startup status based on the log information.

Args:
log_info (str): The output log of service startup.

Returns:
        str, the state value, one of "unknown", "failed" and "success".
"""
server_state = "unknown"
match_success_info = "Listening at: http://%s:%d" % \
(settings.HOST, int(settings.PORT))
common_failed_info_list = [
"[ERROR] Retrying in 1 second",
"[INFO] Reason: App failed to load",
"[ERROR] Exception in worker process"
]
re_pattern = "\\[ERROR\\].+%s.+%d" % \
(settings.HOST, int(settings.PORT))

    # match failed output log by fuzzy matching
if re.search(re_pattern, log_info) or \
_is_match_one(common_failed_info_list, log_info):
server_state = "failed"

if match_success_info in log_info:
server_state = "success"

return server_state


def _get_error_log_path():
"""
Get gunicorn error log path.

Returns:
str, the path of error log.
"""

path = os.path.join(settings.WORKSPACE, 'log/gunicorn/error.log')
errorlog_abspath = os.path.realpath(path)
return errorlog_abspath


def _get_access_log_path():
"""Get gunicorn access log path."""
access_log_path = os.path.join(settings.WORKSPACE, 'log/gunicorn/access.log')
access_log_path = os.path.realpath(access_log_path)
return access_log_path


def _check_state_from_log(log_abspath, start_pos=0):
"""
Check the service startup status based on the log file.

Args:
log_abspath (str): Absolute path of the log file.
start_pos (int): Offset position of the log file.

Returns:
dict, a dict with "state" and "prompt_message" key.
The value of the "state" key is as follows:"unknown", "failed" and "success".
The value of the "prompt_message" key is a list of prompt messages.

"""
server_is_start = False
state_result = {"state": "unknown", "prompt_message": []}
prompt_messages = []
match_start_log = "Starting gunicorn"
with open(log_abspath) as f_log:
f_log.seek(start_pos)
for line in f_log.readlines():
if match_start_log in line:
if server_is_start:
break
server_is_start = True
continue
if server_is_start:
log_result = _check_stat_from_log(line)
# ignore "unknown" result
if log_result != "unknown":
state_result["state"] = log_result

if log_result == "failed":
prompt_messages.append(line.strip())
prompt_messages.append(
"more failed details in log: %s" % log_abspath)
break
state_result["prompt_message"].append(
"service start state: %s" % state_result["state"])
for prompt_message in prompt_messages:
state_result["prompt_message"].append(prompt_message)

return state_result


def _check_server_start_stat(log_abspath, start_pos=None):
"""
    Check the server startup status.

Args:
log_abspath (str): The log file path.
start_pos (int): The log file start position.

Returns:
        dict, a dict object that contains the state and prompt_message fields.
The state values are as follows: "unknown", "failed" and "success".

"""
state_result = {"state": "unknown", "prompt_message": []}
    # return unknown when the gunicorn error log file is not configured
if not log_abspath:
return state_result

log_pos = _get_file_size(log_abspath) if start_pos is None else start_pos
try_cnt = 0
try_cnt_max = 2

while try_cnt < try_cnt_max:
try_cnt += 1
time.sleep(1)
if _get_file_size(log_abspath) > log_pos:
state_result.update(_check_state_from_log(log_abspath, log_pos))
break

return state_result


class GunicornLogger(Logger):
"""Rewrite gunicorn default logger."""

def __init__(self, cfg):
self.access_log = setup_logger('gunicorn', 'access')
self.error_log = setup_logger('gunicorn', 'error')
super(GunicornLogger, self).__init__(cfg)
access_log_path = _get_access_log_path()
error_log_path = _get_error_log_path()
os.chmod(access_log_path, stat.S_IREAD | stat.S_IWRITE)
os.chmod(error_log_path, stat.S_IREAD | stat.S_IWRITE)


def start():
"""Start web service."""
errorlog_abspath = _get_error_log_path()

gunicorn_conf_file = os.path.join(WEB_CONFIG_DIR, "gunicorn_conf.py")
cmd = "gunicorn " \
"-b {host}:{port} {app_module} " \
"-c {conf_file} " \
"--logger-class {logger_class} " \
"--access-logformat {log_format}"\
.format(host=settings.HOST,
port=settings.PORT,
conf_file=gunicorn_conf_file,
app_module=MINDBOARD_APP_MODULE,
logger_class=GUNICORN_LOGGER,
log_format=settings.GUNICORN_ACCESS_FORMAT
)

log_size = _get_file_size(errorlog_abspath)

# start server
process = subprocess.Popen(
shlex.split(cmd),
shell=False,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
_, stderr = process.communicate()
if stderr:
print(stderr.decode())

    # wait for the command to finish when gunicorn is running as a daemon.
if gunicorn_conf.daemon and process.wait() == 0:
state_result = _check_server_start_stat(errorlog_abspath, log_size)
# print gunicorn start state to stdout
print('Web address: http://{}:{}'.format(settings.HOST, settings.PORT))
for line in state_result["prompt_message"]:
print(line)


if __name__ == '__main__':
start()
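
The startup check above is a plain substring/regex scan over gunicorn's error log. A standalone sketch of the same classification logic, with the host and port hard-coded instead of read from settings:

import re

HOST, PORT = "127.0.0.1", 8080

def check_stat(line):
    """Classify one gunicorn log line as "success", "failed" or "unknown"."""
    success_info = "Listening at: http://%s:%d" % (HOST, PORT)
    failed_infos = [
        "[ERROR] Retrying in 1 second",
        "[INFO] Reason: App failed to load",
        "[ERROR] Exception in worker process",
    ]
    if success_info in line:
        return "success"
    if re.search(r"\[ERROR\].+%s.+%d" % (HOST, PORT), line) or \
            any(info in line for info in failed_infos):
        return "failed"
    return "unknown"

print(check_stat("[INFO] Listening at: http://127.0.0.1:8080 (42)"))  # success
print(check_stat("[ERROR] Exception in worker process"))              # failed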

+ 14
- 0
mindinsight/common/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 14
- 0
mindinsight/common/hook/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 89
- 0
mindinsight/common/hook/datavisual.py View File

@@ -0,0 +1,89 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Datavisual hook."""
import argparse
import os

from mindinsight.conf import settings
from mindinsight.utils.hook import BaseHook


class ReloadIntervalAction(argparse.Action):
    """Reload interval action class definition."""

    def __call__(self, parser, namespace, values, option_string=None):
        """
        Inherited __call__ method from argparse.Action.

        Args:
            parser (ArgumentParser): Passed-in argument parser.
            namespace (Namespace): Namespace object to hold arguments.
            values (object): Argument values with type depending on argument definition.
            option_string (str): Option string for specific argument name.
        """
        reload_interval = values
        if reload_interval < 0:
            parser.error(f'{option_string} should be greater than or equal to 0')
        setattr(namespace, self.dest, reload_interval)


class SummaryBaseDirAction(argparse.Action):
    """Summary base dir action class definition."""

    def __call__(self, parser, namespace, values, option_string=None):
        """
        Inherited __call__ method from argparse.Action.

        Args:
            parser (ArgumentParser): Passed-in argument parser.
            namespace (Namespace): Namespace object to hold arguments.
            values (object): Argument values with type depending on argument definition.
            option_string (str): Option string for specific argument name.
        """
        summary_base_dir = os.path.realpath(values)
        setattr(namespace, self.dest, summary_base_dir)


class Hook(BaseHook):
    """Hook class definition."""

    def register_startup_arguments(self, parser):
        """
        Hook function to register startup arguments.

        Args:
            parser (ArgumentParser): Specify parser to which arguments are added.
        """
        parser.add_argument(
            '--reload-interval',
            type=int,
            action=ReloadIntervalAction,
            help="""
                data reload interval (seconds). It should be greater than or equal to 0.
                If it equals 0, load data only once. Default value is %s seconds.
            """ % settings.RELOAD_INTERVAL)

        parser.add_argument(
            '--summary-base-dir',
            type=str,
            action=SummaryBaseDirAction,
            help="""
                directory where MindInsight will walk through its direct subdirectories
                and look for summary files matching the regex 'summary.\\d+' or '\\.pb$'. Any direct
                subdirectory containing summary files will turn out to be the summary
                file directory. A summary file existing in summary-base-dir indicates that
                summary-base-dir is one of the summary file directories as well. Default
                value is current directory.""")
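
The two Action subclasses validate and normalize option values at parse time. A self-contained sketch of the same argparse pattern outside MindInsight (the class and option names here are illustrative):

import argparse
import os

class NonNegativeIntAction(argparse.Action):
    """Reject negative values at parse time, like ReloadIntervalAction above."""

    def __call__(self, parser, namespace, values, option_string=None):
        if values < 0:
            parser.error(f'{option_string} should be greater than or equal to 0')
        setattr(namespace, self.dest, values)

parser = argparse.ArgumentParser()
parser.add_argument('--reload-interval', type=int, action=NonNegativeIntAction)
# os.path.realpath is a plain callable, so it can serve directly as type=.
parser.add_argument('--summary-base-dir', type=os.path.realpath)

args = parser.parse_args(['--reload-interval', '5', '--summary-base-dir', '.'])
print(args.reload_interval, args.summary_base_dir)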

+ 150
- 0
mindinsight/conf/__init__.py View File

@@ -0,0 +1,150 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Conf module."""

import os
import json
import types
from importlib import import_module


class Settings:
"""
Definition of Settings class.

Examples:
>>> from mindinsight.conf import settings
>>> print(settings.PORT)
"""

_prefix = 'MINDINSIGHT_'
_explicit_settings = set()
_default_settings = set()

def __init__(self):
"""Initialization of Settings."""
self.load_from_defaults()
self.load_from_constants()
self.refresh()

def refresh(self):
"""Refresh settings from config file and environment variables."""
self.update_from_file()
self.update_from_env()

def load_from_defaults(self):
"""Update settings from defaults module."""
default_settings = import_module('mindinsight.conf.defaults')
for setting in dir(default_settings):
if setting.isupper():
setattr(self, setting, getattr(default_settings, setting))
self._default_settings.add(setting)

def load_from_constants(self):
"""Update settings from constants module"""
constant_settings = import_module('mindinsight.conf.constants')
for setting in dir(constant_settings):
if setting.isupper():
setattr(self, setting, getattr(constant_settings, setting))

def update_from_file(self):
"""Update settings from config file."""
config_path = os.environ.get('MINDINSIGHT_CONFIG', '')
if not config_path:
return

config_module = None

# python:full.path.for.config.module
if config_path.startswith('python:'):
config_module = import_module(config_path[len('python:'):])

# file:full/path/for/config.py
elif config_path.startswith('file:'):
config_path = config_path[len('file:'):]
module_name = '__mindinsightconfig__'
config_module = types.ModuleType(module_name)
machinery = import_module('importlib.machinery')
loader = machinery.SourceFileLoader(module_name, config_path)
loader.exec_module(config_module)

if config_module is None:
return

for setting in dir(config_module):
if setting.isupper() and setting in self._default_settings:
setting_value = getattr(config_module, setting)
setattr(self, setting, setting_value)
self._explicit_settings.add(setting)

def update_from_env(self):
"""Update settings from environment variables."""
for key, value in os.environ.items():
if not key.startswith(self._prefix):
continue

setting = key[len(self._prefix):]
if setting not in self._default_settings:
continue

setting_value = getattr(self, setting)
if isinstance(setting_value, bool):
value = (value == 'True')
elif isinstance(setting_value, (int, float)):
value = type(setting_value)(value)
elif isinstance(setting_value, (list, dict)):
value = json.loads(value)

setattr(self, setting, value)
self._explicit_settings.add(setting)

def config_workspace(self, workspace):
"""
Config workspace value.

Args:
workspace (str): Path of workspace.
"""
setattr(self, 'WORKSPACE', workspace)
self._explicit_settings.add('WORKSPACE')

def is_overridden(self, setting_name):
"""
Check if specified setting is overridden.

Args:
setting_name (str): Setting name to be checked.

Returns:
            bool, indicates whether the given setting name is overridden.
"""
return setting_name in self._explicit_settings

def dump(self):
"""
Dump settings data.

Returns:
dict, json formatted data of settings.
"""
config = {}
for setting in dir(self):
if setting.isupper():
config[setting] = getattr(self, setting)

return config


settings = Settings()
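
The precedence is: constants and defaults at import, then the config file, then environment variables; only names present in defaults.py can be overridden from outside. A sketch of the environment route, assuming the package is importable:

import os

# Must be set before mindinsight.conf is first imported, since Settings()
# is instantiated at import time.
os.environ['MINDINSIGHT_PORT'] = '9090'

from mindinsight.conf import settings

print(settings.PORT)                   # 9090, coerced to int from the env string
print(settings.is_overridden('PORT'))  # True
print(settings.is_overridden('HOST'))  # False: HOST lives in constants, not defaults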

+ 58
- 0
mindinsight/conf/constants.py View File

@@ -0,0 +1,58 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Constants module for mindinsight settings."""
import logging

####################################
# Global default settings.
####################################
LOG_FORMAT = '[%(levelname)s] MI(%(process)d:%(thread)d,%(processName)s):%(asctime)s ' \
'[%(filepath)s:%(lineno)d][%(sub_module)s] %(message)s'

GUNICORN_ACCESS_FORMAT = "'%(h)s <%(r)s> %(s)s %(b)s <%(f)s> <%(a)s> %(D)s'"

LOG_LEVEL = logging.INFO
# rotating max bytes, default is 50M
LOG_ROTATING_MAXBYTES = 52428800

# rotating backup count, default is 30
LOG_ROTATING_BACKUPCOUNT = 30

####################################
# Web default settings.
####################################
HOST = '127.0.0.1'

# Whether to enable cross-origin resource sharing (CORS). Disabled by default.
# If CORS is enabled, `SUPPORT_REQUEST_METHODS` should include the 'OPTIONS' method.
ENABLE_CORS = False

SUPPORT_REQUEST_METHODS = {'POST', 'GET', 'PUT', 'DELETE'}

# URL prefix should not end with a slash; the correct format is /v1/url
URL_PREFIX = '/v1/mindinsight'

####################################
# Datavisual default settings.
####################################
MAX_THREADS_COUNT = 15

MAX_TAG_SIZE_PER_EVENTS_DATA = 300
DEFAULT_STEP_SIZES_PER_TAG = 500

MAX_GRAPH_TAG_SIZE = 10
MAX_IMAGE_STEP_SIZE_PER_TAG = 10
MAX_SCALAR_STEP_SIZE_PER_TAG = 1000
MAX_GRAPH_STEP_SIZE_PER_TAG = 1

+ 32
- 0
mindinsight/conf/defaults.py View File

@@ -0,0 +1,32 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Defaults module for mindinsight settings."""
import os

####################################
# Global default settings.
####################################
WORKSPACE = os.path.join(os.environ['HOME'], 'mindinsight')

####################################
# Web default settings.
####################################
PORT = 8080

####################################
# Datavisual default settings.
####################################
RELOAD_INTERVAL = 3 # Seconds
SUMMARY_BASE_DIR = os.getcwd()
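
Only the names above are overridable from outside. A sketch of the config-file route that update_from_file implements, using a throwaway path as an assumption; the 'file:' prefix selects the SourceFileLoader branch:

import os

# Write a minimal config module; only names defined in defaults.py
# (WORKSPACE, PORT, RELOAD_INTERVAL, SUMMARY_BASE_DIR) take effect.
with open('/tmp/mi_config.py', 'w') as f:
    f.write('PORT = 9191\n')

os.environ['MINDINSIGHT_CONFIG'] = 'file:/tmp/mi_config.py'

from mindinsight.conf import settings

print(settings.PORT)  # 9191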

+ 14
- 0
mindinsight/datavisual/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 14
- 0
mindinsight/datavisual/common/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 39
- 0
mindinsight/datavisual/common/enums.py View File

@@ -0,0 +1,39 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Enums."""

from enum import Enum

class BaseEnum(Enum):
    """Base enum class offering a helper to list member values."""

@classmethod
def list_members(cls):
"""List all members."""
return [member.value for member in cls]


class DataManagerStatus(BaseEnum):
"""Data manager status."""
INIT = 'INIT'
LOADING = 'LOADING'
DONE = 'DONE'
INVALID = 'INVALID'


class PluginNameEnum(BaseEnum):
"""Plugin Name Enum."""
IMAGE = 'image'
SCALAR = 'scalar'
GRAPH = 'graph'
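
A quick check of the helper, assuming the module is importable:

from mindinsight.datavisual.common.enums import PluginNameEnum

print(PluginNameEnum.list_members())  # ['image', 'scalar', 'graph']
print(PluginNameEnum.IMAGE.value)     # 'image'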

+ 63
- 0
mindinsight/datavisual/common/error_handler.py View File

@@ -0,0 +1,63 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Handle custom error."""
from urllib.parse import quote
from werkzeug.exceptions import NotFound
from werkzeug.exceptions import MethodNotAllowed

from flask import request, jsonify

from mindinsight.datavisual.common.exceptions import RequestMethodNotAllowed
from mindinsight.datavisual.common.exceptions import RestfulApiNotExist
from mindinsight.datavisual.common.log import restful_logger as logger
from mindinsight.utils.exceptions import UnknownError
from mindinsight.utils.exceptions import FileSystemPermissionError


def handle_http_exception_error(ex):
"""Handle http exception error."""
logger.warning('%r %r, detail: %r', request.method, quote(request.path), str(ex))
if isinstance(ex, NotFound):
error = RestfulApiNotExist()
elif isinstance(ex, MethodNotAllowed):
error = RequestMethodNotAllowed()
else:
logger.exception(ex)
error = UnknownError('System error or http error.')
res_body = {"error_code": error.error_code, "error_msg": error.message}
return jsonify(res_body), error.http_code


def handle_mindinsight_error(ex):
"""Handle mindinsight error."""
if int(ex.http_code) < 500:
logger.warning('%r %r detail: %r', request.method, quote(request.path), ex.message)
else:
logger.error('%r %r detail: %r', request.method, quote(request.path), ex.message)
logger.exception(ex)
res_body = dict(error_code=ex.error_code, error_msg=ex.message)
return jsonify(res_body), ex.http_code


def handle_unknown_error(ex):
"""Handle unknown error."""
logger.error('%r %r detail: %r', request.method, quote(request.path), str(ex))
logger.exception(ex)
if isinstance(ex, PermissionError):
error = FileSystemPermissionError('File System Permission Error')
else:
error = UnknownError('System error.')
res_body = dict(error_code=error.error_code, error_msg=error.message)
return jsonify(res_body), error.http_code
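
These handlers only take effect once they are registered on the Flask app. A sketch of the expected wiring (the real registration lives in the application module, which is not shown here):

from flask import Flask
from werkzeug.exceptions import HTTPException

from mindinsight.datavisual.common import error_handler
from mindinsight.utils.exceptions import MindInsightException

app = Flask(__name__)

# Route each error family to its matching handler.
app.register_error_handler(HTTPException, error_handler.handle_http_exception_error)
app.register_error_handler(MindInsightException, error_handler.handle_mindinsight_error)
app.register_error_handler(Exception, error_handler.handle_unknown_error)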

+ 83
- 0
mindinsight/datavisual/common/exceptions.py View File

@@ -0,0 +1,83 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Define custom exception."""

from mindinsight.utils.constant import DataVisualErrors
from mindinsight.utils.exceptions import MindInsightException


class RestfulApiNotExist(MindInsightException):
"""404 not found."""
def __init__(self):
error_msg = '404 Not Found.'
super(RestfulApiNotExist, self).__init__(DataVisualErrors.RESTFUL_API_NOT_EXIST,
error_msg,
http_code=404)


class RequestMethodNotAllowed(MindInsightException):
"""Request method not allowed."""
def __init__(self):
error_msg = '405 Method Not Allowed.'
super(RequestMethodNotAllowed, self).__init__(DataVisualErrors.REQUEST_METHOD_NOT_ALLOWED,
error_msg,
http_code=405)


class PathNotDirectoryError(MindInsightException):
"""Raised when specified path do not exist."""
def __init__(self, error_detail):
"""Initialize PathNotExistError"""
error_msg = 'Specified path is not a directory. Detail: {}'.format(error_detail)
super(PathNotDirectoryError, self).__init__(DataVisualErrors.PATH_NOT_DIRECTORY_ERROR,
error_msg,
http_code=400)


class SummaryLogPathInvalid(MindInsightException):
"""No valid log file in the path."""
def __init__(self):
error_msg = 'No valid summary log file in path'
super(SummaryLogPathInvalid, self).__init__(DataVisualErrors.SUMMARY_LOG_PATH_INVALID,
error_msg,
http_code=400)


class CRCFailedError(MindInsightException):
"""CRC fail, record corrupted."""
def __init__(self):
error_msg = 'CRC Failed.'
super(CRCFailedError, self).__init__(DataVisualErrors.CRC_FAILED,
error_msg,
http_code=400)


class SummaryLogIsLoading(MindInsightException):
"""Data is loading."""

def __init__(self, error_detail):
error_msg = "Data is loading. Detail: %s" % error_detail
super(SummaryLogIsLoading, self).__init__(DataVisualErrors.SUMMARY_LOG_IS_LOADING,
error_msg,
http_code=400)


class NodeNotInGraphError(MindInsightException):
"""Can not find node in graph error."""
def __init__(self):
error_msg = "Can not find node in graph by given node name."
super(NodeNotInGraphError, self).__init__(DataVisualErrors.NODE_NOT_IN_GRAPH_ERROR,
error_msg,
http_code=400)
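
Every exception here carries an error code and an HTTP status, which is what handle_mindinsight_error relies on. A small sketch:

from mindinsight.datavisual.common.exceptions import CRCFailedError
from mindinsight.utils.exceptions import MindInsightException

try:
    raise CRCFailedError()
except MindInsightException as ex:
    print(ex.error_code, ex.http_code, ex.message)  # <code> 400 'CRC Failed.'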

+ 19
- 0
mindinsight/datavisual/common/log.py View File

@@ -0,0 +1,19 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Create a logger."""
from mindinsight.utils.log import setup_logger

logger = setup_logger("datavisual", "datavisual")
restful_logger = setup_logger("restful_api", "restful_api")

+ 102
- 0
mindinsight/datavisual/common/validation.py View File

@@ -0,0 +1,102 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Define a validation class which contain all check methods of datavisual module."""
from numbers import Number
from mindinsight.utils.exceptions import ParamValueError
from mindinsight.utils.exceptions import ParamMissError
from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.utils.tools import to_int


class Validation:
"""Validation class, define all check methods."""

@classmethod
def check_offset(cls, offset, default_value=0):
"""
        Check the offset parameter; it must be greater than or equal to 0.

Args:
offset (Union[str, int]): Value can be string number or int.
default_value (int): Default value for checked offset. Default: 0.

Returns:
int, offset.
"""

if offset is None:
return default_value
offset = to_int(offset, 'offset')
if offset < 0:
raise ParamValueError("'offset' should be greater than or equal to 0.")
return offset

@classmethod
def check_limit(cls, limit, min_value=1, max_value=1000, default_value=100):
"""
        Check the limit parameter; it should be between min_value and max_value.

Args:
limit (Union[str, int]): Value can be string number or int.
            min_value (int): Limit should be greater than or equal to this value. Default: 1.
            max_value (int): Limit should be less than or equal to this value. Default: 1000.
default_value (int): Default value for limit. Default: 100.

Returns:
int, limit.
"""

if limit is None:
return default_value

limit = to_int(limit, 'limit')
if limit < min_value or limit > max_value:
raise ParamValueError("'limit' should in [{}, {}].".format(min_value, max_value))
return limit

@classmethod
def check_param_empty(cls, **kwargs):
"""
        Check that params are not empty.

        Args:
            kwargs (Any): Params to check for emptiness.

Raises:
ParamMissError: When param missing.
"""
for key, value in kwargs.items():
# When value is 0, 0.0 or False, it is not empty.
if isinstance(value, Number):
continue

if not value:
raise ParamMissError(key)

@classmethod
def check_plugin_name(cls, plugin_name):
"""
Check plugin name.

Args:
plugin_name (str): The plugin name.

Raises:
ParamValueError: When plugin name is not valid.
"""
plugin_name_list = PluginNameEnum.list_members()
if plugin_name not in plugin_name_list:
raise ParamValueError("'plugin_name' only can be one of {}"
"".format(plugin_name_list))

+ 14
- 0
mindinsight/datavisual/data_access/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 68
- 0
mindinsight/datavisual/data_access/base_file_system.py View File

@@ -0,0 +1,68 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Base file system."""
from abc import ABC, abstractmethod
from collections import namedtuple

StatInfo = namedtuple("StatInfo", ["size", "mtime"])


class BaseFileSystem(ABC):
"""Base class for file systems."""

@abstractmethod
def list_dir(self, path):
"""
Abstract method for listing directories by path.

Args:
path (str): Directory path or file path.
"""

@abstractmethod
def is_dir(self, path):
"""
Abstract method for determining if it is a directory.

Args:
path (str): Directory path or file path.
"""

@abstractmethod
def exists(self, path):
"""
Abstract method for determining if it exists.

Args:
path (str): Directory path or file path.
"""

@abstractmethod
def file_stat(self, file_path):
"""
Abstract method for getting file stat information.

Args:
file_path (str): File path.
"""

@abstractmethod
def join(self, path, *paths):
"""
Abstract method for combining paths.

Args:
path (str): Directory path.
*paths (str): Path or paths.
"""

+ 290
- 0
mindinsight/datavisual/data_access/file_handler.py View File

@@ -0,0 +1,290 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""File handler for file operations."""
from mindinsight.utils.exceptions import PathNotExistError
from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.utils.tools import to_str
from mindinsight.datavisual.data_access.local_file_system import LocalFileSystem

_DEFAULT_BUFFER_SIZE = 24 * 1024 * 1024

# _FILE_SYSTEMS, key: FileProtocolHead, value: FileSystem
_FILE_SYSTEMS = dict()
_FILE_SYSTEMS[""] = LocalFileSystem()


class FileHandler:
"""File handler."""

def __init__(self, file_path, mode='rb'):
"""
Init FileHandler.

Args:
file_path (str): File path.
mode (Literal['r', 'rb', 'br', 'w', 'wb', 'bw']): It must be
in ['r', 'rb', 'br', 'w', 'wb', 'bw'].
"""
logger.debug("The __init__ method enter, param: file_path=%s"
"mode=%s", file_path, mode)

if mode not in ('r', 'rb', 'br', 'w', 'wb', 'bw'):
raise ValueError("mode %s is not supported by FileHandler." % mode)

self._file_path = to_str(file_path)
self._file_system = self.get_file_system(self._file_path)
self._buff_chunk_size = _DEFAULT_BUFFER_SIZE
self._buff = None
self._buff_offset = 0
self._offset = 0
self._binary_mode = 'b' in mode

@staticmethod
def get_file_system(path):
"""
Get file system object from path.

Args:
path (str): Directory path or file path.

Returns:
BaseFileSystem, a file system object.
"""
path = to_str(path)
prefix_index = path.find("://")
prefix = path[:prefix_index] if prefix_index >= 0 else ""
file_system = _FILE_SYSTEMS.get(prefix, None)

if file_system is None:
raise ValueError("No filesystem can be found for prefix %s" % prefix)
return file_system

@staticmethod
def walk(node, forward=True, onerror=None):
"""
Traverse path for directory and file tree.

Args:
node (str): Current path.
forward (bool): If True, it will return the sub-directories and files in the top-level
directory first and then iterate the files in the sub-directories. Default: True.
onerror (Optional[Callable]): If None, it indicates that errors during file traversal
will be ignored. Default: None.

Yields:
Tuple, (node, sub_dirs, files).

"""
logger.debug("The walk method enter, param: node=%s, "
"forward=%s, onerror=%s.", node, forward, type(onerror))

file_system = FileHandler.get_file_system(node)
node = to_str(node)
dirs = []

try:
dirs = file_system.list_dir(node)
except PathNotExistError as err:
if onerror:
onerror(err)
else:
logger.warning("Get dir list error, dir_path=%s error=%s.", node, str(err))
return

sub_dirs, files = [], []
for item in dirs:
full_path = file_system.join(node, to_str(item))
if file_system.is_dir(full_path):
sub_dirs.append(item)
else:
files.append(item)

result = (node, sub_dirs, files)

if forward:
logger.debug("The walk method return, pre result=%s.", result)
yield result

for subdir in sub_dirs:
joined_subdir = file_system.join(node, to_str(subdir))
for sub_results in FileHandler.walk(joined_subdir, forward, onerror):
yield sub_results

if not forward:
logger.debug("The walk method return, post result=%s.", result)
yield result

    def read(self, size=None):
        """
        Read bytes from buffer or file by size.

        Read from the buffer first. If there is not enough data in the buffer,
        data will be read from the file system.

        Args:
            size (Union[None, int]): Number of bytes to read. If None, read the whole file. Default: None.

        Returns:
            Union[bytes, str], the content read.
        """
if size is None:
result = self._file_system.read(self._file_path, self._binary_mode)
self._offset = len(result)
return result

result = None
if self._buff and len(self._buff) > self._buff_offset:
read_offset = self._buff_offset + size if size is not None else len(self._buff)
result = self._read_buffer_by_offset(read_offset)
if size is not None:
if len(result) == size:
return result
size -= len(result)

read_size = max(self._buff_chunk_size, size) if size is not None else None
self._buff = self._file_system.read(self._file_path, self._binary_mode,
read_size, self._offset)
self._buff_offset = 0

read_offset = size if size is not None else len(self._buff)
chunk = self._read_buffer_by_offset(read_offset)

result = result + chunk if result else chunk

return result

def _read_buffer_by_offset(self, new_buff_offset):
"""
Read buffer by offset.

Args:
new_buff_offset (int): Ending offset to read.

Returns:
str, bytes from old offset to new offset.

"""
old_buff_offset = self._buff_offset
read_size = min(len(self._buff), new_buff_offset) - old_buff_offset
self._offset += read_size
self._buff_offset += read_size
return self._buff[old_buff_offset:old_buff_offset + read_size]

def reset_offset(self, offset):
"""
Reset offset and buff_offset, clean buff.

Args:
offset (int): Offset.

"""
self._offset = offset
self._buff = None
self._buff_offset = 0

@staticmethod
def list_dir(path):
"""
List directories by path.

Args:
path (str): Directory path or file path.

Returns:
list[str], directories.
"""
file_system = FileHandler.get_file_system(path)
return file_system.list_dir(path)

@staticmethod
def is_dir(path):
"""
Determine if it is a directory.

Args:
path (str): Directory path or file path.

Returns:
bool, if it is a directory path, return True.
"""
file_system = FileHandler.get_file_system(path)
return file_system.is_dir(path)

@staticmethod
def is_file(path):
"""
Determine if it is a file.

Args:
path (str): Directory path or file path.

Returns:
bool, if it is a file path, return True.
"""
file_system = FileHandler.get_file_system(path)
return file_system.is_file(path)

@staticmethod
def exists(path):
"""
Determine if it exists.

Args:
path (str): Directory path or file path.

Returns:
bool, if it exists, return True.
"""
file_system = FileHandler.get_file_system(path)
return file_system.exists(path)

@staticmethod
def file_stat(file_path):
"""
Get file stat information.

Args:
file_path (str): File path.

Returns:
            namedtuple, the (size, mtime) of the file.
"""
file_system = FileHandler.get_file_system(file_path)
return file_system.file_stat(file_path)

@staticmethod
def join(path, *paths):
"""
Join paths.

Args:
path (str): Directory path.
paths (str): Path or paths.

Returns:
str, the joined path.
"""
file_system = FileHandler.get_file_system(path)
return file_system.join(path, *paths)

@property
def offset(self):
"""Get offset."""
return self._offset

@property
def file_path(self):
"""Get file path."""
return self._file_path
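
A usage sketch of the two central pieces: walking a summary tree and reading a file in chunks. The paths are placeholders:

from mindinsight.datavisual.data_access.file_handler import FileHandler

# Depth-first traversal; forward=True yields a directory before its children.
for node, sub_dirs, files in FileHandler.walk('/tmp/summaries'):
    print(node, sub_dirs, files)

# Buffered reads: each call advances the internal offset.
handler = FileHandler('/tmp/summaries/events.summary.1', mode='rb')
header = handler.read(8)    # first 8 bytes
body = handler.read(1024)   # next 1024 bytes, served from the buffer if possible
handler.reset_offset(0)     # rewind and drop the buffer
print(handler.offset, len(header))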

+ 143
- 0
mindinsight/datavisual/data_access/local_file_system.py View File

@@ -0,0 +1,143 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Local File System."""
import io
import os

from mindinsight.datavisual.common import exceptions
from mindinsight.datavisual.utils.tools import to_str
from mindinsight.datavisual.data_access.base_file_system import BaseFileSystem
from mindinsight.datavisual.data_access.base_file_system import StatInfo
from mindinsight.utils.exceptions import PathNotExistError


class LocalFileSystem(BaseFileSystem):
"""Local file system."""

def list_dir(self, path):
"""
List directories by path.

Args:
path (str): Directory path or file path.

Returns:
list[str], directories.
"""
path = to_str(path)
if not self.is_dir(path):
raise exceptions.PathNotDirectoryError("Path is %s." % path)
return os.listdir(path)

def is_dir(self, path):
"""
Determine if it is a directory.

Args:
path (str): Directory path or file path.

Returns:
bool, if it is a directory path, return True.
"""
return os.path.isdir(to_str(path))

def is_file(self, path):
"""
Determine if it is a file.

Args:
path (str): Directory path or file path.

Returns:
bool, if it is a file path, return True.
"""
return os.path.isfile(to_str(path))

def exists(self, path):
"""
Determine if it exists.

Args:
path (str): Directory path or file path.

Returns:
bool, if it exists, return True.
"""
return os.path.exists(to_str(path))

def file_stat(self, file_path):
"""
Get file stat information.

Args:
file_path (str): File path.

Returns:
            namedtuple, the (size, mtime) of the file.
"""
try:
file_info = os.stat(to_str(file_path))
except OSError:
raise PathNotExistError("File %s is not exist." % file_path)
return StatInfo(size=file_info.st_size, mtime=file_info.st_mtime)

@staticmethod
def read_access(file_path):
"""
Determine if it has read permission.

Args:
file_path (str): File path.

Returns:
bool, if it has read permission, return True.
"""
return os.access(to_str(file_path), os.R_OK)

def join(self, path, *paths):
"""
Join paths.

Args:
path (str): Directory path.
paths (str): Path or paths.

Returns:
str, the joined path.
"""
return os.path.join(path, *paths)

@staticmethod
def read(file_path, binary_mode=False, size=None, offset=None):
"""
Read file.

Args:
file_path (str): File path.
binary_mode (bool): If true, mode will be 'rb'. Else, 'r'.
size (int): Size of bytes to read.
            offset (int): Offset of file to read.

        Returns:
            Union[bytes, str], the content read.
"""
mode = "rb" if binary_mode else "r"
encoding = None if binary_mode else "utf8"
with io.open(file_path, mode, encoding=encoding) as file:
if offset is not None:
file.seek(offset)
if size is not None:
return file.read(size)

return file.read()
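
The size/offset parameters of read are what FileHandler's buffering relies on. A direct sketch against any readable local file (the path is a placeholder):

from mindinsight.datavisual.data_access.local_file_system import LocalFileSystem

fs = LocalFileSystem()
info = fs.file_stat('/tmp/summaries/events.summary.1')
print(info.size, info.mtime)

# Read 4 bytes in binary mode, starting at byte 2.
chunk = LocalFileSystem.read('/tmp/summaries/events.summary.1',
                             binary_mode=True, size=4, offset=2)
print(chunk)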

+ 14
- 0
mindinsight/datavisual/data_transform/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 70
- 0
mindinsight/datavisual/data_transform/data_loader.py View File

@@ -0,0 +1,70 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
DataLoader is an adapter for all other loaders.

This module can identify what loader should be used to load data.
"""

from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.data_transform.ms_data_loader import MSDataLoader
from mindinsight.datavisual.common import exceptions


class DataLoader:
"""
The adapter of all kinds of loaders.

Args:
summary_dir (str): A directory path.
"""
def __init__(self, summary_dir):
self._summary_dir = summary_dir
self._loader = None

def load(self):
"""Load the data when loader is exist."""
if self._loader is None:
ms_dataloader = MSDataLoader(self._summary_dir)
loaders = [ms_dataloader]
for loader in loaders:
if loader.filter_valid_files():
self._loader = loader
break

if self._loader is None:
logger.warning("No valid files can be loaded, summary_dir: %s.", self._summary_dir)
raise exceptions.SummaryLogPathInvalid()

self._loader.load()

def get_events_data(self):
"""
Get events data from log file.

Returns:
Optional[EventsData], None or events data.
"""
return self._loader.get_events_data()

def has_valid_files(self):
"""
Check the directory for valid files.

Returns:
bool, if the directory has valid files, return True.
"""
ms_dataloader = MSDataLoader(self._summary_dir)
return bool(ms_dataloader.filter_valid_files())
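
A sketch of the adapter in isolation, assuming a directory that already contains a MindSpore summary file (the path is a placeholder):

from mindinsight.datavisual.data_transform.data_loader import DataLoader

loader = DataLoader('/tmp/summaries/run1')
if loader.has_valid_files():
    loader.load()  # selects MSDataLoader under the hood
    events = loader.get_events_data()
    print(events.list_tags_by_plugin('scalar'))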

+ 514
- 0
mindinsight/datavisual/data_transform/data_manager.py View File

@@ -0,0 +1,514 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Management of all events data.

This module manages all loaders.
It can read events data through the DataLoader.

This module also acts as a thread pool manager.
"""
import threading
import time

from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED

from mindinsight.conf import settings
from mindinsight.datavisual.common import exceptions
from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.common.enums import DataManagerStatus
from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.data_transform.loader_generators.loader_generator import MAX_DATA_LOADER_SIZE
from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator
from mindinsight.utils.exceptions import MindInsightException
from mindinsight.utils.exceptions import ParamValueError


class DataManager:
"""
    DataManager manages a pool of loaders which help access events data.

    Each loader handles the events data of one summary log.
    A loader corresponds to an events_data.
    The DataManager builds a pool including all the data loaders.
    Each data loader provides methods to extract events information.
"""
def __init__(self, loader_generators):
"""
Initialize the pool of loader and the dict of name-to-path.

Args:
loader_generators (list[LoaderGenerator]): Loader generators help generate loaders.

self._status: Refer `datavisual.common.enums.DataManagerStatus`.
self._loader_pool: {'loader_id': <LoaderStruct>}.

"""
self._loader_pool = {}
self._deleted_id_list = []
self._status = DataManagerStatus.INIT.value
self._status_mutex = threading.Lock()
self._loader_pool_mutex = threading.Lock()
self._max_threads_count = 30
self._reload_interval = 3

self._loader_generators = loader_generators

def _add_loader(self, loader):
"""
Add a loader to load data.

Args:
            loader (LoaderStruct): An object of `Loader`.

"""
if len(self._loader_pool) >= MAX_DATA_LOADER_SIZE:
delete_number = len(self._loader_pool) - MAX_DATA_LOADER_SIZE + 1
sorted_loaders = sorted(self._loader_pool.items(),
key=lambda loader: loader[1].latest_update_time)
for index in range(delete_number):
delete_loader_id = sorted_loaders[index][0]
self._delete_loader(delete_loader_id)
self._loader_pool.update({loader.loader_id: loader})

def _delete_loader(self, loader_id):
"""
Delete loader from loader pool by loader id.

Args:
loader_id (str): ID of loader.
"""
if self._loader_pool.get(loader_id) is not None:
logger.debug("delete loader %s", loader_id)
self._loader_pool.pop(loader_id)

def _execute_loader(self, loader_id):
"""
        Load data from data_loader.

        If something goes wrong during loading, log it and delete the loader.

Args:
loader_id (str): An ID for `Loader`.

"""
try:
with self._loader_pool_mutex:
loader = self._loader_pool.get(loader_id, None)
if loader is None:
logger.debug("Loader %r has been deleted, will not load data.", loader_id)
return
loader.data_loader.load()
except MindInsightException as ex:
logger.warning("Data loader %r load data failed. "
"Delete data_loader. Detail: %s", loader_id, ex)

with self._loader_pool_mutex:
self._delete_loader(loader_id)

def start_load_data(self,
reload_interval=settings.RELOAD_INTERVAL,
max_threads_count=MAX_DATA_LOADER_SIZE):
"""
Start threads for loading data.

Args:
            reload_interval (int): Interval in seconds between reloading data.
max_threads_count (int): Max number of threads of execution.

"""
logger.info("Start to load data, reload_interval: %s, "
"max_threads_count: %s.", reload_interval, max_threads_count)
DataManager.check_reload_interval(reload_interval)
DataManager.check_max_threads_count(max_threads_count)

self._reload_interval = reload_interval
self._max_threads_count = max_threads_count

thread = threading.Thread(target=self._reload_data,
name='start_load_data_thread')
thread.daemon = True
thread.start()

def _reload_data(self):
"""This function periodically loads the data."""
# Let gunicorn load other modules first.
time.sleep(1)
while True:
self._load_data()

if not self._reload_interval:
break
time.sleep(self._reload_interval)

def reload_data(self):
"""
Reload the data once.

This function needs to be used after `start_load_data` function.
"""
logger.debug("start to reload data")
thread = threading.Thread(target=self._load_data,
name='reload_data_thread')
thread.daemon = False
thread.start()

def _load_data(self):
"""This function will load data once and ignore it if the status is loading."""
logger.info("Start to load data, reload interval: %r.", self._reload_interval)
with self._status_mutex:
if self.status == DataManagerStatus.LOADING.value:
logger.debug("Current status is %s , will ignore to load data.", self.status)
return
self.status = DataManagerStatus.LOADING.value

self._generate_loaders()
self._execute_load_data()

if not self._loader_pool:
self.status = DataManagerStatus.INVALID.value
else:
self.status = DataManagerStatus.DONE.value

logger.info("Load event data end, status: %r, and loader pool size is %r.",
self.status, len(self._loader_pool))

def _generate_loaders(self):
"""This function generates the loader from given path."""
loader_dict = {}
for generator in self._loader_generators:
loader_dict.update(generator.generate_loaders(self._loader_pool))

sorted_loaders = sorted(loader_dict.items(), key=lambda loader: loader[1].latest_update_time)
latest_loaders = sorted_loaders[-MAX_DATA_LOADER_SIZE:]
self._deal_loaders(latest_loaders)

def _deal_loaders(self, latest_loaders):
"""
        This function determines which loaders to keep, remove, or add.

        It is based on the given list of loaders.

        Args:
            latest_loaders (list[tuple]): A list of (loader_id, LoaderStruct) pairs.
"""

with self._loader_pool_mutex:
for loader_id, loader in latest_loaders:
if self._loader_pool.get(loader_id, None) is None:
self._add_loader(loader)
continue

# If this loader was updated manually before,
                # its latest_update_time may be bigger than update_time in summary.
if self._loader_pool[loader_id].latest_update_time < loader.latest_update_time:
self._update_loader_latest_update_time(loader_id, loader.latest_update_time)

def _execute_load_data(self):
"""Load data through multiple threads."""
threads_count = self._get_threads_count()
if not threads_count:
logger.info("Can not find any valid train log path to load, loader pool is empty.")
return

logger.info("Start to execute load data. threads_count: %s.", threads_count)

with ThreadPoolExecutor(max_workers=threads_count) as executor:
futures = []
loader_pool = self._get_snapshot_loader_pool()
for loader_id in loader_pool:
future = executor.submit(self._execute_loader, loader_id)
futures.append(future)
wait(futures, return_when=ALL_COMPLETED)

@staticmethod
def check_reload_interval(reload_interval):
"""
        Check that the reload interval is valid.

Args:
reload_interval (int): Reload interval >= 0.
"""
if not isinstance(reload_interval, int):
raise ParamValueError("The value of reload interval should be integer.")

if reload_interval < 0:
raise ParamValueError("The value of reload interval should be >= 0.")

@staticmethod
def check_max_threads_count(max_threads_count):
"""
        Check that max threads count is an integer greater than 0.

        Args:
            max_threads_count (int): Should be > 0.
"""
if not isinstance(max_threads_count, int):
raise ParamValueError("The value of max threads count should be integer.")
if max_threads_count <= 0:
raise ParamValueError("The value of max threads count should be > 0.")

def _get_threads_count(self):
"""
Use the maximum number of threads available.

Returns:
int, number of threads.

"""
threads_count = min(self._max_threads_count, len(self._loader_pool))

return threads_count

def get_train_job_by_plugin(self, train_id, plugin_name):
"""
Get a train job by train job id.

        If the given train job does not have the given plugin data, the tag list will be empty.

Args:
train_id (str): Get train job info by the given id.
plugin_name (str): Get tags by given plugin.

Returns:
TypedDict('TrainJobEntity', {'id': str, 'name': str, 'tags': List[str]}),
a train job object.

"""
self._check_status_valid()
self._check_train_job_exist(train_id, self._loader_pool)

loader = self._get_loader(train_id)
if loader is None:
logger.warning("No valid summary log in train job %s, "
"or it is not in the cache.", train_id)
return None

name = loader.name
data_loader = loader.data_loader

tags = []
try:
events_data = data_loader.get_events_data()
tags = events_data.list_tags_by_plugin(plugin_name)
except KeyError:
logger.debug("Plugin name %r does not exist "
"in train job %r, and set tags to empty list.", plugin_name, name)
except AttributeError:
logger.debug("Train job %r has been deleted or it has not loaded data, "
"and set tags to empty list.", name)

result = dict(id=train_id, name=name, tags=tags)
return result

def delete_train_job(self, train_id):
"""
Delete train job with a train id.

Args:
train_id (str): ID for train job.

"""
with self._loader_pool_mutex:
self._delete_loader(train_id)

def list_tensors(self, train_id, tag):
"""
List tensors of the given train job and tag.

        If no tensor can be found for the given tag, an exception will be raised.

Args:
train_id (str): ID for train job.
tag (str): The tag name.

Returns:
            NamedTuple, the tuple format is `collections.namedtuple('_Tensor', ['wall_time', 'event_step', 'value'])`.
            The value will contain the given tag data.

"""
self._check_status_valid()
loader_pool = self._get_snapshot_loader_pool()
if not self._is_loader_in_loader_pool(train_id, loader_pool):
raise ParamValueError("Can not find any data in loader pool about the train job.")

data_loader = loader_pool[train_id].data_loader
events_data = data_loader.get_events_data()

try:
tensors = events_data.tensors(tag)
except KeyError:
error_msg = "Can not find any data in this train job by given tag."
raise ParamValueError(error_msg)

return tensors

def _check_train_job_exist(self, train_id, loader_pool):
"""
        Check that the train job exists; raise an exception if it does not.

Args:
train_id (str): The given train job id.
loader_pool (dict[str, LoaderStruct]): Refer to self._loader_pool.

Raises:
            ParamValueError: If the train job can not be found in data manager.
"""
is_exist = False
if train_id in loader_pool:
return
for generator in self._loader_generators:
if generator.check_train_job_exist(train_id):
is_exist = True
break
if not is_exist:
raise ParamValueError("Can not find the train job in data manager.")

def _is_loader_in_loader_pool(self, train_id, loader_pool):
"""
Check whether the train job is in the loader pool; return True if it is, otherwise False.

Args:
train_id (str): The given train job id.
loader_pool (dict): See self._loader_pool.

Returns:
bool, if loader in loader pool, return True.
"""
if train_id in loader_pool:
return True
return False

def _get_snapshot_loader_pool(self):
"""
Create a snapshot of data loader pool to avoid concurrent mutation and iteration issues.

Returns:
dict, a copy of `self._loader_pool`.
"""
with self._loader_pool_mutex:
return dict(self._loader_pool)

def _check_status_valid(self):
"""Check if the status is valid to load data."""

if self.status == DataManagerStatus.INIT.value:
raise exceptions.SummaryLogIsLoading("Data is being loaded, "
"current status: %s." % self._status)

def get_single_train_job(self, train_id, manual_update=False):
"""
Get train job by train ID.

Args:
train_id (str): Train ID for train job.
manual_update (bool): Set to True to force a manual update of the loader. Default: False.

Returns:
dict, a single train job; if no data can be found, None is returned.
"""
self._check_status_valid()
self._check_train_job_exist(train_id, self._loader_pool)

loader = self._get_loader(train_id, manual_update)
if loader is None:
logger.warning("No valid summary log in train job %s, "
"or it is not in the cache.", train_id)
return None

train_job = loader.to_dict()
train_job.pop('data_loader')

plugin_data = {}
for plugin_name in PluginNameEnum.list_members():
job = self.get_train_job_by_plugin(train_id, plugin_name=plugin_name)
if job is None:
plugin_data[plugin_name] = []
else:
plugin_data[plugin_name] = job['tags']

train_job.update({'tag_mapping': plugin_data})

return train_job

def _get_loader(self, train_id, manual_update=False):
"""
Get loader by train id.

Args:
train_id (str): Train Id.
manual_update (bool): Whether this is a manual update. Default: False.

Returns:
LoaderStruct, the loader.
"""
loader = None
is_reload = False
with self._loader_pool_mutex:
if self._is_loader_in_loader_pool(train_id, self._loader_pool):
loader = self._loader_pool.get(train_id)

if manual_update and loader is None:
for generator in self._loader_generators:
tmp_loader = generator.generate_loader_by_train_id(train_id)
if loader and loader.latest_update_time > tmp_loader.latest_update_time:
continue
loader = tmp_loader

if loader is None:
return None

self._add_loader(loader)
is_reload = True

if manual_update:
self._update_loader_latest_update_time(loader.loader_id)

if is_reload:
self.reload_data()

return loader

def _update_loader_latest_update_time(self, loader_id, latest_update_time=None):
"""
Update loader with latest_update_time.

Args:
loader_id (str): ID of loader.
latest_update_time (float): Timestamp.
"""
if latest_update_time is None:
latest_update_time = time.time()
self._loader_pool[loader_id].latest_update_time = latest_update_time

@property
def status(self):
"""
Get the status of data manager.

Returns:
DataManagerStatus, the status of data manager.
"""
return self._status

@status.setter
def status(self, status):
"""Set data manger status."""
self._status = status


_loader_generators = [DataLoaderGenerator(settings.SUMMARY_BASE_DIR)]
DATA_MANAGER = DataManager(_loader_generators)
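
A minimal usage sketch of the singleton above (hedged: './log1' is a hypothetical train id, and loading is assumed to have finished without the job being evicted from the cache):

# Hypothetical query against the module-level DATA_MANAGER.
train_job = DATA_MANAGER.get_single_train_job('./log1')
if train_job is not None:
    scalar_tags = train_job['tag_mapping'][PluginNameEnum.SCALAR.value]
    if scalar_tags:
        tensors = DATA_MANAGER.list_tensors('./log1', scalar_tags[0])
        print([tensor.step for tensor in tensors])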

+ 216
- 0
mindinsight/datavisual/data_transform/events_data.py View File

@@ -0,0 +1,216 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Takes a generator of values, and collects them for a frontend."""

import collections
import threading

from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.data_transform import reservoir
from mindinsight.conf import settings


# Type of the tensor event from external component
_Tensor = collections.namedtuple('_Tensor', ['wall_time', 'step', 'value'])
TensorEvent = collections.namedtuple(
'TensorEvent', ['wall_time', 'step', 'tag', 'plugin_name', 'value'])

# config for `EventsData`
_DEFAULT_STEP_SIZES_PER_TAG = settings.DEFAULT_STEP_SIZES_PER_TAG

CONFIG = {
'max_total_tag_sizes': settings.MAX_TAG_SIZE_PER_EVENTS_DATA,
'max_tag_sizes_per_plugin':
{
PluginNameEnum.GRAPH.value: settings.MAX_GRAPH_TAG_SIZE,
},
'max_step_sizes_per_tag':
{
PluginNameEnum.SCALAR.value: settings.MAX_SCALAR_STEP_SIZE_PER_TAG,
PluginNameEnum.IMAGE.value: settings.MAX_IMAGE_STEP_SIZE_PER_TAG,
PluginNameEnum.GRAPH.value: settings.MAX_GRAPH_STEP_SIZE_PER_TAG,
}
}


class EventsData:
"""
EventsData is an event data manager.

It manages the log events generated during a training process.
The log event records information such as graph, tag, and tensor.
Data such as tensor can be retrieved based on its tag.
"""

def __init__(self):
self._config = CONFIG
self._max_step_sizes_per_tag = self._config['max_step_sizes_per_tag']

self._tags = list()
self._reservoir_by_tag = {}
self._reservoir_mutex_lock = threading.Lock()

self._tags_by_plugin = collections.defaultdict(list)
self._tags_by_plugin_mutex_lock = collections.defaultdict(threading.Lock)

def add_tensor_event(self, tensor_event):
"""
Add a new tensor event to the tensors_data.

Args:
tensor_event (TensorEvent): Refer to `TensorEvent` object.
"""
if not isinstance(tensor_event, TensorEvent):
raise TypeError('Expect to get data of type `TensorEvent`.')

tag = tensor_event.tag
plugin_name = tensor_event.plugin_name

if tag not in set(self._tags):
deleted_tag = self._check_tag_out_of_spec(plugin_name)
if deleted_tag is not None:
self.delete_tensor_event(deleted_tag)

self._tags.append(tag)

with self._tags_by_plugin_mutex_lock[plugin_name]:
if tag not in self._tags_by_plugin[plugin_name]:
self._tags_by_plugin[plugin_name].append(tag)

with self._reservoir_mutex_lock:
if tag not in self._reservoir_by_tag:
reservoir_size = self._get_reservoir_size(tensor_event.plugin_name)
self._reservoir_by_tag[tag] = reservoir.Reservoir(reservoir_size)

tensor = _Tensor(wall_time=tensor_event.wall_time,
step=tensor_event.step,
value=tensor_event.value)

if self._is_out_of_order_step(tensor_event.step, tensor_event.tag):
self.purge_reservoir_data(tensor_event.step, self._reservoir_by_tag[tag])

self._reservoir_by_tag[tag].add_sample(tensor)

def delete_tensor_event(self, tag):
"""
Delete the tensor events of the given tag from the in-memory record.

Args:
tag (str): The tag name.
"""
self._tags.remove(tag)
for plugin_name, lock in self._tags_by_plugin_mutex_lock.items():
with lock:
if tag in self._tags_by_plugin[plugin_name]:
self._tags_by_plugin[plugin_name].remove(tag)
break

with self._reservoir_mutex_lock:
if tag in self._reservoir_by_tag:
self._reservoir_by_tag.pop(tag)

def list_tags_by_plugin(self, plugin_name):
"""
Return all the tag names of the plugin.

Args:
plugin_name (str): The Plugin name.

Returns:
list[str], tags of the plugin.

Raises:
KeyError: when plugin name could not be found.
"""
if plugin_name not in self._tags_by_plugin:
raise KeyError('Plugin %r could not be found.' % plugin_name)
with self._tags_by_plugin_mutex_lock[plugin_name]:
# Return a snapshot to avoid concurrent mutation and iteration issues.
return list(self._tags_by_plugin[plugin_name])

def tensors(self, tag):
"""
Return all tensors of the tag.

Args:
tag (str): The tag name.

Returns:
list[_Tensor], the list of tensors to the tag.
"""
if tag not in self._reservoir_by_tag:
raise KeyError('TAG %r could not be found.' % tag)
return self._reservoir_by_tag[tag].samples()

def _is_out_of_order_step(self, step, tag):
"""
If the current step is not larger than the latest one, it is an out-of-order step.

Args:
step (int): The step to be checked.
tag (str): The tag whose tensors are checked.

Returns:
bool, True if the given step is out of order.
"""
if self.tensors(tag):
tensors = self.tensors(tag)
last_step = tensors[-1].step
if step <= last_step:
return True
return False

@staticmethod
def purge_reservoir_data(start_step, tensor_reservoir):
"""
Purge all tensor events whose steps are greater than or equal to the given start step.

Args:
start_step (int): The purge start step. All previously seen events with
a step greater than or equal to it will be purged.
tensor_reservoir (Reservoir): A `Reservoir` object.

Returns:
int, the number of items removed.
"""
cnt_out_of_order = tensor_reservoir.remove_sample(lambda x: x.step < start_step)

return cnt_out_of_order

def _get_reservoir_size(self, plugin_name):
max_step_sizes_per_tag = self._config['max_step_sizes_per_tag']
return max_step_sizes_per_tag.get(plugin_name, _DEFAULT_STEP_SIZES_PER_TAG)

def _check_tag_out_of_spec(self, plugin_name):
"""
Check whether the tag is out of specification.

Args:
plugin_name (str): The given plugin name.

Returns:
Union[str, None], the first (oldest) tag if out of specification, else None.

"""
tag_specifications = self._config['max_tag_sizes_per_plugin'].get(plugin_name)
if tag_specifications is not None and len(self._tags_by_plugin[plugin_name]) >= tag_specifications:
deleted_tag = self._tags_by_plugin[plugin_name][0]
return deleted_tag

if len(self._tags) >= self._config['max_total_tag_sizes']:
deleted_tag = self._tags[0]
return deleted_tag

return None
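
A short sketch of how `EventsData` handles in-order and out-of-order steps (tag and values are illustrative, and the configured tag limits are assumed not to be hit):

import time

events = EventsData()
for step, val in [(1, 0.9), (2, 0.7), (3, 0.5)]:
    events.add_tensor_event(TensorEvent(wall_time=time.time(), step=step, tag='loss',
                                        plugin_name=PluginNameEnum.SCALAR.value, value=val))
# Re-adding step 2 is out of order (2 <= 3), so steps >= 2 are purged first.
events.add_tensor_event(TensorEvent(wall_time=time.time(), step=2, tag='loss',
                                    plugin_name=PluginNameEnum.SCALAR.value, value=0.6))
print([t.step for t in events.tensors('loss')])  # expected: [1, 2]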

+ 20
- 0
mindinsight/datavisual/data_transform/graph/__init__.py View File

@@ -0,0 +1,20 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""This file is used to define the graph."""

from .msgraph import MSGraph
from .node import NodeTypeEnum

__all__ = ['MSGraph', 'NodeTypeEnum']

+ 455
- 0
mindinsight/datavisual/data_transform/graph/graph.py View File

@@ -0,0 +1,455 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
This file is used to define the basic graph.
"""
import copy
import time

from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.common import exceptions
from .node import NodeTypeEnum
from .node import Node


class EdgeTypeEnum:
"""Node edge type enum."""
control = 'control'
data = 'data'


class DataTypeEnum:
"""Data type enum."""
DT_TENSOR = 13


class Graph:
"""The `Graph` object is used to describe a graph file."""
MIN_POLYMERIC_NODE_COUNT = 5

def __init__(self):
# Store normal nodes, including leaf nodes and name scope nodes, but not polymeric nodes
self._normal_nodes = {}

# Store polymeric nodes.
self._polymeric_nodes = {}

# Store all leaf nodes resolved from the file.
self._leaf_nodes = {}

# The format of node groups is {'group_name': {'node_name': <Node>}}
self._node_groups = {}

def exist_node(self, name):
"""
Check whether a node exists in the graph.

Args:
name (str): The node name.

Returns:
bool, True if the node exists.

"""
if self._normal_nodes.get(name) is None:
return False
return True

def get_normal_nodes(self, namescope=None):
"""
Get nodes by namescope.

Args:
namescope (str): A namescope of nodes.

Returns:
list[dict], a list of `Node` objects in dict form.

"""
nodes = []
if namescope is None:
for name, node in self._normal_nodes.items():
if '/' not in name:
# Get first layer nodes
nodes.append(node.to_dict())
return nodes

namescope = namescope + '/'
for name, node in self._normal_nodes.items():
if name.startswith(namescope) and '/' not in name.split(namescope)[1]:
nodes.append(node.to_dict())

return nodes

def get_polymeric_nodes(self, polymeric_scope):
"""
Get polymeric nodes by polymeric scope.

Args:
polymeric_scope (str): The polymeric scope name of nodes.

Returns:
list[dict], a list of `Node` objects in dict form.
"""
nodes = []
for node in self._polymeric_nodes.values():
if node.polymeric_scope_name == polymeric_scope:
nodes.append(node.to_dict())
return nodes

def search_node_names(self, content, offset, limit):
"""
Search node names by content.

Args:
content (Union[str, None]): The keyword to search for in node names;
if None, all node names are returned.
offset (int): The page offset. E.g. offset 0 means the first page.
limit (int): The maximum number of names returned per page.

Returns:
list[str], a list of node names.
"""
all_names = []
all_names.extend(list(self._normal_nodes.keys()))
all_names.extend(list(self._polymeric_nodes.keys()))
if content is not None:
content = content.lower()
catch_names = [name for name in all_names if content in name.lower()]
else:
catch_names = all_names
catch_names = sorted(catch_names)
real_offset = offset * limit
return catch_names[real_offset:real_offset+limit]

def search_single_node(self, node_name):
"""
Search for a node, and return the nodes of every layer on the path down to it.

Args:
node_name (str): The name of node.

Returns:
dict, a dict object in the format:
item_object = {'nodes': [<Node object>],
'scope_name': '<Node scope>',
'children': {<item_object>}}
"""
if node_name and self._polymeric_nodes.get(node_name) is None \
and self._normal_nodes.get(node_name) is None:
raise exceptions.NodeNotInGraphError()

response = {}
nodes = self.get_normal_nodes()
response.update({
'nodes': nodes,
'scope_name': '',
'children': {}
})

names = node_name.split('/')
children = response['children']
for i in range(1, len(names)+1):
if i == len(names):
polymeric_node = self._polymeric_nodes.get(node_name)
if polymeric_node:
polymeric_scope = polymeric_node.polymeric_scope_name
nodes = self.get_polymeric_nodes(polymeric_scope)
children.update({'nodes': nodes,
'scope_name': polymeric_scope,
'children': {}})
break

name_scope = '/'.join(names[:i])
nodes = self.get_normal_nodes(name_scope)
children.update({
'nodes': nodes,
'scope_name': name_scope,
'children': {}
})
children = children['children']

return response

def _build_polymeric_nodes(self):
"""Build polymeric node."""
logger.debug("Start to build polymeric nodes")

self._find_polymeric_nodes()

group_count_map = {}
for group_name, group in self._node_groups.items():
name = group_name.split('/')[-1]
count = group_count_map.get(name, 0)
count += 1
group_count_map[name] = count
polymeric_node_name = group_name + '_{}_[{}]'.format(count, len(group))
polymeric_node = Node(polymeric_node_name, node_id=polymeric_node_name)
polymeric_node.node_type = NodeTypeEnum.POLYMERIC_SCOPE.value
polymeric_node.name_scope = '/'.join(group_name.split('/')[:-1])
polymeric_node.subnode_count = len(group)

for name_tmp, node_tmp in group.items():
node_tmp.polymeric_scope_name = polymeric_node_name
self._polymeric_nodes.update({name_tmp: node_tmp})
polymeric_node.update_input(node_tmp.input)
polymeric_node.update_output(node_tmp.output)

self._normal_nodes.update({polymeric_node_name: polymeric_node})

self._update_input_output()

def _find_polymeric_nodes(self):
"""Find polymeric nodes from node groups."""
node_groups = copy.deepcopy(self._node_groups)
for group_name, group in node_groups.items():
if len(group) < self.MIN_POLYMERIC_NODE_COUNT:
self._normal_nodes.update(group)
self._node_groups.pop(group_name)
continue

move_node_names = []
is_move_group = False
for node_name, group_node in group.items():
node_list = []
is_in_group = False
for dst_name in group_node.output:
node_tmp = self._leaf_nodes[dst_name]
node_list.append(node_tmp)

start = time.time()
run_count = 0
visit_nodes = {}
while node_list:
# Iterate to check whether the outputs of a node in the group lead back
# into the group (a loop). Example: group A contains node_a; if there is
# a path like A/node_a -> B/node_b -> A/node_b, then node_a is removed
# from group A.
node_tmp = node_list[0]
node_list = node_list[1:]
visit_nodes.update({node_tmp.name: True})
if node_tmp in group.values():
is_in_group = True
break
for dst_name_tmp in node_tmp.output:
run_count += 1
node_tmp = self._leaf_nodes[dst_name_tmp]
if visit_nodes.get(dst_name_tmp):
continue
node_list.append(node_tmp)
logger.debug("Find group %s node end, is_in_group: %s, use time: %s, "
"run count: %s.", group_name, is_in_group,
time.time() - start, run_count)

if is_in_group:
move_node_names.append(node_name)

if (len(group) - len(move_node_names)) < self.MIN_POLYMERIC_NODE_COUNT:
is_move_group = True
break

if is_move_group:
self._normal_nodes.update(group)
self._node_groups.pop(group_name)
else:
for name_tmp in move_node_names:
node_tmp = self._node_groups[group_name].pop(name_tmp)
self._normal_nodes.update({name_tmp: node_tmp})

def _update_input_output(self):
"""We need to update input and output attribute after build polymeric node."""
for node in self._normal_nodes.values():
for src_name, input_attr in node.input.items():
if self._polymeric_nodes.get(src_name):
input_attr['scope'] = NodeTypeEnum.POLYMERIC_SCOPE.value
node.update_input({src_name: input_attr})

for dst_name, output_attr in node.output.items():
if self._polymeric_nodes.get(dst_name):
output_attr['scope'] = NodeTypeEnum.POLYMERIC_SCOPE.value
node.update_output({dst_name: output_attr})

for node in self._polymeric_nodes.values():
for src_name, input_attr in node.input.items():
if self._polymeric_nodes.get(src_name):
input_attr['scope'] = NodeTypeEnum.POLYMERIC_SCOPE.value
node.update_input({src_name: input_attr})

for dst_name, output_attr in node.output.items():
if self._polymeric_nodes.get(dst_name):
output_attr['scope'] = NodeTypeEnum.POLYMERIC_SCOPE.value
node.update_output({dst_name: output_attr})

def _calc_polymeric_input_output(self):
"""Calc polymeric input and output after build polymeric node."""
for name, node in self._normal_nodes.items():
polymeric_input = {}
for src_name in node.input:
src_node = self._polymeric_nodes.get(src_name)
if node.node_type == NodeTypeEnum.POLYMERIC_SCOPE.value:
src_name = src_name if not src_node else src_node.polymeric_scope_name
output_name = self._calc_dummy_node_name(name, src_name)
polymeric_input.update({output_name: {'edge_type': EdgeTypeEnum.data}})
continue

if not src_node:
continue

if not node.name_scope and src_node.name_scope:
# if current node is in first layer, and the src node is not in
# the first layer, the src node will not be the polymeric input of current node.
continue

if node.name_scope == src_node.name_scope \
or node.name_scope.startswith(src_node.name_scope):
polymeric_input.update(
{src_node.polymeric_scope_name: {'edge_type': EdgeTypeEnum.data}})

node.update_polymeric_input(polymeric_input)

polymeric_output = {}
for dst_name in node.output:
dst_node = self._polymeric_nodes.get(dst_name)

if node.node_type == NodeTypeEnum.POLYMERIC_SCOPE.value:
dst_name = dst_name if not dst_node else dst_node.polymeric_scope_name
output_name = self._calc_dummy_node_name(name, dst_name)
polymeric_output.update({output_name: {'edge_type': EdgeTypeEnum.data}})
continue

if not dst_node:
continue

if not node.name_scope and dst_node.name_scope:
continue

if node.name_scope == dst_node.name_scope \
or node.name_scope.startswith(dst_node.name_scope):
polymeric_output.update(
{dst_node.polymeric_scope_name: {'edge_type': EdgeTypeEnum.data}})

node.update_polymeric_output(polymeric_output)

for name, node in self._polymeric_nodes.items():
polymeric_input = {}
for src_name in node.input:
output_name = self._calc_dummy_node_name(name, src_name)
polymeric_input.update({output_name: {'edge_type': EdgeTypeEnum.data}})
node.update_polymeric_input(polymeric_input)

polymeric_output = {}
for dst_name in node.output:
output_name = self._calc_dummy_node_name(name, dst_name)
polymeric_output.update({output_name: {'edge_type': EdgeTypeEnum.data}})
node.update_polymeric_output(polymeric_output)

def _calc_dummy_node_name(self, current_node_name, other_node_name):
"""
Calc dummy node name.

Args:
current_node_name (str): The name of current node.
other_node_name (str): The name of the other (target) node.

Returns:
str, the dummy node name.
"""
name_tmp = other_node_name
if self._polymeric_nodes.get(other_node_name):
name_tmp = self._polymeric_nodes[other_node_name].polymeric_scope_name
name_tmp_list = name_tmp.split('/')
current_name_list = current_node_name.split('/')
index = 0
min_len = min(len(name_tmp_list), len(current_name_list))
for i in range(min_len):
index = i
if name_tmp_list[index] != current_name_list[index]:
break
dummy_node_name = '/'.join(name_tmp_list[:index+1])
return dummy_node_name

def _build_name_scope_nodes(self):
"""Build name scope node by every node name."""
normal_nodes = dict(self._normal_nodes)

rename_node_names = {}
for name, node in normal_nodes.items():
name_list = name.split('/')
for i in range(1, len(name_list)):
name_scope = '/'.join(name_list[:i])
name_scope_node = self._normal_nodes.get(name_scope)
if name_scope_node is None:
name_scope_node = Node(name_scope, node_id=name_scope)
name_scope_node.node_type = NodeTypeEnum.NAME_SCOPE.value
name_scope_node.name_scope = '/'.join(name_list[:i-1])
elif name_scope_node.node_type != NodeTypeEnum.NAME_SCOPE.value:
# The name of this node conflicts with namescope, so rename this node
old_name = name_scope_node.name
old_names = name_scope_node.name.split('/')
old_names[-1] = f'({old_names[-1]})'
new_name = '/'.join(old_names)
name_scope_node.name = new_name
self._normal_nodes.pop(old_name)
self._normal_nodes.update({new_name: name_scope_node})
rename_node_names.update({old_name: new_name})

# create new namescope
name_scope_node = Node(name_scope, node_id=name_scope)
name_scope_node.node_type = NodeTypeEnum.NAME_SCOPE.value
name_scope_node.name_scope = '/'.join(name_list[:i-1])

# update the external input and output of this node onto the namescope node
name_scope_with_slash = name_scope + '/'
for src_name, input_attr in node.input.items():
if src_name.startswith(name_scope_with_slash):
continue
name_scope_node.update_input({src_name: input_attr})

for dst_name, output_attr in node.output.items():
if dst_name.startswith(name_scope_with_slash):
continue
name_scope_node.update_output({dst_name: output_attr})

self._normal_nodes.update({name_scope: name_scope_node})

if rename_node_names:
# If existing nodes are renamed, the inputs and outputs of all nodes need to be refreshed
nodes = []
nodes.extend(self._normal_nodes.values())
nodes.extend(self._polymeric_nodes.values())
for node in nodes:
attrs = ['input', 'output', 'polymeric_input', 'polymeric_output']
for item in attrs:
tmp_dict = dict(getattr(node, item))
for name, value in tmp_dict.items():
new_name = rename_node_names.get(name, False)
if new_name:
getattr(node, item).pop(name)
getattr(node, f'update_{item}')({new_name: value})

self._calc_subnode_count()

def _calc_subnode_count(self):
"""Calc the sub node count of scope node."""
name_scope_mapping = {}
for node in self._normal_nodes.values():
if node.name_scope:
count = name_scope_mapping.get(node.name_scope, 0)
name_scope_mapping[node.name_scope] = count + 1

for name_scope, count in name_scope_mapping.items():
node = self._normal_nodes[name_scope]
node.subnode_count = count
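
The dummy-node-name rule in `_calc_dummy_node_name` truncates the other node's path at the first segment that differs from the current node's path; a standalone re-statement with hypothetical names (ignoring the polymeric lookup):

def dummy_name(current_node_name, other_node_name):
    other = other_node_name.split('/')
    current = current_node_name.split('/')
    index = 0
    for i in range(min(len(other), len(current))):
        index = i
        if other[i] != current[i]:
            break
    return '/'.join(other[:index + 1])

assert dummy_name('Default/conv1/node_a', 'Default/conv2/node_b') == 'Default/conv2'
assert dummy_name('Default/conv1/node_a', 'Other/node_b') == 'Other'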

+ 274
- 0
mindinsight/datavisual/data_transform/graph/msgraph.py View File

@@ -0,0 +1,274 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""This file is used to define the MindSpore graph."""
import re
import copy

from mindinsight.datavisual.common.log import logger
from .node import Node
from .node import NodeTypeEnum
from .graph import Graph
from .graph import EdgeTypeEnum
from .graph import DataTypeEnum


class MSGraph(Graph):
"""The object describes the MindSpore graph, and it is defined in the anf_if proto file."""

def build_graph(self, graph_proto):
"""
Build the graph from a graph proto which refers to `anf_ir_pb2.GraphProto`, and set the status to loading.

Args:
graph_proto (anf_ir_pb2.GraphProto): Refer to `anf_ir_pb2.GraphProto`.
"""
logger.info("Start to build graph.")

self._build_leaf_nodes(graph_proto)
self._build_polymeric_nodes()
self._build_name_scope_nodes()
self._calc_polymeric_input_output()
logger.info("Build graph end, normal node count: %s, polymeric node "
"count: %s.", len(self._normal_nodes), len(self._polymeric_nodes))

def _build_leaf_nodes(self, graph_proto):
"""
Build leaf node from graph proto.

Leaf nodes contain operation nodes, parameter nodes, and const nodes.

Args:
graph_proto (anf_ir_pb2.model_proto.graph): Refer to anf_ir_pb2.model_proto.graph.
"""
logger.info("Start to build leaf nodes.")
leaf_node_id_map_name = {}
const_nodes_map = {}

for node_def in graph_proto.node:
node = self._parse_graph_proto_node(node_def)
leaf_node_id_map_name.update({node.node_id: node.name})

for parameter in graph_proto.parameters:
node = self._parse_graph_proto_parameter(parameter)
const_nodes_map.update({node.name: node})

for i, const in enumerate(graph_proto.const_vals):
node_id = 'const_{}'.format(i)
node = self._parse_graph_proto_const(const, node_id)
const_nodes_map.update({const.key: node})

self._calc_input(leaf_node_id_map_name, graph_proto, const_nodes_map)
self._calc_output()

logger.info("Build leaf nodes end, normal nodes count: %s, group count: %s, "
"left node count: %s.", len(self._normal_nodes), len(self._node_groups),
len(self._leaf_nodes))

def _calc_input(self, leaf_node_id_map_name, graph_proto, const_nodes_map):
"""
Calc input for every leaf node.

Args:
leaf_node_id_map_name (dict[str, str]): Format is {'node_id': 'node_name'}.
graph_proto (anf_ir_pb2.model_proto.graph): See anf_ir_pb2.model_proto.graph.
const_nodes_map (dict[str, Node]): Format is {'node name': <Const node>}.
"""
logger.debug("Start to calc input.")
for node_def in graph_proto.node:
node_name = leaf_node_id_map_name[node_def.name]
node = self._leaf_nodes[node_name]
for input_def in node_def.input:
edge_type = EdgeTypeEnum.data
if input_def.type == "CONTROL_EDGE":
edge_type = EdgeTypeEnum.control

if const_nodes_map.get(input_def.name):
const_node = copy.deepcopy(const_nodes_map[input_def.name])
src_name = '{}/{}'.format(node.name_scope, input_def.name)
if not self._normal_nodes.get(src_name):
const_node.name = src_name
const_node.name_scope = node.name_scope
self._normal_nodes.update({src_name: const_node})
self._leaf_nodes.update({src_name: const_node})
src_node = self._leaf_nodes.get(src_name)
else:
src_name = leaf_node_id_map_name.get(input_def.name)
if not src_name:
logger.warning("The input_def name '%s' in node '%s' is invalid, "
"will be ignore.", input_def.name, node_name)
continue

src_node = self._leaf_nodes.get(src_name)
if src_node is None:
logger.warning("The input '%s' in node '%s' is not in "
"leaf nodes.", src_name, node_name)
continue

input_item = {
src_name: {
"shape": src_node.shape,
"edge_type": edge_type,
"scope": NodeTypeEnum.NAME_SCOPE.value
}
}
node.update_input(input_item)

if self._normal_nodes.get(node_name):
self._normal_nodes[node_name] = node
else:
group_name = self._create_group_name(node.name_scope, node.node_type, node.name)
self._node_groups[group_name][node.name] = node

def _calc_output(self):
"""Calc output of every node."""
logger.debug("Start to calc output.")

for name, node in self._leaf_nodes.items():
if node.node_type == NodeTypeEnum.CONST.value:
continue
for src_name, input_attr in node.input.items():
src_node = self._leaf_nodes[src_name]
if src_node.node_type == NodeTypeEnum.CONST.value:
continue

if self._normal_nodes.get(src_name):
self._normal_nodes[src_name].update_output({name: input_attr})
else:
group_name = self._create_group_name(src_node.name_scope,
src_node.node_type, src_node.name)
self._node_groups[group_name][src_name].update_output({name: input_attr})

def _parse_graph_proto_node(self, node_def):
"""
Parse `anf_ir_pb2.model_proto.graph.node_def`, and create a node.

Args:
node_def (anf_ir_pb2.model_proto.graph.node_def): Refer to anf_ir_pb2.model_proto.graph.node_def.

Returns:
Node, a `Node` object.
"""
node_name = '/'.join([node_def.scope, node_def.op_type])+node_def.name
node = Node(name=node_name, node_id=node_def.name)
node.node_type = node_def.op_type
logger.debug("Foreach graph proto nodes, node id: %s, node name: %s, node def name: %s, "
"input count: %s", node.node_id, node.name, node_def.name, len(node_def.input))

for attr in node_def.attribute:
node.update_attr({attr.name: str(attr.value)})

node.output_i = node_def.output_i
node.name_scope = node_def.scope

output_type = node_def.output_type
shape = self._parse_type_proto(output_type)
node.shape = shape

self._leaf_nodes.update({node.name: node})
group_name = self._create_group_name(node.name_scope, node.node_type, node.name)
if group_name is not None:
node_dict = self._node_groups.get(group_name, {})
node_dict.update({node.name: node})
self._node_groups.update({group_name: node_dict})
else:
self._normal_nodes.update({node.name: node})

return node

def _parse_graph_proto_parameter(self, parameter):
"""
Parse anf_ir_pb2.model_proto.graph.parameter, and create a parameter node.

Args:
parameter (anf_ir_pb2.model_proto.graph.parameter): Refer to anf_ir_pb2.model_proto.graph.parameter.

Returns:
Node, a `Node` object.
"""
node = Node(name=parameter.name, node_id=parameter.name)
node.node_type = NodeTypeEnum.PARAMETER.value
node.shape = self._parse_type_proto(parameter.type)
logger.debug("Foreach graph proto parameters, node id: %s, node name: %s, "
"node def name: %s", node.node_id, node.name, parameter.name)
return node

def _parse_graph_proto_const(self, const, const_node_id):
"""
Parse anf_ir_pb2.model_proto.graph.const, and create a const node.

Args:
const (anf_ir_pb2.model_proto.graph.const): Refer to anf_ir_pb2.model_proto.graph.const
const_node_id (str): The id of the new const node, it should be unique in graph.

Returns:
Node, a `Node` object.
"""
node = Node(name=const.key, node_id=const_node_id)
node.node_type = NodeTypeEnum.CONST.value
node.update_attr({const.key: str(const.value)})
if const.value.dtype == DataTypeEnum.DT_TENSOR:
shape = []
for dim in const.value.tensor_val.dims:
shape.append(dim)
node.shape = shape
return node

def _parse_type_proto(self, type_proto):
"""
Parse proto's `message TypeProto` to get shape information.

Args:
type_proto (anf_ir_pb2.TypeProto): Refer to anf_ir_pb2.TypeProto.

Returns:
list, a list of shape.
"""
shapes = []
if type_proto.HasField('tensor_type'):
tensor_type = type_proto.tensor_type
tensor_shape_proto = tensor_type.shape
for dim in tensor_shape_proto.dim:
shapes.append(dim.size)
if type_proto.HasField('sequence_type'):
for elem_type in type_proto.sequence_type.elem_types:
shapes.append(self._parse_type_proto(elem_type))
return shapes

def _create_group_name(self, name_scope, node_type, node_name):
"""
Create group name by node name, name scope, node type.

Only nodes that conform to the rules are aggregated.

Args:
name_scope (str): The node name scope.
node_type (str): The node type.
node_name (str): The node name.

Returns:
Optional[str], if match the rules will return a group name, else return None.
"""
group_types = ['Reshape', 'Variable']
pattern_names = r'.*?/Cast-op\d+'

if node_type in group_types:
group_name = name_scope + '/' + node_type if name_scope else node_type
return group_name

if node_type == 'FrameworkOp' and re.search(pattern_names, node_name):
group_name = name_scope + '/' + 'Cast-op' if name_scope else 'Cast-op'
return group_name

return None
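
Hedged examples of the grouping rule above; the scope and node names are illustrative:

graph = MSGraph()
# 'Reshape' is in group_types, so it is grouped under its name scope.
assert graph._create_group_name('Default/net', 'Reshape', 'Default/net/Reshape-op12') == 'Default/net/Reshape'
# FrameworkOp nodes whose names match '.*?/Cast-op\d+' are grouped as 'Cast-op'.
assert graph._create_group_name('Default/net', 'FrameworkOp', 'Default/net/Cast-op3') == 'Default/net/Cast-op'
# Any other node type is not aggregated.
assert graph._create_group_name('Default/net', 'Conv2D', 'Default/net/Conv2D-op7') is None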

+ 211
- 0
mindinsight/datavisual/data_transform/graph/node.py View File

@@ -0,0 +1,211 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
This file is used to define the node of graph and associated base types.
"""
from enum import Enum

class NodeTypeEnum(Enum):
"""Node type enum. The following types are new to our custom."""
NAME_SCOPE = 'name_scope'
POLYMERIC_SCOPE = 'polymeric_scope'
PARAMETER = 'Parameter'
CONST = 'Const'


class Node:
"""
Define a node object.

Args:
name (str): Name of new node.
node_id (str): The id of this node, and node id is unique in graph.
"""

def __init__(self, name, node_id):
self._node_id = node_id
self._name = name
self._type = ""
self._attr = dict()
self._input = dict()
self._output_i = -1
self._output = {}
self._polymeric_input = {}
self._polymeric_output = {}
self._polymeric_scope_name = ""
self._subnode_count = 0
self._name_scope = ""
self.shape = []

def to_dict(self):
"""Converts the node object to dictionary format."""
return {
'name': self._name,
'type': self._type,
'attr': self._attr,
'input': self._input,
'output_i': self._output_i,
'output': self._output,
'polymeric_input': self._polymeric_input,
'polymeric_output': self._polymeric_output,
'subnode_count': self._subnode_count,
'polymeric_scope_name': self._polymeric_scope_name
}

@property
def node_id(self):
"""The id of this node, and id is unique in graph."""
return self._node_id

@property
def name(self):
"""Get node name."""
return self._name

@name.setter
def name(self, name):
"""Set node name."""
self._name = name

@property
def node_type(self):
"""Get node type."""
return self._type

@node_type.setter
def node_type(self, node_type):
"""Set node type."""
self._type = node_type

@property
def attr(self):
"""Get node attr."""
return self._attr

def update_attr(self, attr_dict):
"""
Update node attr.

Args:
attr_dict (dict[str, str]): Format is {'<key>': '<value>'}.
"""
self._attr.update(attr_dict)

@property
def input(self):
"""
Get all input of current node.

Returns:
dict[str, dict], format is {'<src_name>': {'shape': [], 'edge_type', 'scope'}}.
"""
return self._input

def update_input(self, input_dict):
"""
Update input.

Args:
input_dict (dict[str, dict]): Format is {'<src_name>': {'shape': [], 'edge_type', 'scope'}}.
"""
self._input.update(input_dict)

@property
def output_i(self):
"""The memory address of this node when it is in run time."""
return self._output_i

@output_i.setter
def output_i(self, output_i):
"""Set memory address."""
self._output_i = output_i

@property
def polymeric_input(self):
"""
The polymeric input is the input of the polymeric nodes.

Returns:
dict[str, dict], format is {'<src_name>': {'edge_type': '<value>'}}.
"""
return self._polymeric_input

def update_polymeric_input(self, polymeric_input):
"""The polymeric input is the input of the polymeric nodes."""
self._polymeric_input.update(polymeric_input)

@property
def output(self):
"""The output node of this node."""
return self._output

def update_output(self, output):
"""
Update output node.

Args:
output (dict[str, TypedDict('NodeType', {'type': str})]): Format
is {"<node_name>": {"type": "<node type>"}}.
"""
self._output.update(output)

@property
def polymeric_output(self):
"""Get polymeric output."""
return self._polymeric_output

def update_polymeric_output(self, polymeric_output):
"""
Update polymeric output.

Args:
polymeric_output (dict[str, dict]): Format is {dst_node.polymeric_scope_name:
{'edge_type': EdgeTypeEnum.data}}.

"""
self._polymeric_output.update(polymeric_output)

@property
def polymeric_scope_name(self):
"""Get polymeric scope name."""
return self._polymeric_scope_name

@polymeric_scope_name.setter
def polymeric_scope_name(self, name):
"""Set polymeric scope name."""
self._polymeric_scope_name = name

@property
def subnode_count(self):
"""The sub node count of this node, if this node is a scope node, this count will not be zero."""
return self._subnode_count

@subnode_count.setter
def subnode_count(self, count):
"""Set sub node count."""
self._subnode_count = count

@property
def name_scope(self):
"""Get name scope of this node."""
return self._name_scope

@name_scope.setter
def name_scope(self, name_scope):
"""Set name scope."""
self._name_scope = name_scope

def __str__(self):
return f'<Node, name: {self._name}, type: {self._type}>'
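
A small construction sketch for `Node` (all names and shapes are illustrative):

node = Node(name='Default/conv1', node_id='1')
node.node_type = 'Conv2D'  # hypothetical op type
node.name_scope = 'Default'
node.update_input({'Default/param0': {'shape': [32, 3, 3, 3],
                                      'edge_type': 'data',
                                      'scope': NodeTypeEnum.NAME_SCOPE.value}})
print(node.to_dict()['input'])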

+ 14
- 0
mindinsight/datavisual/data_transform/loader_generators/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 246
- 0
mindinsight/datavisual/data_transform/loader_generators/data_loader_generator.py View File

@@ -0,0 +1,246 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Data Loader Generator.

This module generate loaders from summary logs.
"""
import os
from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.data_access.file_handler import FileHandler
from mindinsight.datavisual.data_transform.data_loader import DataLoader
from mindinsight.datavisual.data_transform.loader_generators.loader_generator import MAX_DATA_LOADER_SIZE
from mindinsight.datavisual.data_transform.loader_generators.loader_struct import LoaderStruct
from mindinsight.datavisual.data_transform.loader_generators.loader_generator import LoaderGenerator
from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
from mindinsight.utils.exceptions import ParamValueError


class DataLoaderGenerator(LoaderGenerator):
"""
DataLoaderGenerator generates a dict of loaders from summary logs.

Each loader helps process the data of its events.
It helps DataManager to generate loaders.
"""
def __init__(self, summary_path):
"""
Init DataLoaderGenerator.

Args:
summary_path (str): A directory path, e.g. '/data/ImageNet/'.
"""
self._summary_path = self._check_and_normalize_summary_path(summary_path)
self._summary_watcher = SummaryWatcher()

def _check_and_normalize_summary_path(self, summary_path):
"""
Check and normalize summary path.

Args:
summary_path (str): A directory path, e.g. '/data/ImageNet/'.

Returns:
str, normalized summary path.

"""
if summary_path is None:
logger.warning("Summary path is None. It will not init data loader generator.")
raise ParamValueError("Summary path is None.")

summary_path = os.path.realpath(summary_path)

return summary_path

def generate_loaders(self, loader_pool):
"""
Generate loaders from the summary path; if the summary path is empty, an empty dict is returned.

Args:
loader_pool (dict[str, LoaderStruct]): Current loader pool in data_manager.

Returns:
dict[str, LoaderStruct], a dict of `Loader`.
"""
loader_dict = {}

if not FileHandler.exists(self._summary_path):
logger.warning("Summary path does not exist. It will not start loading events data. "
"Current path is %r.", self._summary_path)
return loader_dict

dir_map_mtime_dict = {}
min_modify_time = None
summaries_info = self._summary_watcher.list_summary_directories(self._summary_path)

for item in summaries_info:
relative_path = item.get("relative_path")
current_dir = FileHandler.join(self._summary_path, relative_path)
dataloader = DataLoader(current_dir)

if not dataloader.has_valid_files():
logger.debug("Can not find valid train log file in folder %s , "
"will ignore.", relative_path)
continue

modify_time = item.get("update_time").timestamp()

# If the loader already exists in the loader pool with a newer time, keep its time
loader_id = self._generate_loader_id(relative_path)
loader = loader_pool.get(loader_id)
if loader is not None and loader.latest_update_time > modify_time:
modify_time = loader.latest_update_time

if not min_modify_time:
# The first load, init min modify time
min_modify_time = modify_time

# We need to find `MAX_DATA_LOADER_SIZE` newly modified folders.
if len(dir_map_mtime_dict) < MAX_DATA_LOADER_SIZE:
if modify_time < min_modify_time:
min_modify_time = modify_time
dir_map_mtime_dict.update({relative_path: modify_time})

else:
if modify_time >= min_modify_time:
dir_map_mtime_dict.update({relative_path: modify_time})

sorted_dir_tuple = sorted(dir_map_mtime_dict.items(),
key=lambda d: d[1])[-MAX_DATA_LOADER_SIZE:]

for relative_path, modify_time in sorted_dir_tuple:
loader_id = self._generate_loader_id(relative_path)
loader = self._generate_loader_by_relative_path(relative_path)
loader_dict.update({loader_id: loader})

return loader_dict

def _generate_loader_by_relative_path(self, relative_path):
"""
Generate loader by relative path.

Args:
relative_path (str): Relative path of a summary directory, e.g. './log1'.

Returns:
LoaderStruct, a `Loader` object.
"""
current_dir = os.path.realpath(FileHandler.join(self._summary_path, relative_path))
data_loader = DataLoader(current_dir)
loader_id = self._generate_loader_id(relative_path)
loader = LoaderStruct(loader_id=loader_id,
name=self._generate_loader_name(relative_path),
path=current_dir,
latest_update_time=FileHandler.file_stat(current_dir).mtime,
data_loader=data_loader)
return loader

def _generate_loader_id(self, relative_path):
"""
Generate loader id from relative path.

Args:
relative_path (str): Relative path of a summary directory, e.g. './log1'.

Returns:
str, loader_id for `Loader`.

"""
loader_id = relative_path
return loader_id

def _generate_loader_name(self, relative_path):
"""
Generate loader name from relative path.

Args:
relative_path (str): Relative path of a summary directory, e.g. './log1'.

Returns:
str, loader_name for `Loader`.

"""
loader_name = relative_path
return loader_name

def _get_relative_path_from_train_id(self, train_id):
"""
Get the relative path from train_id.

Args:
train_id (str): Train ID of a summary directory, e.g. './log1'.

Returns:
str, relative path of `Loader`.

"""
relative_path = train_id

return relative_path

def check_train_job_exist(self, train_id):
"""
Check if train job exists.

Args:
train_id (str): Train ID of a summary directory, e.g. './log1'.

Returns:
bool, if train job exists, return True.

"""
if not self._is_train_id_valid(train_id):
return False

relative_path = self._get_relative_path_from_train_id(train_id)
if self._summary_watcher.is_summary_directory(self._summary_path, relative_path):
return True

return False

def _is_train_id_valid(self, train_id):
"""
Check if train_id is valid.

Args:
train_id (str): Train ID of a summary directory, e.g. './log1'.

Returns:
bool, if train id is valid, return True.

"""
if not train_id.startswith('./'):
logger.warning("The train_id does not start with './'.")
return False
if len(train_id.split("/")) > 2:
logger.warning("The train_id contains multiple '/'.")
return False
return True

def generate_loader_by_train_id(self, train_id):
"""
Generate loader by train_id.

Args:
train_id (str): Train ID of a summary directory, e.g. './log1'.

Returns:
LoaderStruct, a `Loader` object.

"""
relative_path = self._get_relative_path_from_train_id(train_id)
loader = self._generate_loader_by_relative_path(relative_path)

return loader
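
A hedged usage sketch; '/data/summaries' and './log1' are illustrative paths:

generator = DataLoaderGenerator('/data/summaries')
if generator.check_train_job_exist('./log1'):
    loader = generator.generate_loader_by_train_id('./log1')
    print(loader.loader_id, loader.latest_update_time)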

+ 60
- 0
mindinsight/datavisual/data_transform/loader_generators/loader_generator.py View File

@@ -0,0 +1,60 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Base loader generator."""
from abc import abstractmethod

MAX_DATA_LOADER_SIZE = 15


class LoaderGenerator:
"""Base loader generator for loader generators."""
@abstractmethod
def generate_loaders(self, loader_pool):
"""
Abstract method for generating loaders.

Args:
loader_pool (dict[str, LoaderStruct]): Current loader pool in data_manager.

Returns:
dict[str, LoaderStruct], a dict of `Loader`.

"""

@abstractmethod
def check_train_job_exist(self, train_id):
"""
Abstract method for checking if train job exists.

Args:
train_id (str): Train ID.

Returns:
bool, if train job exists, return True.

"""

@abstractmethod
def generate_loader_by_train_id(self, train_id):
"""
Abstract method for generating loader by train id.

Args:
train_id (str): Train ID.

Returns:
LoaderStruct, a `Loader` object.

"""

+ 64
- 0
mindinsight/datavisual/data_transform/loader_generators/loader_struct.py View File

@@ -0,0 +1,64 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Loader struct."""


class LoaderStruct:
"""
Loader to save summary info.

LoaderStruct contains: loader_id, name, path, latest_update_time, status, data_loader.
"""
def __init__(self, loader_id, name, path, latest_update_time, data_loader):
self._loader_id = loader_id
self._name = name
self._path = path
self._latest_update_time = latest_update_time
self._data_loader = data_loader

@property
def loader_id(self):
"""Get loader ID."""
return self._loader_id

@property
def name(self):
"""Get loader name."""
return self._name

@property
def latest_update_time(self):
"""Get the latest update time of loader."""
return self._latest_update_time

@property
def data_loader(self):
"""Get data loader."""
return self._data_loader

@latest_update_time.setter
def latest_update_time(self, latest_update_time):
"""Set the latest update time of loader."""
self._latest_update_time = latest_update_time

def to_dict(self):
"""Transform LoaderStruct to dict."""
return dict(
loader_id=self._loader_id,
name=self._name,
path=self._path,
latest_update_time=self._latest_update_time,
data_loader=self._data_loader
)
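
A minimal construction sketch (values are illustrative; `data_loader` would normally be a `DataLoader` instance):

loader = LoaderStruct(loader_id='./log1', name='./log1', path='/data/summaries/log1',
                      latest_update_time=1577808000.0, data_loader=None)
loader.latest_update_time = 1577811600.0
print(loader.to_dict()['latest_update_time'])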

+ 373
- 0
mindinsight/datavisual/data_transform/ms_data_loader.py View File

@@ -0,0 +1,373 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
DataLoader for MindSpore data.

This module is used to load the MindSpore training log file.
Each instance will read an entire run; a run can contain one or
more log files.
"""
import re
import struct

from google.protobuf.message import DecodeError
from google.protobuf.text_format import ParseError

from mindinsight.datavisual.common import exceptions
from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.data_access.file_handler import FileHandler
from mindinsight.datavisual.data_transform.events_data import EventsData
from mindinsight.datavisual.data_transform.events_data import TensorEvent
from mindinsight.datavisual.data_transform.graph import MSGraph
from mindinsight.datavisual.proto_files import mindinsight_summary_pb2 as summary_pb2
from mindinsight.datavisual.proto_files import mindinsight_anf_ir_pb2 as anf_ir_pb2
from mindinsight.datavisual.utils import crc32
from mindinsight.utils.exceptions import UnknownError

HEADER_SIZE = 8
CRC_STR_SIZE = 4
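
# Record layout assumed by `_event_load` below (TFRecord-style):
#   8-byte unsigned-integer length header (unpacked with struct format 'Q'),
#   4-byte masked CRC32C of the header,
#   `length` bytes of serialized Event proto,
#   4-byte masked CRC32C of the event body.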


class MSDataLoader:
"""
MSDataLoader class, load MindSpore event data.

Args:
summary_dir (str): Log directory.
"""

def __init__(self, summary_dir):
self._init_instance(summary_dir)

def _init_instance(self, summary_dir):
self._summary_dir = summary_dir
self._valid_filenames = []
self._events_data = EventsData()
self._latest_summary_filename = ''
self._latest_summary_file_size = 0
self._summary_file_handler = None
self._latest_pb_file_mtime = 0

def get_events_data(self):
"""Return events data read from log file."""
return self._events_data

def _check_files_deleted(self, filenames, old_filenames):
"""
Check the file list for updates.

Args:
filenames (list[str]): The latest files list.
old_filenames (list[str]): List of old files.
"""
deleted_files = set(old_filenames) - set(filenames)
if deleted_files:
logger.warning("There are some files has been deleted, "
"we will reload all files in path %s.", self._summary_dir)
self._init_instance(self._summary_dir)

def load(self):
"""
Load all valid log files.

When the file is reloaded, it will continue to load from where it left off.
"""
logger.debug("Start to load data in ms data loader.")
filenames = self.filter_valid_files()
if not filenames:
logger.warning("No valid files can be loaded, summary_dir: %s.", self._summary_dir)
raise exceptions.SummaryLogPathInvalid()
old_filenames = list(self._valid_filenames)
self._valid_filenames = filenames
self._check_files_deleted(filenames, old_filenames)

self._load_summary_files(self._valid_filenames)
self._load_pb_files(self._valid_filenames)

def _load_summary_files(self, filenames):
"""
Load summary file and parse file content.

Args:
filenames (list[str]): File name list.
"""
summary_files = self._filter_summary_files(filenames)
summary_files = self._sorted_summary_files(summary_files)

for filename in summary_files:
if self._latest_summary_filename and \
(self._compare_summary_file(self._latest_summary_filename, filename)):
continue

file_path = FileHandler.join(self._summary_dir, filename)

if filename != self._latest_summary_filename:
self._summary_file_handler = FileHandler(file_path, 'rb')
self._latest_summary_filename = filename
self._latest_summary_file_size = 0

new_size = FileHandler.file_stat(file_path).size
if new_size == self._latest_summary_file_size:
continue

self._latest_summary_file_size = new_size
try:
self._load_single_file(self._summary_file_handler)
except UnknownError as ex:
logger.warning("Parse summary file failed, detail: %r,"
"file path: %s.", str(ex), file_path)

def _load_single_file(self, file_handler):
"""
Load data from a single log file.

Args:
file_handler (FileHandler): A file handler.
"""
logger.debug("Load single summary file, file path: %s.", file_handler.file_path)
while True:
start_offset = file_handler.offset
try:
event_str = self._event_load(file_handler)
if event_str is None:
file_handler.reset_offset(start_offset)
break

event = summary_pb2.Event.FromString(event_str)
self._event_parse(event)
except exceptions.CRCFailedError:
file_handler.reset_offset(start_offset)
logger.warning("Check crc faild and ignore this file, file_path=%s, "
"offset=%s.", file_handler.file_path, file_handler.offset)
break
except (OSError, DecodeError, exceptions.MindInsightException) as ex:
logger.warning("Parse log file fail, and ignore this file, detail: %r,"
"file path: %s.", str(ex), file_handler.file_path)
break
except Exception as ex:
logger.exception(ex)
raise UnknownError(str(ex))

def _event_load(self, file_handler):
"""
Load binary string to event string.

Args:
file_handler (FileHandler): A file handler.

Returns:
bytes, MindSpore event in bytes.
"""
# read the header
header_str = file_handler.read(HEADER_SIZE)
if not header_str:
logger.info("End of file, file_path=%s.", file_handler.file_path)
return None
header_crc_str = file_handler.read(CRC_STR_SIZE)
if not header_crc_str:
header_crc_str = ''

if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE:
logger.warning("Check header size and crc, record truncated at offset %s, "
"file_path=%s.", file_handler.offset, file_handler.file_path)
return None
if crc32.GetValueFromStr(header_crc_str) != crc32.GetMaskCrc32cValue(header_str, HEADER_SIZE):
raise exceptions.CRCFailedError()

# read the event body if integrity of header is verified
header = struct.unpack('Q', header_str)
event_len = int(header[0])

event_str = file_handler.read(event_len)
if not event_str:
event_str = ''
event_crc_str = file_handler.read(CRC_STR_SIZE)
if not event_crc_str:
event_crc_str = ''

if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE:
logger.warning("Check event crc, record truncated at offset %d, file_path: %s.",
file_handler.offset, file_handler.file_path)
return None
if crc32.GetValueFromStr(event_crc_str) != crc32.GetMaskCrc32cValue(event_str, event_len):
raise exceptions.CRCFailedError()

return event_str

def _event_parse(self, event):
"""
Transform `Event` data to tensor_event and update it to EventsData.

Args:
event (Event): Message event in summary proto, data read from file handler.
"""
if event.HasField('summary'):
for value in event.summary.value:
if value.HasField('scalar_value'):
tag = '{}/{}'.format(value.tag, PluginNameEnum.SCALAR.value)
tensor_event = TensorEvent(wall_time=event.wall_time,
step=event.step,
tag=tag,
plugin_name=PluginNameEnum.SCALAR.value,
value=value.scalar_value)
self._events_data.add_tensor_event(tensor_event)

if value.HasField('image'):
tag = '{}/{}'.format(value.tag, PluginNameEnum.IMAGE.value)
tensor_event = TensorEvent(wall_time=event.wall_time,
step=event.step,
tag=tag,
plugin_name=PluginNameEnum.IMAGE.value,
value=value.image)
self._events_data.add_tensor_event(tensor_event)

if event.HasField('graph_def'):
graph_proto = event.graph_def
graph = MSGraph()
graph.build_graph(graph_proto)
tensor_event = TensorEvent(wall_time=event.wall_time,
step=event.step,
tag=self._latest_summary_filename,
plugin_name=PluginNameEnum.GRAPH.value,
value=graph)

try:
graph_tags = self._events_data.list_tags_by_plugin(PluginNameEnum.GRAPH.value)
except KeyError:
graph_tags = []
summary_tags = self._filter_summary_files(graph_tags)
for tag in summary_tags:
self._events_data.delete_tensor_event(tag)

self._events_data.add_tensor_event(tensor_event)

def filter_valid_files(self):
"""
Gets a list of valid files from the given file path.

Returns:
list[str], file name list.

"""
filenames = []
for filename in FileHandler.list_dir(self._summary_dir):
if FileHandler.is_file(FileHandler.join(self._summary_dir, filename)):
filenames.append(filename)

valid_filenames = []
valid_filenames.extend(self._filter_summary_files(filenames))
valid_filenames.extend(self._filter_pb_files(filenames))
return list(set(valid_filenames))

@staticmethod
def _filter_summary_files(filenames):
"""
Gets a list of summary files.

Args:
filenames (list[str]): File name list, like [filename1, filename2].

Returns:
list[str], filename list.
"""
return list(filter(
lambda filename: (re.search(r'summary\.\d+', filename)
and not filename.endswith("_lineage")), filenames))
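
    # Illustrative matches (hypothetical names): 'events.summary.1578900000'
    # is kept, while 'events.summary.1578900000_lineage' is filtered out.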

@staticmethod
def _compare_summary_file(current_file, dst_file):
"""
Compare the creation times of the two summary log files.

Args:
current_file (str): Must be the summary log file path.
dst_file (str): Must be the summary log file path.

Returns:
            bool, True if the current file is newer than the destination file, False otherwise.
"""
current_time = int(re.search(r'summary\.(\d+)', current_file)[1])
dst_time = int(re.search(r'summary\.(\d+)', dst_file)[1])
if current_time > dst_time or (current_time == dst_time and current_file > dst_file):
return True
return False

@staticmethod
def _sorted_summary_files(summary_files):
"""Sort by creating time increments and filenames decrement."""
filenames = sorted(summary_files,
key=lambda filename: (-int(re.search(r'summary\.(\d+)', filename)[1]), filename),
reverse=True)
return filenames
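
    # Illustrative ordering (hypothetical names): ['a.summary.2', 'a.summary.1',
    # 'b.summary.2'] sorts to ['a.summary.1', 'b.summary.2', 'a.summary.2'].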

@staticmethod
def _filter_pb_files(filenames):
"""
Get a list of pb files.

Args:
filenames (list[str]): File name list, like [filename1, filename2].

Returns:
list[str], filename list.
"""
return list(filter(lambda filename: re.search(r'\.pb$', filename), filenames))

def _load_pb_files(self, filenames):
"""
Load and parse the pb files.

Args:
filenames (list[str]): File name list, like [filename1, filename2].

Returns:
list[str], filename list.
"""
pb_filenames = self._filter_pb_files(filenames)
pb_filenames = sorted(pb_filenames, key=lambda file: FileHandler.file_stat(
FileHandler.join(self._summary_dir, file)).mtime)
for filename in pb_filenames:
mtime = FileHandler.file_stat(FileHandler.join(self._summary_dir, filename)).mtime
if mtime <= self._latest_pb_file_mtime:
continue
self._latest_pb_file_mtime = mtime
self._parse_pb_file(filename)

def _parse_pb_file(self, filename):
"""
Parse pb file and write content to `EventsData`.

Args:
            filename (str): The name of the pb file.
"""
file_path = FileHandler.join(self._summary_dir, filename)
logger.info("Start to load graph from pb file, file path: %s.", file_path)
filehandler = FileHandler(file_path)
model_proto = anf_ir_pb2.ModelProto()
try:
model_proto.ParseFromString(filehandler.read())
except ParseError:
logger.warning("The given file is not a valid pb file, file path: %s.", file_path)
return

graph = MSGraph()
graph.build_graph(model_proto.graph)
        tensor_event = TensorEvent(wall_time=FileHandler.file_stat(file_path).mtime,
step=0,
tag=filename,
plugin_name=PluginNameEnum.GRAPH.value,
value=graph)
self._events_data.add_tensor_event(tensor_event)
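
# A minimal, self-contained sketch (illustrative only, not part of this module)
# of the masked-crc check that crc32.GetMaskCrc32cValue and GetValueFromStr
# perform above, assuming TFRecord-style crc32c masking with delta 0xA282EAD8.
import struct


def _crc32c(data):
    """Bitwise CRC32C (Castagnoli), reflected polynomial 0x82F63B78."""
    crc = 0xFFFFFFFF
    for byte in data:
        crc ^= byte
        for _ in range(8):
            crc = (crc >> 1) ^ (0x82F63B78 if crc & 1 else 0)
    return crc ^ 0xFFFFFFFF


def _masked_crc(data):
    """Rotate right by 15 bits and add the mask delta, modulo 2**32."""
    crc = _crc32c(data)
    return (((crc >> 15) | (crc << 17)) + 0xA282EAD8) & 0xFFFFFFFF


def check_header(header, header_crc):
    """Return True if the stored little-endian 4-byte crc matches the header bytes."""
    (stored,) = struct.unpack('<I', header_crc)
    return stored == _masked_crc(header)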

+ 108
- 0
mindinsight/datavisual/data_transform/reservoir.py View File

@@ -0,0 +1,108 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""A reservoir sampling on the values."""

import random
import threading

from mindinsight.utils.exceptions import ParamValueError


class Reservoir:
"""
A container based on Reservoir Sampling algorithm.

The newly added sample will be preserved. If the container is full, an old
sample will be replaced randomly. The probability of each sample being
replaced is the same.
"""

def __init__(self, size):
"""
A Container constructor which create a new Reservoir.

Args:
size (int): Container Size. If the size is 0, the container is not limited.

Raises:
ValueError: If size is negative integer.
"""
if not isinstance(size, (int,)) or size < 0:
raise ParamValueError('size must be nonnegative integer, was %s' % size)

self._samples_max_size = size
self._samples = []
self._sample_counter = 0
self._sample_selector = random.Random(0)
self._mutex = threading.Lock()

def samples(self):
"""Return all stored samples."""
with self._mutex:
return list(self._samples)

def add_sample(self, sample):
"""
Add a sample to Reservoir.

        Replaces an old sample when the capacity is full.
        Newly added samples are guaranteed to be kept in the reservoir.

Args:
sample (Any): The sample to add to the Reservoir.
"""
with self._mutex:
if len(self._samples) < self._samples_max_size or self._samples_max_size == 0:
self._samples.append(sample)
else:
# Use the Reservoir Sampling algorithm to replace the old sample.
rand_int = self._sample_selector.randint(
0, self._sample_counter)
if rand_int < self._samples_max_size:
self._samples.pop(rand_int)
self._samples.append(sample)
else:
self._samples[-1] = sample
self._sample_counter += 1

def remove_sample(self, filter_fun):
"""
Remove the samples from Reservoir that do not meet the filter criteria.

Args:
filter_fun (Callable[..., Any]): Determines whether a sample meets
the deletion condition.

Returns:
int, the number of samples removed.
"""
remove_size = 0

with self._mutex:
before_remove_size = len(self._samples)
if before_remove_size > 0:
# remove samples that meet the filter criteria.
self._samples = list(filter(filter_fun, self._samples))
after_remove_size = len(self._samples)
remove_size = before_remove_size - after_remove_size

if remove_size > 0:
                    # update _sample_counter when samples have been removed.
sample_remaining_rate = float(
after_remove_size) / before_remove_size
self._sample_counter = int(
round(self._sample_counter * sample_remaining_rate))

return remove_size
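
# A minimal usage sketch of the Reservoir above (illustrative only; the import
# path follows this repository's layout).
from mindinsight.datavisual.data_transform.reservoir import Reservoir

reservoir = Reservoir(size=3)
for step in range(10):
    reservoir.add_sample({'step': step, 'value': step * 0.1})

# At most 3 samples are kept and the newest sample is always among them.
print(reservoir.samples())

# Keep only samples from odd steps; the internal counter is rescaled.
removed = reservoir.remove_sample(lambda sample: sample['step'] % 2 == 1)
print(removed, reservoir.samples())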

+ 344
- 0
mindinsight/datavisual/data_transform/summary_watcher.py View File

@@ -0,0 +1,344 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Summary watcher module."""
import os
import re
import datetime
from pathlib import Path

from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.common.validation import Validation
from mindinsight.utils.exceptions import FileSystemPermissionError


class SummaryWatcher:
    """SummaryWatcher class."""

    SUMMARY_FILENAME_REGEX = r'summary\.(?P<timestamp>\d+)'
    PB_FILENAME_REGEX = r'\.pb$'
    MAX_SUMMARY_DIR_COUNT = 999

    # scan at most 20000 files/directories (approximately 1 second)
    # if overall=False in SummaryWatcher.list_summary_directories
    # to avoid long-time blocking
    MAX_SCAN_COUNT = 20000

    def list_summary_directories(self, summary_base_dir, overall=True):
"""
List summary directories within base directory.
Args:
summary_base_dir (str): Path of summary base directory.
Returns:
list, list of summary directory info, each of which including the following attributes.
- relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
starting with "./".
- create_time (datetime): Creation time of summary file.
- update_time (datetime): Modification time of summary file.
Examples:
>>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
>>> summary_watcher = SummaryWatcher()
>>> directories = summary_watcher.list_summary_directories('/summary/base/dir')
"""
        if self._contains_null_byte(summary_base_dir=summary_base_dir):
            return []

        if not os.path.exists(summary_base_dir):
            logger.warning('Path of summary base directory does not exist.')
            return []

        if not os.path.isdir(summary_base_dir):
            logger.warning('Path of summary base directory is not a valid directory.')
            return []

        summary_dict = {}
        scan_count = 0

        try:
            entries = os.scandir(summary_base_dir)
        except PermissionError:
            logger.error('Path of summary base directory is not accessible.')
            raise FileSystemPermissionError('Path of summary base directory is not accessible.')

        for entry in entries:
            if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                break

            relative_path = os.path.join('.', '')
            if entry.is_symlink():
                pass
            elif entry.is_file():
                self._update_summary_dict(summary_dict, relative_path, entry)
            elif entry.is_dir():
                full_path = os.path.realpath(os.path.join(summary_base_dir, entry.name))
                try:
                    subdir_entries = os.scandir(full_path)
                except PermissionError:
                    logger.warning('Path of %s under summary base directory is not accessible.', entry.name)
                else:
                    for subdir_entry in subdir_entries:
                        if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
                            break

                        subdir_relative_path = os.path.join('.', entry.name)
                        if subdir_entry.is_symlink():
                            pass
                        elif subdir_entry.is_file():
                            self._update_summary_dict(summary_dict, subdir_relative_path, subdir_entry)

                        scan_count += 1
                        if not overall and scan_count >= self.MAX_SCAN_COUNT:
                            break

            scan_count += 1
            if not overall and scan_count >= self.MAX_SCAN_COUNT:
                logger.info('Stopping further scanning because overall is False and '
                            'the number of scanned files exceeds the upper limit.')
                break

        directories = [{
            'relative_path': key,
            'create_time': value['ctime'],
            'update_time': value['mtime'],
        } for key, value in summary_dict.items()]

        # sort by update time in descending order and relative path in ascending order
        directories.sort(key=lambda x: (-int(x['update_time'].timestamp()), x['relative_path']))

        return directories

    def _contains_null_byte(self, **kwargs):
        """
        Check if any argument contains a null byte.

        Args:
            kwargs (Any): Keyword arguments to check.

        Returns:
            bool, indicates if any arg contains null byte.
        """
        for key, value in kwargs.items():
            if not isinstance(value, str):
                continue

            if '\x00' in value:
                logger.warning('%s contains null byte \\x00.', key)
                return True

        return False

    def _is_valid_summary_directory(self, summary_base_dir, relative_path):
        """
        Check if the given summary directory is valid.

        Args:
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                starting with "./" .

        Returns:
            bool, indicates if summary directory is valid.
        """
        summary_base_dir = os.path.realpath(summary_base_dir)
        summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path))
        if summary_base_dir == summary_directory:
            return True

        if not os.path.exists(summary_directory):
            logger.warning('Path of summary directory does not exist.')
            return False

        if not os.path.isdir(summary_directory):
            logger.warning('Path of summary directory is not a valid directory.')
            return False

        try:
            Path(summary_directory).relative_to(Path(summary_base_dir))
        except ValueError:
            logger.warning('Relative path %s is not a subdirectory of summary_base_dir', relative_path)
            return False

        return True

    def _update_summary_dict(self, summary_dict, relative_path, entry):
        """
        Update summary_dict with ctime and mtime.

        Args:
            summary_dict (dict): Temporary data structure to hold summary directory info.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                starting with "./" .
            entry (DirEntry): Directory entry instance needed to check with regular expression.
        """
        summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
        pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
        if summary_pattern is None and pb_pattern is None:
            return

        if summary_pattern is not None:
            timestamp = int(summary_pattern.groupdict().get('timestamp'))
            try:
                # extract created time from filename
                ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
            except OverflowError:
                return
        else:
            ctime = datetime.datetime.fromtimestamp(entry.stat().st_ctime).astimezone()

        # extract modified time from filesystem
        mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone()

        if relative_path not in summary_dict or summary_dict[relative_path]['ctime'] < ctime:
            summary_dict[relative_path] = {
                'ctime': ctime,
                'mtime': mtime,
            }

    def is_summary_directory(self, summary_base_dir, relative_path):
        """
        Check if the given summary directory is valid.

        Args:
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                starting with "./" .

        Returns:
            bool, indicates if the given summary directory is valid.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> summaries = summary_watcher.is_summary_directory('/summary/base/dir', './job-01')
        """
        if self._contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
            return False

        if not self._is_valid_summary_directory(summary_base_dir, relative_path):
            return False

        summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path))
        try:
            entries = os.scandir(summary_directory)
        except PermissionError:
            logger.error('Path of summary directory is not accessible.')
            raise FileSystemPermissionError('Path of summary directory is not accessible.')

        for entry in entries:
            if entry.is_symlink() or not entry.is_file():
                continue

            summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
            pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
            if summary_pattern or pb_pattern:
                return True

        return False

    def list_summary_directories_by_pagination(self, summary_base_dir, offset=0, limit=10):
        """
        List summary directories within base directory, paginated.

        Args:
            summary_base_dir (str): Path of summary base directory.
            offset (int): Page offset. For example, an offset of 0 means the first page. Default value is 0.
            limit (int): The max data items for per page. Default value is 10.

        Returns:
            tuple[total, directories], total indicates the overall number of summary directories and directories
                indicate list of summary directory info including the following attributes.
                - relative_path (str): Relative path of summary directory, referring to
                    settings.SUMMARY_BASE_DIR, starting with "./".
                - create_time (datetime): Creation time of summary file.
                - update_time (datetime): Modification time of summary file.

        Raises:
            ParamValueError, if offset < 0 or limit is out of valid value range.
            ParamTypeError, if offset or limit is not valid integer.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> total, directories = summary_watcher.list_summary_directories_by_pagination(
            ...     '/summary/base/dir', offset=0, limit=10)
        """
        offset = Validation.check_offset(offset=offset)
        limit = Validation.check_limit(limit, min_value=1, max_value=999)

        directories = self.list_summary_directories(summary_base_dir, overall=False)
        return len(directories), directories[offset * limit:(offset + 1) * limit]
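
    # Illustrative only: with offset=2 and limit=10, the slice above returns
    # directories[20:30], i.e. the third page of at most 10 entries.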

    def list_summaries(self, summary_base_dir, relative_path='./'):
        """
        List summary files within the given summary directory.

        Args:
            summary_base_dir (str): Path of summary base directory.
            relative_path (str): Relative path of summary directory, referring to summary base directory,
                starting with "./" .

        Returns:
            list, list of summary file info, each of which including the following attributes.
                - file_name (str): Summary file name.
                - create_time (datetime): Creation time of summary file.
                - update_time (datetime): Modification time of summary file.

        Examples:
            >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
            >>> summary_watcher = SummaryWatcher()
            >>> summaries = summary_watcher.list_summaries('/summary/base/dir', './job-01')
        """
        if self._contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
            return []

        if not self._is_valid_summary_directory(summary_base_dir, relative_path):
            return []

        summaries = []
        summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path))
        try:
            entries = os.scandir(summary_directory)
        except PermissionError:
            logger.error('Path of summary directory is not accessible.')
            raise FileSystemPermissionError('Path of summary directory is not accessible.')

        for entry in entries:
            if entry.is_symlink() or not entry.is_file():
                continue

            pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
            if pattern is None:
                continue

            timestamp = int(pattern.groupdict().get('timestamp'))
            try:
                # extract created time from filename
                ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
            except OverflowError:
                continue

            # extract modified time from filesystem
            mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone()

            summaries.append({
                'file_name': entry.name,
                'create_time': ctime,
                'update_time': mtime,
            })

        # sort by update time in descending order and filename in ascending order
        summaries.sort(key=lambda x: (-int(x['update_time'].timestamp()), x['file_name']))

        return summaries
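
# Illustrative only: how a summary file name maps to create_time, mirroring
# SUMMARY_FILENAME_REGEX and list_summaries above (the file name is hypothetical).
import re
import datetime

filename = 'events.summary.1578900000.hostname'
match = re.search(r'summary\.(?P<timestamp>\d+)', filename)
if match:
    ctime = datetime.datetime.fromtimestamp(
        int(match.groupdict()['timestamp'])).astimezone()
    print(ctime.isoformat())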

+ 14
- 0
mindinsight/datavisual/processors/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 28
- 0
mindinsight/datavisual/processors/base_processor.py View File

@@ -0,0 +1,28 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Base processor, and init data manager parameter."""


class BaseProcessor:
"""Base processors processor. All processors should inherit this class."""

def __init__(self, data_manager):
"""
Init image processor.

Args:
data_manager (DataManager): A DataManager instance.
"""
self._data_manager = data_manager

+ 145
- 0
mindinsight/datavisual/processors/graph_processor.py View File

@@ -0,0 +1,145 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
This file is to process `data_transform.data_manager` to handle graph,
and the status of graph will be checked before calling `Graph` object.
"""

from mindinsight.datavisual.common import exceptions
from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.data_transform.graph import NodeTypeEnum
from mindinsight.datavisual.processors.base_processor import BaseProcessor
from mindinsight.utils.exceptions import ParamValueError


class GraphProcessor(BaseProcessor):
"""
    This class handles the `DataManager` object and processes graph objects.

Args:
train_id (str): To get train job data by this given id.
data_manager (DataManager): A `DataManager` object.
        tag (str): The tag of the graph; if None, the first graph is loaded.
"""
def __init__(self, train_id, data_manager, tag=None):
Validation.check_param_empty(train_id=train_id)
super(GraphProcessor, self).__init__(data_manager)

train_job = self._data_manager.get_train_job_by_plugin(train_id, PluginNameEnum.GRAPH.value)
if train_job is None:
raise exceptions.SummaryLogPathInvalid()
if not train_job['tags']:
raise ParamValueError("Can not find any graph data in the train job.")

if tag is None:
tag = train_job['tags'][0]

tensors = self._data_manager.list_tensors(train_id, tag=tag)
self._graph = tensors[0].value

def get_nodes(self, name, node_type):
"""
Get the nodes of every layer in graph.

Args:
name (str): The name of a node.
            node_type (str): The type of node, either `NodeTypeEnum.NAME_SCOPE.value` or
                `NodeTypeEnum.POLYMERIC_SCOPE.value`.

Returns:
TypedDict('Nodes', {'nodes': list[Node]}), format is {'nodes': [<Node object>]}.
example:
{
"nodes" : [
{
"attr" :
{
"index" : "i: 0\n"
},
"input" : {},
"name" : "input_tensor",
"output" :
{
"Default/TensorAdd-op17" :
{
"edge_type" : "data",
"scope" : "name_scope",
"shape" : [1, 16, 128, 128]
}
},
"output_i" : -1,
"polymeric_input" : {},
"polymeric_output" : {},
"polymeric_scope_name" : "",
"subnode_count" : 0,
"type" : "Data"
}
]
}
"""
if node_type not in [NodeTypeEnum.NAME_SCOPE.value, NodeTypeEnum.POLYMERIC_SCOPE.value]:
            raise ParamValueError(
                'The node type is not supported, only %s or %s is allowed.'
                % (NodeTypeEnum.NAME_SCOPE.value, NodeTypeEnum.POLYMERIC_SCOPE.value))

if name and not self._graph.exist_node(name):
raise ParamValueError("The node name is not in graph.")
nodes = []
if node_type == NodeTypeEnum.NAME_SCOPE.value:
nodes = self._graph.get_normal_nodes(name)

if node_type == NodeTypeEnum.POLYMERIC_SCOPE.value:
            if not name:
                raise ParamValueError('The node name is required when node type is %s.'
                                      % node_type)
polymeric_scope_name = name
nodes = self._graph.get_polymeric_nodes(polymeric_scope_name)

return {'nodes': nodes}

def search_node_names(self, search_content, offset, limit):
"""
Search node names by search content.

Args:
            search_content (Any): Keyword to search for within node names.
            offset (int): Page offset. For example, an offset of 0 means the first page.
limit (int): The max data items for per page.

Returns:
TypedDict('Names', {'names': list[str]}), {"names": ["node_names"]}.
"""
offset = Validation.check_offset(offset=offset)
limit = Validation.check_limit(limit, min_value=1, max_value=1000)
names = self._graph.search_node_names(search_content, offset, limit)
return {"names": names}

def search_single_node(self, name):
"""
Search node by node name.

Args:
name (str): The name of node.

Returns:
dict, format is:
item_object = {'nodes': [<Node object>],
'scope_name': '',
'children': {<item_object>}}
"""
Validation.check_param_empty(name=name)

nodes = self._graph.search_single_node(name)
return nodes
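
# A hypothetical call sequence (illustrative only; `data_manager` is assumed to
# be a DataManager that has already loaded the train job named below):
#
#   processor = GraphProcessor(train_id='./job-01', data_manager=data_manager)
#   nodes = processor.get_nodes(name=None, node_type=NodeTypeEnum.NAME_SCOPE.value)
#   names = processor.search_node_names('conv', offset=0, limit=100)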

+ 92
- 0
mindinsight/datavisual/processors/images_processor.py View File

@@ -0,0 +1,92 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Image Processor APIs."""
from mindinsight.datavisual.utils.tools import to_int
from mindinsight.utils.exceptions import ParamValueError
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.processors.base_processor import BaseProcessor


class ImageProcessor(BaseProcessor):
"""Image Processor."""

def get_metadata_list(self, train_id, tag):
"""
Builds a JSON-serializable object with information about images.

Args:
train_id (str): The ID of the events data.
tag (str): The name of the tag the images all belong to.

        Returns:
            dict, with key `metadatas` mapping to a list of dictionaries, each containing
            the `wall_time`, `step`, `width`, and `height` for one image:
[
{
"wall_time": ****,
"step": ****,
"width": ****,
"height": ****,
},
{...}
]

"""
Validation.check_param_empty(train_id=train_id, tag=tag)
result = []
tensors = self._data_manager.list_tensors(train_id, tag)

for tensor in tensors:
# no tensor_proto in TensorEvent
(width, height) = (tensor.value.width, tensor.value.height)
result.append({
'wall_time': tensor.wall_time,
'step': tensor.step,
'width': int(width),
'height': int(height),
})
return dict(metadatas=result)

def get_single_image(self, train_id, tag, step):
"""
Returns the actual image bytes for a given image.

Args:
train_id (str): The ID of the events data the image belongs to.
tag (str): The name of the tag the images belongs to.
step (int): The step of the image in the current reservoir.

Returns:
bytes, a byte string of the raw image bytes.

"""
Validation.check_param_empty(train_id=train_id, tag=tag, step=step)
step = to_int(step, "step")

tensors = self._data_manager.list_tensors(train_id, tag)

image = None
for tensor in tensors:
if tensor.step == step:
# Default value for bytes field is empty byte string normally,
# see also "Optional Fields And Default Values" in protobuf
# documentation.
image = tensor.value.encoded_image
break

if image is None:
raise ParamValueError("Can not find the step with given train job id and tag.")

return image
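
# Hypothetical usage (illustrative only; assumes a populated DataManager and
# that an image tag 'input_data/image' exists for the train job):
#
#   processor = ImageProcessor(data_manager)
#   meta = processor.get_metadata_list(train_id='./job-01', tag='input_data/image')
#   png_bytes = processor.get_single_image(train_id='./job-01', tag='input_data/image', step=1)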

+ 43
- 0
mindinsight/datavisual/processors/scalars_processor.py View File

@@ -0,0 +1,43 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Scalar Processor APIs."""
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.processors.base_processor import BaseProcessor


class ScalarsProcessor(BaseProcessor):
"""Scalar Processor."""

def get_metadata_list(self, train_id, tag):
"""
Builds a JSON-serializable object with information about scalars.

Args:
train_id (str): The ID of the events data.
            tag (str): The name of the tag the scalars all belong to.

        Returns:
            dict, with key `metadatas` mapping to a list of dictionaries containing the
            `wall_time`, `step`, and `value` for each scalar.
"""
Validation.check_param_empty(train_id=train_id, tag=tag)
job_response = []
tensors = self._data_manager.list_tensors(train_id, tag)

for tensor in tensors:
job_response.append({
'wall_time': tensor.wall_time,
'step': tensor.step,
'value': tensor.value})
return dict(metadatas=job_response)

+ 65
- 0
mindinsight/datavisual/processors/train_task_manager.py View File

@@ -0,0 +1,65 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Train task manager."""

from mindinsight.datavisual.common import exceptions
from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.processors.base_processor import BaseProcessor


class TrainTaskManager(BaseProcessor):
"""Train task manager."""

def get_single_train_task(self, plugin_name, train_id):
"""
        Get single train task.

Args:
plugin_name (str): Plugin name, refer `PluginNameEnum`.
train_id (str): Specify a training job to query.

Returns:
{'train_jobs': list[TrainJob]}, refer to restful api.
"""
Validation.check_param_empty(plugin_name=plugin_name, train_id=train_id)
Validation.check_plugin_name(plugin_name=plugin_name)
train_job = self._data_manager.get_train_job_by_plugin(train_id=train_id, plugin_name=plugin_name)
if train_job is None:
raise exceptions.SummaryLogPathInvalid()
return dict(train_jobs=[train_job])

def get_plugins(self, train_id, manual_update=True):
"""
        Query the plugin data for the specified training job.

Args:
train_id (str): Specify a training job to query.
            manual_update (bool): Specifies whether to trigger a data update before the query.

Returns:
dict, refer to restful api.
"""
Validation.check_param_empty(train_id=train_id)
train_job = self._data_manager.get_single_train_job(train_id, manual_update=manual_update)
if not train_job:
default_result = dict()
for plugin_name in PluginNameEnum.list_members():
default_result.update({plugin_name: list()})
return dict(plugins=default_result)

return dict(
plugins=train_job['tag_mapping']
)

+ 14
- 0
mindinsight/datavisual/proto_files/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 328
- 0
mindinsight/datavisual/proto_files/mindinsight_anf_ir.proto View File

@@ -0,0 +1,328 @@
// Copyright 2019 Huawei Technologies Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mindinsight;


// Versioning
enum Version {
// unknown version
UNKNOWWN_VERSION = 0;

// Initial version (IR VERSION 1), published on Sep 23, 2019
IR_VERSION = 0x0000000000000001;
}

// Data type definition
enum DataType {
DT_UNDEFINED = 0;
// Basic types.
DT_BOOL = 1; // bool

DT_INT8 = 2; // int8_t
DT_INT16 = 3; // int16_t
DT_INT32 = 4; // int32_t
DT_INT64 = 5; // int64_t

DT_UINT8 = 6; // uint8_t
DT_UINT16 = 7; // uint16_t
DT_UINT32 = 8; // uint32_t
DT_UINT64 = 9; // uint64_t

DT_FLOAT16 = 10; // float 16
DT_FLOAT32 = 11; // float 32
DT_FLOAT64 = 12; // float 64

DT_STRING = 13; // string
DT_TENSOR = 14; // tensor
DT_GRAPH = 15; // graph

// list type
DT_BOOLS = 16; // list of bool

DT_INTS8 = 17; // list of int8_t
DT_INTS16 = 18; // list of int16_t
DT_INTS32 = 19; // list of int32_t
DT_INTS64 = 20; // list of int64_t

DT_UINTS8 = 21; // list of uint8_t
DT_UINTS16 = 22; // list of uint16_t
DT_UINTS32 = 23; // list of uint32_t
DT_UINTS64 = 24; // list of uint64_t

DT_FLOATS16 = 25; // list of float16
DT_FLOATS32 = 26; // list of float32
DT_FLOATS64 = 27; // list of float64

DT_STRINGS = 28; // list of string
DT_TENSORS = 29; // list of tensor
DT_GRAPHS = 30; // list of graph

DT_TUPLE = 31; // tuple
DT_LIST = 32; // list
DT_DICT = 33; // dictionary

// other types
DT_NONE = 34; // None
DT_SYM_INST = 35; // Symbolic Key Instance

// type related type
DT_BASE_INT = 36; // type generic int
  DT_BASE_UINT = 37;   // type generic unsigned int
  DT_BASE_FLOAT = 38;  // type generic float
DT_TYPE = 39; // type type
DT_ANYTHING = 40; // type anything
}

// Value definition for attribute value or parameter default value
message ValueProto {
// data type of value
optional DataType dtype = 1; // discriminator that indicates which field below is in use

// Exactly ONE of the following fields must be present for this version of the IR
optional bool bool_val = 2; // bool
optional int64 int_val = 3; // int
optional uint64 uint_val = 4; // uint
optional float float_val = 5; // float
optional double double_val = 6; // double
optional string str_val = 7; // string
optional TensorProto tensor_val = 8; // tensor value
optional GraphProto graph = 9; // graph

repeated bool bool_vals = 10; // list of bool
repeated int64 int_vals = 11; // list of int
repeated uint64 uint_vals = 12; // list of uint
repeated float float_vals = 13; // list of float
repeated double double_vals = 14; // list of double
repeated string str_vals = 15; // list of string
repeated TensorProto tensor_vals = 16; // list of tensor value
repeated GraphProto graphs = 17; // list of graph

// tuple or list
repeated ValueProto values = 18; // tuple, list of value

// dictionary
repeated NamedValueProto dict_val = 19; // dictionary info

  // field for type type
optional TypeProto type_val = 20; // type type info
}

message AttributeProto {
optional string name = 1; // attribute name
optional ValueProto value = 2; // attribute value
}

message NamedValueProto {
optional string key = 1; // attribute name
optional ValueProto value = 2; // attribute value
}

// Defines a tensor shape.
message TensorShapeProto {
// One dimension of the tensor.
message Dimension {
// Size of the tensor in that dimension.
// This value must be >= -1, but values of -1 are reserved for "unknown"
// shapes (values of -1 mean "unknown" dimension).
optional int64 size = 1;

// Optional name of the tensor dimension.
optional string name = 2;
};

repeated Dimension dim = 1;
}

// Types for graph input(parameter) and output
message TypeProto {

message Tensor {
// This field MUST have a valid DataType value except DT_TENSOR
optional DataType elem_type = 1;
optional TensorShapeProto shape = 2; // for scalar, this field is not set
}

// tuple type
message Sequence {
// The type and optional shape of elements of the tuple.
repeated TypeProto elem_types = 1;
};

// data type
optional DataType data_type = 1;

oneof value {
// The type of a tensor.
Tensor tensor_type = 2;

// The type of a tuple.
Sequence sequence_type = 3;
}
}

// Defines information on graph parameters, including the name, the type, and
// the default value of parameter if exists.
message ParameterProto {
optional string name = 1; // parameter name
optional TypeProto type = 2; // parameter type
optional ValueProto default_val = 3; // default value of parameter if exists
}

// Defines graph output information
message OutputProto {
optional string name = 1; // output node name
optional TypeProto type = 2; // output node type
}

// Define node input information
message InputProto {
enum EdgeType {
DATA_EDGE = 0; // data edge
CONTROL_EDGE = 1; // control edge
}

optional string name = 1;
optional EdgeType type = 2;
}

// Nodes
//
// Computation graphs are made up of a DAG of nodes, which represent what is
// commonly called a "layer" or "pipeline stage" in machine learning frameworks.
//
// For example, it can be a node of type "Conv" that takes in an image, a filter
// tensor and a bias tensor, and produces the convolved output.
message NodeProto {
repeated InputProto input = 1; // namespace Value
optional string name = 2; // namespace Value

// The symbolic identifier of the Operator to execute.
optional string op_type = 3; // namespace Operator
// The domain of the OperatorSet that specifies the operator named by op_type.
optional string scope = 4; // namespace Domain

// Additional named attributes.
repeated AttributeProto attribute = 5;

// Optional type info of this node
optional TypeProto output_type = 6;

// other fields for debug
optional uint64 output_i = 7;
}

// Models
//
// ModelProto is a top-level file/container format for bundling a ML model and
// associating its computation graph with metadata.
//
// The semantics of the model are described by the associated GraphProto.
message ModelProto {
// ir version
optional int64 ir_version = 1;

// Domain name of the model.
// We use reverse domain names as name space indicators. For example:
// `com.facebook.fair` or `com.microsoft.cognitiveservices`
//
// Together with `model_version` and GraphProto.name, this forms the unique identity of
// the graph.
optional string domain = 2;

// The version of the graph encoded. See Version enum below.
optional int64 model_version = 3;

// The parameterized graph that is evaluated to execute the model.
optional GraphProto graph = 4;

  // metadata info of operators
optional OperatorSetProto metadata_operators = 5;
};

message OperatorProto {
optional string name = 1; // used as key, must be distinct
optional bytes config = 2; // operator config info
optional bytes obj_info = 3; // operator related object info, e.g. content of operator binary or name
};

message OperatorSetProto {
repeated OperatorProto operators = 1;
optional string summary = 2; // summary info of operators, e.g. file position of operators file
}

// Graphs
//
// A graph defines the computational logic of a model and is comprised of a parameterized
// list of nodes that form a directed acyclic graph based on their inputs and outputs.
// This is the equivalent of the "network" or "graph" in many deep learning
// frameworks.
message GraphProto {
// The nodes in the graph, sorted topologically.
repeated NodeProto node = 1;

// The name of the graph.
optional string name = 2; // namespace Graph

// The parameters(inputs) and outputs of the graph.
repeated ParameterProto parameters = 3;
repeated OutputProto outputs = 4;

// Constants used in this graph
repeated NamedValueProto const_vals = 5;
}

// Tensors
//
// A serialized tensor value.
message TensorProto {
// The shape of the tensor.
repeated int64 dims = 1;

// The data type of the tensor.
// This field MUST have a valid DataType value except DT_TENSOR
optional DataType data_type = 2;

// Tensor content must be organized in row-major order.
//
// Depending on the data_type field, exactly one of the fields below with
// name ending in _data is used to store the elements of the tensor.

// For float values
repeated float float_data = 3 [packed = true];

// For int32, uint8, int8, uint16, int16, and bool values
// When this field is present, the data_type field MUST be
// INT32, INT16, INT8, UINT16, UINT8, or BOOL
repeated int32 int32_data = 4 [packed = true];

// For int64.
// When this field is present, the data_type field MUST be INT64
repeated int64 int64_data = 5 [packed = true];

// For double
// When this field is present, the data_type field MUST be DOUBLE
repeated double double_data = 6 [packed = true];

// For uint64 and uint32 values
// When this field is present, the data_type field MUST be
// UINT32 or UINT64
repeated uint64 uint64_data = 7 [packed = true];

  // Store raw tensor content. When this raw_data field is used to store tensor value,
  // elements MUST be stored as fixed-width, little-endian values.
optional bytes raw_data = 8;
}
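
// A Python sketch (illustrative only) of the round trip _parse_pb_file performs
// with these messages, assuming the generated module path in this repository:
//
//   from mindinsight.datavisual.proto_files import mindinsight_anf_ir_pb2 as anf_ir_pb2
//
//   model = anf_ir_pb2.ModelProto()
//   node = model.graph.node.add()
//   node.name = 'Default/TensorAdd-op17'
//   node.op_type = 'TensorAdd'
//   data = model.SerializeToString()
//
//   parsed = anf_ir_pb2.ModelProto()
//   parsed.ParseFromString(data)   # what _parse_pb_file does with file bytes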

+ 1381
- 0
mindinsight/datavisual/proto_files/mindinsight_anf_ir_pb2.py
File diff suppressed because it is too large
View File


+ 155
- 0
mindinsight/datavisual/proto_files/mindinsight_summary.proto View File

@@ -0,0 +1,155 @@
// Copyright 2019 Huawei Technologies Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mindinsight;
option cc_enable_arenas = true;

// The ANF IR define, include the tensor and graph define
import "mindinsight_anf_ir.proto";

// Event Protocol buffer, Top define
message Event {
// Timestamp
required double wall_time = 1;

// The step of train.
optional int64 step = 2;

oneof what {
// An event file was started, with the specified version.
// Now version is "Mindspore.Event:1"
string version = 3;

// GraphDef.
GraphProto graph_def = 4;

// Summary data
Summary summary = 5;

// Train lineage
TrainLineage train_lineage = 6;

// Evaluation lineage
EvaluationLineage evaluation_lineage = 7;

// dataset graph
DatasetGraph dataset_graph = 9;
}
}

// TrainLineage records the information of a training job.
message TrainLineage{
message HyperParameters{
optional string optimizer = 1;
optional float learning_rate = 2;
optional string loss_function = 3;
optional int32 epoch = 4;
optional string parallel_mode = 5;
optional int32 device_num = 6;
optional int32 batch_size = 8;
}

message TrainDataset{
optional string train_dataset_path = 1;
optional int32 train_dataset_size = 2;
}

message Algorithm{
optional string network = 1;
optional float loss = 2;
}

message Model{
optional string path = 3;
optional int64 size = 4;
}

optional HyperParameters hyper_parameters = 1;
optional TrainDataset train_dataset = 2;
optional Algorithm algorithm = 3;
optional Model model = 4;
}

// EvaluationLineage records the information of an evaluation.
message EvaluationLineage{
message ValidDataset{
optional string valid_dataset_path = 1;
optional int32 valid_dataset_size = 2;
}

optional string metric = 2;
optional ValidDataset valid_dataset = 3;
}

// A Summary is a set of named values that are produced regularly during training
message Summary {
message Image {
// Dimensions of the image.
required int32 height = 1;
required int32 width = 2;
// Valid colorspace values are
// 1 - grayscale
// 2 - grayscale + alpha
// 3 - RGB
// 4 - RGBA
// 5 - DIGITAL_YUV
// 6 - BGRA
required int32 colorspace = 3;
    // Image data in encoded format. Currently only RGB is supported.
required bytes encoded_image = 4;
}

message Value {
// Tag name for the data.
required string tag = 1;

// Value associated with the tag.
oneof value {
float scalar_value = 3;
Image image = 4;
TensorProto tensor = 8;
}
}

// Set of values for the summary.
repeated Value value = 1;
}

// DatasetGraph
message DatasetGraph {
repeated DatasetGraph children = 1;
optional OperationParameter parameter = 2;
repeated Operation operations = 3;
optional Operation sampler = 4;
}

message Operation {
optional OperationParameter operationParam = 1;
repeated int32 size = 2;
repeated float weights = 3;
}

message OperationParameter{
map<string, string> mapStr = 1;
map<string, StrList> mapStrList = 2;
map<string, bool> mapBool = 3;
map<string, int32> mapInt = 4;
map<string, double> mapDouble = 5;
}

message StrList {
repeated string strValue = 1;
}
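
// A Python sketch (illustrative only) of building and serializing an Event with
// one scalar value, matching the messages above (generated module path assumed):
//
//   from mindinsight.datavisual.proto_files import mindinsight_summary_pb2 as summary_pb2
//
//   event = summary_pb2.Event()
//   event.wall_time = 1578900000.0
//   event.step = 1
//   value = event.summary.value.add()
//   value.tag = 'loss'
//   value.scalar_value = 0.25
//   serialized = event.SerializeToString()
//   # the loader recovers it with summary_pb2.Event.FromString(serialized)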

+ 1161
- 0
mindinsight/datavisual/proto_files/mindinsight_summary_pb2.py
File diff suppressed because it is too large
View File


+ 16
- 0
mindinsight/datavisual/utils/__init__.py View File

@@ -0,0 +1,16 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Utils."""
from .tools import find_app_package

+ 57
- 0
mindinsight/datavisual/utils/crc32/base.h View File

@@ -0,0 +1,57 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DATAVISUAL_UTILS_CRC32_BASE_H_
#define DATAVISUAL_UTILS_CRC32_BASE_H_

#include <memory>
#include <string>
#include "securec/include/securec.h"

using string = std::string;

using int8 = int8_t;
using int16 = int16_t;
using int32 = int32_t;
using int64 = int64_t;

using uint8 = uint8_t;
using uint16 = uint16_t;
using uint32 = uint32_t;
using uint64 = uint64_t;

// Check whether the pointer is null; break if the value is null.
#define EXCEPT_CHECK_NULL(value) \
do { \
if (value == nullptr) { \
break; \
} \
} while (0)

// Common helper functions
// Get a 32-bit value from a (possibly unaligned) pointer
inline uint32 DecodeFixed32(const char* ptr) {
uint32 result = 0;
if (EOK != memcpy_s(&result, sizeof(result), ptr, sizeof(result))) {
return result;
}
return result;
}

// Used to fetch a naturally-aligned 32-bit word in little endian byte-order
inline uint32 LE_LOAD32(const uint8_t* p) { return DecodeFixed32(reinterpret_cast<const char*>(p)); }

#endif // DATAVISUAL_UTILS_CRC32_BASE_H_

+ 316
- 0
mindinsight/datavisual/utils/crc32/crc32.cc View File

@@ -0,0 +1,316 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "crc32/crc32.h"
#include <stdint.h>

const unsigned int CRC_TABLE_SIZE = 256;

static const uint32 crc_table_o32[CRC_TABLE_SIZE] = {
0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, 0x8AD958CF,
0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, 0x105EC76F, 0xE235446C,
0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57,
0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B, 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E,
0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD,
0x1642AE59, 0xE4292D5A, 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696,
0x6EF07595, 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, 0x5125DAD3,
0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, 0xDBFC821C, 0x2997011F,
0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, 0x61C69362, 0x93AD1061, 0x80FDE395,
0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312,
0x44694011, 0x5739B3E5, 0xA55230E6, 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE,
0xDDE0EB2A, 0x2F8B6829, 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90,
0x563C5F93, 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC, 0x1871A4D8,
0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, 0xA24BB5A6, 0x502036A5,
0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E,
0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982, 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19,
0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8,
0xE52CC12C, 0x1747422F, 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3,
0x9D9E1AE0, 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, 0xE330A81A,
0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, 0x69E9F0D5, 0x9B8273D6,
0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, 0xF36E6F75, 0x0105EC76, 0x12551F82,
0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351};

static const uint32 crc_table_o40[CRC_TABLE_SIZE] = {
0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945, 0x9D14C3B8,
0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, 0x3FC5F181, 0x2C6769F6,
0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4, 0xA2D13239, 0xB173AA4E, 0x859402D7,
0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C, 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B,
0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47, 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166,
0xBFB7D911, 0x8B507188, 0x98F2E9FF, 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28,
0x298143B1, 0x3A23DBC6, 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009,
0xA737187E, 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41,
0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9, 0xC0D23785,
0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0, 0x5DC6F43D, 0x4E646C4A,
0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78, 0x809C2506, 0x933EBD71, 0xA7D915E8,
0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43, 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27,
0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB, 0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B,
0xE2712D2C, 0xD69685B5, 0xC5341DC2, 0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94,
0x4B82460D, 0x5820DE7A, 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB,
0x81AE33BC, 0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004,
0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D, 0x5912C8C0,
0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185, 0x844819FB, 0x97EA818C,
0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE, 0x195CDA43, 0x0AFE4234, 0x3E19EAAD,
0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306, 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3,
0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F, 0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E,
0x7BB1D269, 0x4F567AF0, 0x5CF4E287, 0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556,
0x6D1B6DCF, 0x7EB9F5B8, 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77,
0xE3AD3600, 0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439,
0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781, 0x7B5FDFFF,
0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA, 0xE64B1C47, 0xF5E98430,
0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502, 0x449A2E7E, 0x5738B609, 0x63DF1E90,
0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B, 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F,
0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483};

static const uint32 crc_table_o48[CRC_TABLE_SIZE] = {
0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469, 0x38513EC5,
0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC, 0x70A27D8A, 0xD5E3EFF4,
0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3, 0x48F3434F, 0xEDB2D131, 0x079C1142,
0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726, 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67,
0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D, 0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB,
0xE28AF3B5, 0x08A433C6, 0xADE5A1B8, 0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA,
0x40577089, 0xE516E2F7, 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C,
0xDD47DC32, 0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0,
0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75, 0xB7C7FD53,
0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A, 0x8F96C396, 0x2AD751E8,
0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF, 0x26217BCD, 0x8360E9B3, 0x694E29C0,
0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4, 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B,
0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161, 0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D,
0x6D1C3023, 0x8732F050, 0x2273622E, 0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6,
0xBF63CE95, 0x1A225CEB, 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154,
0xFFD7132A, 0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF,
0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0, 0xC3D4340C,
0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065, 0x6A638C57, 0xCF221E29,
0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E, 0x5232B292, 0xF77320EC, 0x1D5DE09F,
0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB, 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE,
0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4, 0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02,
0x190FF97C, 0xF321390F, 0x5660AB71, 0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE,
0x9DF3018D, 0x38B293F3, 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48,
0x00E3AD36, 0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79,
0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC, 0xAD060C8E,
0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7, 0x9557324B, 0x3016A035,
0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622, 0xDDA47104, 0x78E5E37A, 0x92CB2309,
0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D, 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2,
0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8};

static const uint32 crc_table_o56[CRC_TABLE_SIZE] = {
0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA, 0xF64463E6,
0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C, 0xE964B13D, 0x34211B85,
0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7, 0x1F20D2DB, 0xC2657863, 0xA047F15A,
0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11, 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2,
0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41, 0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E,
0x8706EC26, 0xE524651F, 0x3861CFA7, 0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD,
0xFA04B7C4, 0x27411D7C, 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422,
0xD1057E9A, 0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D,
0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB, 0x42C2EEDA,
0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610, 0xB4868D3C, 0x69C32784,
0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6, 0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED,
0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6, 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3,
0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040, 0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2,
0x3380611A, 0x51A2E823, 0x8CE7429B, 0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC,
0xA7E68BC5, 0x7AA3217D, 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D,
0x4BA071F5, 0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213,
0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8, 0x4D801BE4,
0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E, 0x8585DDB4, 0x58C0770C,
0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E, 0x73C1BE52, 0xAE8414EA, 0xCCA69DD3,
0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698, 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0,
0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443, 0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C,
0x3CC29424, 0x5EE01D1D, 0x83A5B7A5, 0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93,
0x3D4384AA, 0xE0062E12, 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C,
0x16424DF4, 0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F,
0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9, 0x2E238253,
0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99, 0xD867E1B5, 0x05224B0D,
0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F, 0xC747336E, 0x1A0299D6, 0x782010EF,
0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4, 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1,
0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842};

static const uint32 crc_table_o64[CRC_TABLE_SIZE] = {
0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44, 0xC5670B91,
0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5, 0x8F2261D3, 0xB7330E7F,
0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97, 0x4A456A42, 0x725405EE, 0x3A67B51A,
0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406, 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3,
0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13, 0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076,
0x069B6FDA, 0x4EA8DF2E, 0x76B9B082, 0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598,
0x04EDB56C, 0x3CFCDAC0, 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD,
0xF99BD151, 0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA,
0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B, 0xB8730B7D,
0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539, 0x7D1400EC, 0x45056F40,
0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8, 0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1,
0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD, 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C,
0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C, 0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A,
0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E, 0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7,
0xF6DBD453, 0xCECABBFF, 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4,
0xC6D4DB18, 0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089,
0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB, 0x24E7BF1E,
0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A, 0x750A600B, 0x4D1B0FA7,
0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F, 0xB06D6B9A, 0x887C0436, 0xC04FB4C2,
0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE, 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C,
0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C, 0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9,
0xE71BDB55, 0xAF286BA1, 0x9739040D, 0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE,
0xC994DE1A, 0xF185B1B6, 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B,
0x34E2BA27, 0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065,
0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4, 0x425B0AA5,
0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1, 0x873C0134, 0xBF2D6E98,
0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70, 0xCD796B76, 0xF56804DA, 0xBD5BB42E,
0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532, 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013,
0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3};

static const uint32 crc_table_o72[CRC_TABLE_SIZE] = {
0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD, 0x6006181F,
0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2, 0xC00C303E, 0x2F3C5B27,
0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93, 0xA00A2821, 0x4F3A4338, 0x7B8688E2,
0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C, 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57,
0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20, 0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5,
0xB83752FC, 0x8C8B9926, 0x63BBF23F, 0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD,
0x2C81B107, 0xC3B1DA1E, 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918,
0xA3B7C201, 0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746,
0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59, 0xCE086BD5,
0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778, 0xAE0E73CA, 0x413E18D3,
0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67, 0x8BF04D66, 0x64C0267F, 0x507CEDA5,
0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB, 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3,
0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4, 0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F,
0x16392136, 0x2285EAEC, 0xCDB581F5, 0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929,
0x4283F2F3, 0xADB399EA, 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062,
0x9A414B7B, 0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364,
0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45, 0xBC029FF7,
0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A, 0x99FCA15B, 0x76CCCA42,
0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6, 0xF9FAB944, 0x16CAD25D, 0x22761987,
0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9, 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF,
0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8, 0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D,
0x6433D514, 0x508F1ECE, 0xBFBF75D7, 0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053,
0x7B757B89, 0x94451090, 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396,
0xF443088F, 0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE,
0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1, 0x97F8FAB0,
0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D, 0xF7FEE2AF, 0x18CE89B6,
0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02, 0x57F4CA8E, 0xB8C4A197, 0x8C786A4D,
0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623, 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B,
0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C};

static const uint32 crc_table_o80[CRC_TABLE_SIZE] = {
0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089, 0x4E2DFD53,
0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA, 0x9C5BFAA6, 0xF458D66E,
0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F, 0xD27607F5, 0xBA752B3D, 0x02705E65,
0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C, 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5,
0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334, 0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F,
0xBE9597F7, 0x0690E2AF, 0x6E93CE67, 0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002,
0xD4E6E55A, 0xBCE5C992, 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809,
0xF2C834C1, 0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3,
0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0, 0xE6ECFDDC,
0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55, 0xA8C1008F, 0xC0C22C47,
0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006, 0x47EC84C7, 0x2FEFA80F, 0x97EADD57,
0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E, 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC,
0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D, 0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0,
0x16549778, 0xAE51E220, 0xC652CEE8, 0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B,
0xE07C1F73, 0x887F33BB, 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5,
0xE88BBE7D, 0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E,
0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB, 0x27180901,
0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988, 0xC8358D49, 0xA036A181,
0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0, 0x8618701A, 0xEE1B5CD2, 0x561E298A,
0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093, 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7,
0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766, 0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D,
0xD7A063A5, 0x6FA516FD, 0x07A63A35, 0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097,
0xFA3F95CF, 0x923CB907, 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C,
0xDC114454, 0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1,
0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2, 0xB2828A33,
0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA, 0xFCAF7760, 0x94AC5BA8,
0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9, 0x2ED97095, 0x46DA5C5D, 0xFEDF2905,
0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C, 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E,
0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F};

static const uint32 crc_table_o88[CRC_TABLE_SIZE] = {
0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504, 0x423B04DA,
0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE, 0x847609B4, 0xCD4A7493,
0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0, 0xC64D0D6E, 0x8F717049, 0x5435F720,
0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A, 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0,
0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D, 0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E,
0x271A9E09, 0xFC5E1960, 0xB5626447, 0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367,
0x3A13140E, 0x732F6929, 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4,
0x31146DF3, 0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36,
0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC, 0x9E76C286,
0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782, 0xDC4DC65C, 0x9571BB7B,
0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358, 0x1700AEAB, 0x5E3CD38C, 0x857854E5,
0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF, 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18,
0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75, 0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572,
0xFB575855, 0x2013DF3C, 0x692FA21B, 0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F,
0x6228DBE6, 0x2B14A6C1, 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47,
0xCE589360, 0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA,
0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4, 0xF24C9B0A,
0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E, 0x3901F3FD, 0x703D8EDA,
0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9, 0x7B3AF727, 0x32068A00, 0xE9420D69,
0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223, 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20,
0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D, 0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE,
0x976D01D9, 0x4C2986B0, 0x0515FB97, 0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C,
0x9D642575, 0xD4585852, 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF,
0x96635C88, 0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6,
0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C, 0x230138CF,
0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB, 0x613A3C15, 0x28064132,
0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911, 0xA777317B, 0xEE4B4C5C, 0x350FCB35,
0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F, 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8,
0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5};

// Use the eight lookup tables (slicing-by-8) to compute the CRC32C value
inline void CRC32T8(uint32 *crc, const uint8_t **p) {
auto c = static_cast<uint32>(*crc ^ LE_LOAD32(*p));
*p += 4;
*crc = crc_table_o88[c & 0xff] ^ crc_table_o80[(c >> 8) & 0xff] ^ crc_table_o72[(c >> 16) & 0xff] ^
crc_table_o64[(c >> 24) & 0xff];
c = static_cast<uint32>(LE_LOAD32(*p));
*crc = (*crc) ^ crc_table_o56[c & 0xff] ^ crc_table_o48[(c >> 8) & 0xff] ^ crc_table_o40[(c >> 16) & 0xff] ^
crc_table_o32[(c >> 24) & 0xff];
*p += 4;
}

// Compute the CRC32C value of the given buffer
uint32 MakeCrc32c(uint32 init_crc, const char *data, size_t size) {
EXCEPT_CHECK_NULL(data);
uint32 crc = init_crc ^ 0xffffffffu;
const unsigned int OFFSET = 8;

// Get the original begin and end addresses (not necessarily aligned)
auto *bp = reinterpret_cast<const uint8_t *>(data);
const uint8_t *ep = bp + size;

// Get the aligned address.
// Point bp_align at the first 4-byte-aligned byte in the buffer.
// This might be just past the end of the buffer.
auto pval = reinterpret_cast<uintptr_t>(bp);
auto *bp_align = reinterpret_cast<const uint8_t *>(MEM_ALIGN(pval, 2));

// Process the unaligned leading bytes, if the aligned address lies within the buffer
if (bp_align <= ep) {
// Process bytes one at a time until bp is 4-byte aligned
while (bp != bp_align) {
crc = crc_table_o32[(crc & 0xff) ^ (*bp++)] ^ (crc >> 8);
}
}

// Process 8 bytes at a time using the eight tables
while ((ep - bp) >= OFFSET) {
CRC32T8(&crc, &bp);
}

// Process the remaining trailing bytes one at a time
while (bp < ep) {
crc = crc_table_o32[(crc & 0xff) ^ (*bp++)] ^ (crc >> 8);
}
return crc ^ 0xffffffffu;
}
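
As a sanity check, the slicing-by-8 routine above must agree with a plain bitwise CRC32C. Below is a minimal reference sketch (not part of the source tree), assuming the standard Castagnoli polynomial 0x82F63B78 in reflected form; it is useful only for validating the tables, not for production use:

# Bitwise CRC32C (Castagnoli) reference implementation, one bit at a time.
# Far slower than slicing-by-8, but handy for cross-checking the tables above.
def crc32c_reference(data: bytes, init_crc: int = 0) -> int:
    crc = init_crc ^ 0xFFFFFFFF
    for byte in data:
        crc ^= byte
        for _ in range(8):
            crc = (crc >> 1) ^ (0x82F63B78 if crc & 1 else 0)
    return crc ^ 0xFFFFFFFF

assert crc32c_reference(b"123456789") == 0xE3069283  # standard CRC32C check value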

+ 54
- 0
mindinsight/datavisual/utils/crc32/crc32.h View File

@@ -0,0 +1,54 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DATAVISUAL_UTILS_CRC32_CRC32_H_
#define DATAVISUAL_UTILS_CRC32_CRC32_H_

#include <pybind11/pybind11.h>
#include <stddef.h>
#include <cstdint>
#include "crc32/base.h"

// Align n up to a (1 << m)-byte boundary, e.g. MEM_ALIGN(13, 2) == 16
#define MEM_ALIGN(n, m) (((n) + ((1 << (m)) - 1)) & ~((1 << (m)) - 1))

// Delta constant used to mask and unmask CRC values.
static constexpr uint32 kMaskDelta = 0xa282ead8ul;

// CRC32C utility functions.

// Calculate the CRC32C value using the slicing-by-8 method
uint32 MakeCrc32c(uint32 init_crc, const char* data, size_t size);

// Return the masked CRC32C of data[0, n-1]. Masking (rotate right by 15 bits,
// then add kMaskDelta) is the inverse of the unmasking done in GetValueFromStr,
// so stored values round-trip correctly.
uint32 GetMaskCrc32cValue(const char* data, size_t n) {
auto crc = MakeCrc32c(0, data, n);
return ((crc >> 15) | (crc << 17)) + kMaskDelta;
}

// Decode a fixed32 value and unmask it: subtract kMaskDelta, then rotate right by 17 bits.
uint32 GetValueFromStr(const char* crc_str) {
uint32 crc = DecodeFixed32(crc_str);
uint32 rot = crc - kMaskDelta;
return ((rot >> 17) | (rot << 15));
}

PYBIND11_MODULE(crc32, m) {
m.doc() = "crc util";
m.def("MakeCrc32c", &MakeCrc32c, "A function calculating the crc32c value, use the 8 table method");
m.def("GetMaskCrc32cValue", &GetMaskCrc32cValue, "A function return the crc32c value");
m.def("GetValueFromStr", &GetValueFromStr, "A function return the crc32c value from string");
}

#endif // DATAVISUAL_UTILS_CRC32_CRC32_H_
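
Once the header is built as a Python extension, the exported functions can be exercised directly. A hypothetical round-trip sketch follows; the import path, the little-endian behavior of DecodeFixed32, and the pybind11 bytes-to-pointer conversion are assumptions based on the layout above, not facts confirmed by this diff:

# Hypothetical usage of the compiled crc32 extension module.
import struct

from mindinsight.datavisual.utils import crc32  # assumed import path

data = b"event payload"
masked = crc32.GetMaskCrc32cValue(data, len(data))
# GetValueFromStr decodes a stored little-endian fixed32 and unmasks it,
# recovering the raw CRC32C of the payload.
raw = crc32.GetValueFromStr(struct.pack("<I", masked))
assert raw == crc32.MakeCrc32c(0, data, len(data))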

+ 155
- 0
mindinsight/datavisual/utils/tools.py View File

@@ -0,0 +1,155 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Common Tools."""
import imghdr
import math
import os

from numbers import Number
from urllib.parse import unquote

from mindinsight.utils import exceptions

_IMG_EXT_TO_MIMETYPE = {
'bmp': 'image/bmp',
'gif': 'image/gif',
'jpeg': 'image/jpeg',
'png': 'image/png',
}
_DEFAULT_IMAGE_MIMETYPE = 'application/octet-stream'


def find_app_package():
"""Find package in current directory."""
backend_dir = os.path.realpath(os.path.join(__file__, os.pardir, os.pardir, os.pardir, "backend"))
packages = []
for file in os.listdir(backend_dir):
file_path = os.path.join(backend_dir, file)
if os.path.isfile(file_path):
continue
if not os.path.isfile(os.path.join(file_path, '__init__.py')):
continue
rel_path = os.path.relpath(file_path, backend_dir)
package = rel_path.replace(os.path.sep, '.')
package = f"mindinsight.backend.{package}"
packages.append(package)
return packages


def to_str(bytes_or_text, encode="utf-8"):
"""Bytes transform string."""
if isinstance(bytes_or_text, bytes):
return bytes_or_text.decode(encode)
if isinstance(bytes_or_text, str):
return bytes_or_text

raise TypeError("Param isn't str or bytes type, param={}".format(bytes_or_text))


def to_int(param, param_name):
"""
Transform param to int.

Args:
param (Any): The parameter to be transformed.
param_name (str): Param name.

Returns:
int, the transformed value.

"""
try:
param = int(param)
except ValueError:
raise exceptions.ParamTypeError(param_name, 'Integer')
return param


def str_to_bool(param, param_name):
"""
Check param and transform it to bool.

Args:
param (str): 'true' or 'false' is valid.
param_name (str): Param name.

Returns:
bool, True if param is 'true' (case-insensitive), otherwise False.

Raises:
ParamValueError: If the value of param is neither 'false' nor 'true'.

"""
if not isinstance(param, str):
raise exceptions.ParamTypeError(param_name, 'str')

if param.lower() not in ['false', 'true']:
raise exceptions.ParamValueError("The value of %s must be 'false' or 'true'." % param_name)
param = (param.lower() == 'true')

return param


def get_img_mimetype(img_data):
"""
Recognize the image header and return the corresponding MIME type.

Args:
img_data (bytes): Binary content of the image.

Returns:
str, the MIME type of the given image.
"""
image_type = imghdr.what(None, img_data)
mimetype = _IMG_EXT_TO_MIMETYPE.get(image_type, _DEFAULT_IMAGE_MIMETYPE)
return mimetype


def get_train_id(request):
"""
Get train ID from the request query string and unquote it.

Args:
request (FlaskRequest): Http request instance.

Returns:
str, unquoted train ID.
"""
train_id = request.args.get('train_id')
if train_id is not None:
try:
train_id = unquote(train_id, errors='strict')
except UnicodeDecodeError:
raise exceptions.ParamValueError('Unquote error with strict mode')
return train_id


def if_nan_inf_to_none(name, value):
"""
Transform value to None if it is NaN or Inf.

Args:
name (str): Name of value.
value (float): The number to be transformed.

Returns:
float or None, None if value is NaN or Inf, otherwise the original value.

"""
if not isinstance(value, Number):
raise exceptions.ParamTypeError(name, 'number')
if math.isnan(value) or math.isinf(value):
value = None
return value
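
A quick illustrative sketch of how these helpers behave (a usage example for this editor's reference, assuming the module import path shown in the file header above; the parameter names are arbitrary):

# Illustrative usage of the helpers above.
from mindinsight.datavisual.utils.tools import (
    if_nan_inf_to_none, str_to_bool, to_int)

assert to_int("42", "limit") == 42
assert str_to_bool("True", "recursive") is True
assert if_nan_inf_to_none("loss", float("nan")) is None
assert if_nan_inf_to_none("loss", 0.5) == 0.5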

+ 33
- 0
mindinsight/lineagemgr/__init__.py View File

@@ -0,0 +1,33 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Lineagemgr Module Introduction.

This module provides Python APIs to collect and query the lineage of models.
Users can add the TrainLineage/EvalLineage callback to the MindSpore train/eval callback list to
collect the key parameters and results, such as the name of the network and optimizer, and the
evaluation metrics and results.
The APIs can be used to get the lineage information of the models, for example,
which hyperparameters were used in model training and which model has the highest
accuracy among all the versions.
"""
from mindinsight.lineagemgr.api.model import get_summary_lineage, filter_summary_lineage
from mindinsight.lineagemgr.common.log import logger
try:
from mindinsight.lineagemgr.collection.model.model_lineage import TrainLineage, EvalLineage
except (ModuleNotFoundError, NameError, ImportError):
logger.warning('MindSpore not found!')

__all__ = ["TrainLineage", "EvalLineage", "get_summary_lineage", "filter_summary_lineage"]

+ 14
- 0
mindinsight/lineagemgr/api/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 292
- 0
mindinsight/lineagemgr/api/model.py View File

@@ -0,0 +1,292 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""This file is used to define the model lineage python api."""
import os

from mindinsight.lineagemgr.common.exceptions.exceptions import LineageParamValueError, \
LineageFileNotFoundError, LineageQuerySummaryDataError, LineageParamSummaryPathError, \
LineageQuerierParamException, LineageDirNotExistError, LineageSearchConditionParamError, \
LineageParamTypeError, LineageSummaryParseException
from mindinsight.lineagemgr.common.log import logger as log
from mindinsight.lineagemgr.common.path_parser import SummaryPathParser
from mindinsight.lineagemgr.common.validator.model_parameter import SearchModelConditionParameter
from mindinsight.lineagemgr.common.validator.validate import validate_filter_key
from mindinsight.lineagemgr.common.validator.validate import validate_search_model_condition, \
validate_condition, validate_path
from mindinsight.lineagemgr.querier.querier import Querier
from mindinsight.utils.exceptions import MindInsightException


def get_summary_lineage(summary_dir, keys=None):
"""
Get the lineage information according to summary directory and keys.

The function queries the lineage information of a single training process
corresponding to the given summary directory. Users can query the
information according to `keys`.

Args:
summary_dir (str): The summary directory. It contains summary logs for
one training.
keys (list[str]): The filter keys of lineage information. The acceptable
keys are `metric`, `hyper_parameters`, `algorithm`, `train_dataset`,
`model`, `valid_dataset` and `dataset_graph`. If it is `None`, all
information will be returned. Default: None.

Returns:
dict, the lineage information for one training.

Raises:
LineageParamSummaryPathError: If summary path is invalid.
LineageQuerySummaryDataError: If querying summary data fails.
LineageFileNotFoundError: If the summary log file is not found.

Examples:
>>> summary_dir = "/path/to/summary"
>>> summary_lineage_info = get_summary_lineage(summary_dir)
>>> hyper_parameters = get_summary_lineage(summary_dir, keys=["hyper_parameters"])
"""
try:
summary_dir = validate_path(summary_dir)
except MindInsightException as error:
log.error(str(error))
log.exception(error)
raise LineageParamSummaryPathError(str(error.message))

if keys is not None:
validate_filter_key(keys)

summary_path = SummaryPathParser.get_latest_lineage_summary(summary_dir)
if summary_path is None:
log.error('There is no summary log file under summary_dir.')
raise LineageFileNotFoundError(
'There is no summary log file under summary_dir.'
)

try:
result = Querier(summary_path).get_summary_lineage(
summary_dir, filter_keys=keys)
except LineageSummaryParseException:
return {}
except (LineageQuerierParamException, LineageParamTypeError) as error:
log.error(str(error))
log.exception(error)
raise LineageQuerySummaryDataError("Get summary lineage failed.")

return result[0]


def filter_summary_lineage(summary_base_dir, search_condition=None):
"""
Filter the lineage information under summary base directory according to search condition.

Users can filter and sort all lineage information according to the search
condition. The supported filter fields include `summary_dir`, `network`,
etc. The filter conditions include `eq`, `lt`, `gt`, `le`, `ge` and `in`.
At the same time, the combined use of these fields and conditions is
supported. If you want to sort based on filter fields, the field of
`sorted_name` and `sorted_type` should be specified.

Users can use `lineage_type` to decide what kind of lineage information to
query. If the `lineage_type` is `dataset`, the query result is only the
lineage information related to data augmentation. If the `lineage_type` is
`model` or `None`, the query result is all lineage information.

Users can paginate the query result based on `offset` and `limit`. The `offset`
refers to the page number, and the `limit` refers to the number of records per page.

Args:
summary_base_dir (str): The summary base directory. It contains summary
directories generated by training.
search_condition (dict): The search condition. When filtering and
sorting, in addition to the following supported fields, fields
prefixed with `metric_` are also supported. The fields prefixed with
`metric_` are related to the `metrics` parameter in the training
script. For example, if the key of `metrics` parameter is
`accuracy`, the field should be `metric_accuracy`. Default: None.

- summary_dir (dict): The filter condition of summary directory.

- loss_function (dict): The filter condition of loss function.

- train_dataset_path (dict): The filter condition of train dataset path.

- train_dataset_count (dict): The filter condition of train dataset count.

- test_dataset_path (dict): The filter condition of test dataset path.

- test_dataset_count (dict): The filter condition of test dataset count.

- network (dict): The filter condition of network.

- optimizer (dict): The filter condition of optimizer.

- learning_rate (dict): The filter condition of learning rate.

- epoch (dict): The filter condition of epoch.

- batch_size (dict): The filter condition of batch size.

- loss (dict): The filter condition of loss.

- model_size (dict): The filter condition of model size.

- dataset_mark (dict): The filter condition of dataset mark.

- offset (int): Page number, the value range is [0, 100000].

- limit (int): The number of records per page, the value range is [1, 100].

- sorted_name (str): Specify which field to sort by.

- sorted_type (str): Specify sort order. It can be `ascending` or
`descending`.

- lineage_type (str): It decides what kind of lineage information to
query. It can be `dataset` or `model`. If it is `dataset`,
the query result is only the lineage information related to data
augmentation. If it is `model` or `None`, the query result is all
lineage information.

Returns:
dict, all lineage information under summary base directory according to
search condition.

Raises:
LineageSearchConditionParamError: If search_condition param is invalid.
LineageParamSummaryPathError: If summary path is invalid.
LineageFileNotFoundError: If the summary log file is not found.
LineageQuerySummaryDataError: If querying summary log file data fails.

Examples:
>>> summary_base_dir = "/path/to/summary_base"
>>> search_condition = {
>>> 'summary_dir': {
>>> 'in': [
>>> os.path.join(summary_base_dir, 'summary_1'),
>>> os.path.join(summary_base_dir, 'summary_2'),
>>> os.path.join(summary_base_dir, 'summary_3')
>>> ]
>>> },
>>> 'loss': {
>>> 'gt': 2.0
>>> },
>>> 'batch_size': {
>>> 'ge': 128,
>>> 'le': 256
>>> },
>>> 'metric_accuracy': {
>>> 'lt': 0.1
>>> },
>>> 'sorted_name': 'summary_dir',
>>> 'sorted_type': 'descending',
>>> 'limit': 3,
>>> 'offset': 0,
>>> 'lineage_type': 'model'
>>> }
>>> summary_lineage = filter_summary_lineage(summary_base_dir)
>>> summary_lineage_filter = filter_summary_lineage(summary_base_dir, search_condition)
"""
try:
summary_base_dir = validate_path(summary_base_dir)
except (LineageParamValueError, LineageDirNotExistError) as error:
log.error(str(error))
log.exception(error)
raise LineageParamSummaryPathError(str(error.message))

search_condition = {} if search_condition is None else search_condition

try:
validate_condition(search_condition)
validate_search_model_condition(SearchModelConditionParameter, search_condition)
except MindInsightException as error:
log.error(str(error))
log.exception(error)
raise LineageSearchConditionParamError(str(error.message))

try:
search_condition = _convert_relative_path_to_abspath(summary_base_dir, search_condition)
except (LineageParamValueError, LineageDirNotExistError) as error:
log.error(str(error))
log.exception(error)
raise LineageParamSummaryPathError(str(error.message))

summary_path = SummaryPathParser.get_latest_lineage_summaries(summary_base_dir)
if not summary_path:
log.error('There is no summary log file under summary_base_dir.')
raise LineageFileNotFoundError(
'There is no summary log file under summary_base_dir.'
)

try:
result = Querier(summary_path).filter_summary_lineage(
condition=search_condition
)
except LineageSummaryParseException:
result = {'object': [], 'count': 0}
except (LineageQuerierParamException, LineageParamTypeError) as error:
log.error(str(error))
log.exception(error)
raise LineageQuerySummaryDataError("Filter summary lineage failed.")

return result


def _convert_relative_path_to_abspath(summary_base_dir, search_condition):
"""
Convert relative path to absolute path.

Args:
summary_base_dir (str): The summary base directory.
search_condition (dict): The search condition.

Returns:
dict, the updated search_condition.

Raises:
LineageParamValueError: If the value of input_name is invalid.
"""
if ("summary_dir" not in search_condition) or (not search_condition.get("summary_dir")):
return search_condition

summary_dir_condition = search_condition.get("summary_dir")
if not set(summary_dir_condition.keys()).issubset(['in', 'eq']):
raise LineageParamValueError("Invalid operation of summary dir.")

if 'in' in summary_dir_condition:
summary_paths = []
for summary_dir in summary_dir_condition.get('in'):
if summary_dir.startswith('./'):
abs_dir = os.path.join(
summary_base_dir, summary_dir[2:]
)
abs_dir = validate_path(abs_dir)
else:
abs_dir = validate_path(summary_dir)
summary_paths.append(abs_dir)
search_condition.get('summary_dir')['in'] = summary_paths

if 'eq' in summary_dir_condition:
summary_dir = summary_dir_condition.get('eq')
if summary_dir.startswith('./'):
abs_dir = os.path.join(
summary_base_dir, summary_dir[2:]
)
abs_dir = validate_path(abs_dir)
else:
abs_dir = validate_path(summary_dir)
search_condition.get('summary_dir')['eq'] = abs_dir

return search_condition
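
To make the conversion concrete: a relative directory such as './summary_1' is joined onto summary_base_dir, while absolute paths are only validated. A minimal sketch of the 'eq' branch with hypothetical paths (validate_path omitted):

# Hypothetical before/after for the conversion above.
import os

summary_base_dir = "/path/to/summary_base"
search_condition = {"summary_dir": {"eq": "./summary_1"}}

summary_dir = search_condition["summary_dir"]["eq"]
if summary_dir.startswith("./"):
    search_condition["summary_dir"]["eq"] = os.path.join(summary_base_dir, summary_dir[2:])

# search_condition["summary_dir"]["eq"] is now "/path/to/summary_base/summary_1"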

+ 14
- 0
mindinsight/lineagemgr/collection/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 14
- 0
mindinsight/lineagemgr/collection/model/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 37
- 0
mindinsight/lineagemgr/collection/model/base.py View File

@@ -0,0 +1,37 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Metadata of lineage collection."""


class Metadata:
"""Initialize parameters used in model lineage management."""
train_dataset_path = 'train_dataset_path'
valid_dataset_path = 'valid_dataset_path'
train_network = 'train_network'
loss_function = 'loss_function'
loss = 'loss'
optimizer = 'optimizer'
learning_rate = 'learning_rate'
epoch = 'epoch'
step_num = 'step_num'
parallel_mode = 'parallel_mode'
device_num = 'device_num'
batch_size = 'batch_size'
model_path = 'model_path'
model_ckpt = 'model_ckpt'
model_size = 'model_size'
metrics = 'metrics'
train_dataset_size = 'train_dataset_size'
valid_dataset_size = 'valid_dataset_size'

+ 621
- 0
mindinsight/lineagemgr/collection/model/model_lineage.py View File

@@ -0,0 +1,621 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""This module is used to collect lineage information of model training."""
import json
import os

import numpy as np

from mindinsight.lineagemgr.summary.summary_record import LineageSummary
from mindinsight.utils.exceptions import \
MindInsightException
from mindinsight.lineagemgr.common.validator.validate import validate_train_run_context, \
validate_eval_run_context, validate_file_path, validate_network, \
validate_int_params, validate_summary_record, validate_raise_exception
from mindinsight.lineagemgr.common.exceptions.error_code import LineageErrors, LineageErrorMsg
from mindinsight.lineagemgr.common.exceptions.exceptions import LineageParamRunContextError, \
LineageGetModelFileError, LineageLogError
from mindinsight.lineagemgr.common.log import logger as log
from mindinsight.lineagemgr.common.utils import try_except
from mindinsight.lineagemgr.common.validator.model_parameter import RunContextArgs, \
EvalParameter
from mindinsight.lineagemgr.collection.model.base import Metadata

try:
from mindspore.common.tensor import Tensor
from mindspore.train.callback import Callback, RunContext, ModelCheckpoint, SummaryStep
from mindspore.nn import Cell, Optimizer, WithLossCell, TrainOneStepWithLossScaleCell
from mindspore.nn.loss.loss import _Loss
from mindspore.dataset.engine import Dataset, MindDataset
import mindspore.dataset as ds
except (ImportError, ModuleNotFoundError):
log.warning('MindSpore not found!')


class TrainLineage(Callback):
"""
Collect lineage of a training job.

Args:
summary_record (SummaryRecord): SummaryRecord is used to record
the summary value, and summary_record is an instance of SummaryRecord,
see mindspore.train.summary.SummaryRecord.
raise_exception (bool): Whether to raise exception when error occurs in
TrainLineage. If True, raise exception. If False, catch exception
and continue. Default: False.

Raises:
MindInsightException: If validating parameter fails.
LineageLogError: If recording lineage information fails.

Examples:
>>> from mindinsight.lineagemgr import TrainLineage
>>> from mindspore.train.callback import ModelCheckpoint, SummaryStep
>>> from mindspore.train.summary import SummaryRecord
>>> model = Model(train_network)
>>> model_ckpt = ModelCheckpoint(directory='/dir/to/save/model/')
>>> summary_writer = SummaryRecord(log_dir='./')
>>> summary_callback = SummaryStep(summary_writer, flush_step=2)
>>> lineagemgr = TrainLineage(summary_record=summary_writer)
>>> model.train(epoch_num, dataset, callbacks=[model_ckpt, summary_callback, lineagemgr])
"""
def __init__(self, summary_record, raise_exception=False):
super(TrainLineage, self).__init__()
try:
validate_raise_exception(raise_exception)
self.raise_exception = raise_exception

validate_summary_record(summary_record)
self.summary_record = summary_record

summary_log_path = summary_record.full_file_name
validate_file_path(summary_log_path)
self.lineage_log_path = summary_log_path + '_lineage'

self.initial_learning_rate = None
except MindInsightException as err:
log.error(err)
if raise_exception:
raise

@try_except(log)
def begin(self, run_context):
"""
Initialize the training progress when the training job begins.

Args:
run_context (RunContext): It contains all lineage information,
see mindspore.train.callback.RunContext.

Raises:
MindInsightException: If validating parameter fails.
"""
log.info('Initialize training lineage collection...')

if not isinstance(run_context, RunContext):
error_msg = 'Invalid TrainLineage run_context.'
log.error(error_msg)
raise LineageParamRunContextError(error_msg)

run_context_args = run_context.original_args()
if not self.initial_learning_rate:
optimizer = run_context_args.get('optimizer')
if optimizer and not isinstance(optimizer, Optimizer):
log.error("The parameter optimizer is invalid. It should be an instance of "
"mindspore.nn.optim.optimizer.Optimizer.")
raise MindInsightException(error=LineageErrors.PARAM_OPTIMIZER_ERROR,
message=LineageErrorMsg.PARAM_OPTIMIZER_ERROR.value)
if optimizer:
log.info('Obtaining initial learning rate...')
self.initial_learning_rate = AnalyzeObject.analyze_optimizer(optimizer)
log.debug('initial_learning_rate: %s', self.initial_learning_rate)
else:
network = run_context_args.get('train_network')
validate_network(network)
optimizer = AnalyzeObject.get_optimizer_by_network(network)
self.initial_learning_rate = AnalyzeObject.analyze_optimizer(optimizer)
log.debug('initial_learning_rate: %s', self.initial_learning_rate)

# get train dataset graph
train_dataset = run_context_args.get('train_dataset')
dataset_graph_dict = ds.serialize(train_dataset)
dataset_graph_json_str = json.dumps(dataset_graph_dict, indent=2)
dataset_graph_dict = json.loads(dataset_graph_json_str)
log.info('Logging dataset graph...')
try:
lineage_summary = LineageSummary(self.lineage_log_path)
lineage_summary.record_dataset_graph(dataset_graph=dataset_graph_dict)
except Exception as error:
error_msg = f'Dataset graph log error in TrainLineage begin: {error}'
log.error(error_msg)
raise LineageLogError(error_msg)
log.info('Dataset graph logged successfully.')

@try_except(log)
def end(self, run_context):
"""
Collect lineage information when the training job ends.

Args:
run_context (RunContext): It contains all lineage information,
see mindspore.train.callback.RunContext.

Raises:
LineageLogError: If recording lineage information fails.
"""
log.info('Start to collect training lineage...')
if not isinstance(run_context, RunContext):
error_msg = 'Invalid TrainLineage run_context.'
log.error(error_msg)
raise LineageParamRunContextError(error_msg)

run_context_args = run_context.original_args()
validate_train_run_context(RunContextArgs, run_context_args)

train_lineage = dict()
train_lineage = AnalyzeObject.get_network_args(
run_context_args, train_lineage
)
train_dataset = run_context_args.get('train_dataset')
callbacks = run_context_args.get('list_callback')
list_callback = getattr(callbacks, '_callbacks', [])

log.info('Obtaining model files...')
ckpt_file_path, _ = AnalyzeObject.get_file_path(list_callback)

train_lineage[Metadata.learning_rate] = self.initial_learning_rate
train_lineage[Metadata.epoch] = run_context_args.get('epoch_num')
train_lineage[Metadata.step_num] = run_context_args.get('cur_step_num')
train_lineage[Metadata.parallel_mode] = run_context_args.get('parallel_mode')
train_lineage[Metadata.device_num] = run_context_args.get('device_number')
train_lineage[Metadata.batch_size] = run_context_args.get('batch_num')
model_path_dict = {
'ckpt': ckpt_file_path
}
train_lineage[Metadata.model_path] = json.dumps(model_path_dict)

log.info('Calculating model size...')
train_lineage[Metadata.model_size] = AnalyzeObject.get_model_size(
ckpt_file_path
)
log.debug('model_size: %s', train_lineage[Metadata.model_size])

log.info('Analyzing dataset object...')
train_lineage = AnalyzeObject.analyze_dataset(train_dataset, train_lineage, 'train')

log.info('Logging lineage information...')
try:
lineage_summary = LineageSummary(self.lineage_log_path)
lineage_summary.record_train_lineage(train_lineage)
except IOError as error:
error_msg = f'End error in TrainLineage: {error}'
log.error(error_msg)
raise LineageLogError(error_msg)
except Exception as error:
error_msg = f'End error in TrainLineage: {error}'
log.error(error_msg)
log.error('Failed to log the lineage of the training job.')
raise LineageLogError(error_msg)
log.info('The lineage of the training job has been logged successfully.')


class EvalLineage(Callback):
"""
Collect lineage of an evaluation job.

Args:
summary_record (SummaryRecord): SummaryRecord is used to record
the summary value, and summary_record is an instance of SummaryRecord,
see mindspore.train.summary.SummaryRecord.
raise_exception (bool): Whether to raise exception when error occurs in
EvalLineage. If True, raise exception. If False, catch exception
and continue. Default: False.

Raises:
MindInsightException: If validating parameter fails.
LineageLogError: If recording lineage information fails.

Examples:
>>> from mindinsight.lineagemgr import EvalLineage
>>> from mindspore.train.callback import ModelCheckpoint, SummaryStep
>>> from mindspore.train.summary import SummaryRecord
>>> model = Model(train_network)
>>> model_ckpt = ModelCheckpoint(directory='/dir/to/save/model/')
>>> summary_writer = SummaryRecord(log_dir='./')
>>> summary_callback = SummaryStep(summary_writer, flush_step=2)
>>> lineagemgr = EvalLineage(summary_record=summary_writer)
>>> model.eval(epoch_num, dataset, callbacks=[model_ckpt, summary_callback, lineagemgr])
"""
def __init__(self, summary_record, raise_exception=False):
super(EvalLineage, self).__init__()
try:
validate_raise_exception(raise_exception)
self.raise_exception = raise_exception

validate_summary_record(summary_record)
self.summary_record = summary_record

summary_log_path = summary_record.full_file_name
validate_file_path(summary_log_path)
self.lineage_log_path = summary_log_path + '_lineage'
except MindInsightException as err:
log.error(err)
if raise_exception:
raise

@try_except(log)
def end(self, run_context):
"""
Collect lineage information when the evaluation job ends.

Args:
run_context (RunContext): It contains all lineage information,
see mindspore.train.callback.RunContext.

Raises:
MindInsightException: If validating parameter fails.
LineageLogError: If recording lineage information fails.
"""
if not isinstance(run_context, RunContext):
error_msg = 'Invalid EvalLineage run_context.'
log.error(error_msg)
raise LineageParamRunContextError(error_msg)

run_context_args = run_context.original_args()
validate_eval_run_context(EvalParameter, run_context_args)

valid_dataset = run_context_args.get('valid_dataset')

eval_lineage = dict()
metrics = run_context_args.get('metrics')
eval_lineage[Metadata.metrics] = json.dumps(metrics)
eval_lineage[Metadata.step_num] = run_context_args.get('cur_step_num')

log.info('Analyzing dataset object...')
eval_lineage = AnalyzeObject.analyze_dataset(valid_dataset, eval_lineage, 'valid')

log.info('Logging evaluation job lineage...')
try:
lineage_summary = LineageSummary(self.lineage_log_path)
lineage_summary.record_evaluation_lineage(eval_lineage)
except IOError as error:
error_msg = f'End error in EvalLineage: {error}'
log.error(error_msg)
log.error('Failed to log the lineage of the evaluation job.')
raise LineageLogError(error_msg)
except Exception as error:
error_msg = f'End error in EvalLineage: {error}'
log.error(error_msg)
log.error('Failed to log the lineage of the evaluation job.')
raise LineageLogError(error_msg)
log.info('The lineage of the evaluation job has been logged successfully.')


class AnalyzeObject:
"""Analyze class object in MindSpore."""

@staticmethod
def get_optimizer_by_network(network):
"""
Get optimizer by analyzing network.

Args:
network (Cell): See mindspore.nn.Cell.

Returns:
Optimizer, an Optimizer object.
"""
optimizer = None
net_args = vars(network) if network else {}
net_cell = net_args.get('_cells') if net_args else {}
for _, value in net_cell.items():
if isinstance(value, Optimizer):
optimizer = value
break
return optimizer

@staticmethod
def get_loss_fn_by_network(network):
"""
Get loss function by analyzing network.

Args:
network (Cell): See mindspore.nn.Cell.

Returns:
Loss_fn, a Cell object.
"""
loss_fn = None
inner_cell_list = []
net_args = vars(network) if network else {}
net_cell = net_args.get('_cells') if net_args else {}
for _, value in net_cell.items():
if isinstance(value, Cell) and \
not isinstance(value, Optimizer):
inner_cell_list.append(value)

while inner_cell_list:
inner_net_args = vars(inner_cell_list[0])
inner_net_cell = inner_net_args.get('_cells')

for value in inner_net_cell.values():
if isinstance(value, _Loss):
loss_fn = value
break
if isinstance(value, Cell):
inner_cell_list.append(value)
if loss_fn:
break

inner_cell_list.pop(0)

return loss_fn

@staticmethod
def get_backbone_network(network):
"""
Get the name of backbone network.

Args:
network (Cell): The train network.

Returns:
str, the name of the backbone network.
"""
with_loss_cell = False
backbone = None
net_args = vars(network) if network else {}
net_cell = net_args.get('_cells') if net_args else {}

for _, value in net_cell.items():
if isinstance(value, WithLossCell):
backbone = getattr(value, '_backbone')
with_loss_cell = True
break

if with_loss_cell:
backbone_name = type(backbone).__name__ \
if backbone else None
elif isinstance(network, TrainOneStepWithLossScaleCell):
backbone = getattr(network, 'network')
backbone_name = type(backbone).__name__ \
if backbone else None
else:
backbone_name = type(network).__name__ \
if network else None
return backbone_name

@staticmethod
def analyze_optimizer(optimizer):
"""
Analyze Optimizer, a Cell object of MindSpore.

In this way, we can obtain the following attributes:
learning_rate (float),
weight_decay (float),
momentum (float),
weights (float).

Args:
optimizer (Optimizer): See mindspore.nn.optim.Optimizer.

Returns:
float, the learning rate that the optimizer adopted.
"""
learning_rate = None
if isinstance(optimizer, Optimizer):
learning_rate = getattr(optimizer, 'learning_rate', None)

if learning_rate:
learning_rate = learning_rate.default_input

# Get the real learning rate value
if isinstance(learning_rate, Tensor):
learning_rate = learning_rate.asnumpy()
if learning_rate.ndim == 0:
learning_rate = np.atleast_1d(learning_rate)
learning_rate = list(learning_rate)
elif isinstance(learning_rate, float):
learning_rate = [learning_rate]

return learning_rate[0] if learning_rate else None

@staticmethod
def analyze_dataset(dataset, lineage_dict, dataset_type):
"""
Analyze Dataset, a Dataset object of MindSpore.

In this way, we can obtain the following attributes:
dataset_path (str),
train_dataset_size (int),
valid_dataset_size (int),
batch_size (int)

Args:
dataset (Dataset): See mindspore.dataengine.datasets.Dataset.
lineage_dict (dict): A dict contains lineage metadata.
dataset_type (str): Dataset type, train or valid.

Returns:
dict, the lineage metadata.
"""
dataset_batch_size = dataset.get_dataset_size()
if dataset_batch_size is not None:
validate_int_params(dataset_batch_size, 'dataset_batch_size')
log.debug('dataset_batch_size: %d', dataset_batch_size)
dataset_path = AnalyzeObject.get_dataset_path_wrapped(dataset)
if dataset_path:
dataset_path = '/'.join(dataset_path.split('/')[:-1])

step_num = lineage_dict.get('step_num')
validate_int_params(step_num, 'step_num')
log.debug('step_num: %d', step_num)

if dataset_type == 'train':
lineage_dict[Metadata.train_dataset_path] = dataset_path
epoch = lineage_dict.get('epoch')
train_dataset_size = dataset_batch_size * (step_num / epoch)
lineage_dict[Metadata.train_dataset_size] = int(train_dataset_size)
elif dataset_type == 'valid':
lineage_dict[Metadata.valid_dataset_path] = dataset_path
lineage_dict[Metadata.valid_dataset_size] = dataset_batch_size * step_num

return lineage_dict
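
# Worked example for the size arithmetic above (hypothetical numbers):
# get_dataset_size() returns 32, step_num = 640, epoch = 10, so steps per
# epoch = 640 / 10 = 64 and train_dataset_size = 32 * 64 = 2048.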

def get_dataset_path(self, output_dataset):
"""
Get dataset path of MindDataset object.

Args:
output_dataset (Union[MindDataset, Dataset]): See
mindspore.dataengine.datasets.Dataset.

Returns:
str, dataset path.
"""
if isinstance(output_dataset, MindDataset):
return output_dataset.dataset_file
return self.get_dataset_path(output_dataset.input[0])

@staticmethod
def get_dataset_path_wrapped(dataset):
"""
A wrapper for obtaining dataset path.

Args:
dataset (Union[MindDataset, Dataset]): See
mindspore.dataengine.datasets.Dataset.

Returns:
str, dataset path.
"""
dataset_path = None
if isinstance(dataset, Dataset):
try:
dataset_path = AnalyzeObject().get_dataset_path(dataset)
except IndexError:
dataset_path = None
validate_file_path(dataset_path, allow_empty=True)
return dataset_path

@staticmethod
def get_file_path(list_callback):
"""
Get ckpt_file_name and summary_log_path from MindSpore callback list.

Args:
list_callback (list[Callback]): The MindSpore training Callback list.

Returns:
tuple, contains ckpt_file_name and summary_log_path.
"""
ckpt_file_path = None
summary_log_path = None
for callback in list_callback:
if isinstance(callback, ModelCheckpoint):
ckpt_file_path = callback.latest_ckpt_file_name
if isinstance(callback, SummaryStep):
summary_log_path = callback.summary_file_name

if ckpt_file_path:
validate_file_path(ckpt_file_path)
ckpt_file_path = os.path.realpath(ckpt_file_path)

if summary_log_path:
validate_file_path(summary_log_path)
summary_log_path = os.path.realpath(summary_log_path)

return ckpt_file_path, summary_log_path

@staticmethod
def get_file_size(file_path):
"""
Get the file size.

Args:
file_path (str): The file path.

Returns:
int, the file size.
"""
try:
return os.path.getsize(file_path)
except (OSError, IOError) as error:
error_msg = f"Error when get model file size: {error}"
log.error(error_msg)
raise LineageGetModelFileError(error_msg)

@staticmethod
def get_model_size(ckpt_file_path):
"""
Get the total size of the model checkpoint file.

Args:
ckpt_file_path (str): The checkpoint file path.

Returns:
int, the total file size.
"""
if ckpt_file_path:
ckpt_file_path = os.path.realpath(ckpt_file_path)
ckpt_file_size = AnalyzeObject.get_file_size(ckpt_file_path)
else:
ckpt_file_size = 0

return ckpt_file_size

@staticmethod
def get_network_args(run_context_args, train_lineage):
"""
Get the parameters related to the network,
such as optimizer, loss function.

Args:
run_context_args (dict): It contains all information of the training job.
train_lineage (dict): A dict contains lineage metadata.

Returns:
dict, the lineage metadata.
"""
network = run_context_args.get('train_network')
validate_network(network)
optimizer = run_context_args.get('optimizer')
if not optimizer:
optimizer = AnalyzeObject.get_optimizer_by_network(network)
loss_fn = run_context_args.get('loss_fn')
if not loss_fn:
loss_fn = AnalyzeObject.get_loss_fn_by_network(network)
loss = None
else:
loss = run_context_args.get('net_outputs')
if loss:
log.info('Calculating loss...')
loss_numpy = loss.asnumpy()
loss = float(np.atleast_1d(loss_numpy)[0])
log.debug('loss: %s', loss)
train_lineage[Metadata.loss] = loss
else:
train_lineage[Metadata.loss] = None
# Analyze classname of optimizer, loss function and training network.
train_lineage[Metadata.optimizer] = type(optimizer).__name__ \
if optimizer else None
train_lineage[Metadata.train_network] = AnalyzeObject.get_backbone_network(network)
train_lineage[Metadata.loss_function] = type(loss_fn).__name__ \
if loss_fn else None

return train_lineage
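
A minimal usage sketch of the analyzers above, assuming `optimizer` and
`callbacks` are real MindSpore objects (both names are hypothetical):

lr = AnalyzeObject.get_learning_rate(optimizer)           # float or None
ckpt_path, summary_path = AnalyzeObject.get_file_path(callbacks)
model_size = AnalyzeObject.get_model_size(ckpt_path)      # 0 if no checkpoint yet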

+ 14
- 0
mindinsight/lineagemgr/common/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 14
- 0
mindinsight/lineagemgr/common/exceptions/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 207
- 0
mindinsight/lineagemgr/common/exceptions/error_code.py View File

@@ -0,0 +1,207 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Lineage error code and messages."""
from enum import Enum, unique
from mindinsight.utils.constant import LineageMgrErrors as LineageErrorCodes


_PARAM_ERROR_MASK = 0b00001 << 7
_MINDSPORE_COLLECTOR_ERROR = 0b00011 << 7
_MODEL_LINEAGE_API_ERROR_MASK = 0b00100 << 7
_DATASET_COLLECTOR_ERROR_MASK = 0b00101 << 7
_DATASET_LINEAGE_ERROR_MASK = 0b00110 << 7
_SUMMARY_ANALYZE_ERROR_MASK = 0b00111 << 7
_QUERIER_ERROR_MASK = 0b01000 << 7


@unique
class LineageErrors(LineageErrorCodes):
"""Lineage error codes."""
PARAM_TYPE_ERROR = 0 | _PARAM_ERROR_MASK
PARAM_VALUE_ERROR = 1 | _PARAM_ERROR_MASK
PARAM_MISSING_ERROR = 2 | _PARAM_ERROR_MASK
PARAM_SUMMARY_RECORD_ERROR = 3 | _PARAM_ERROR_MASK
PARAM_RAISE_EXCEPTION_ERROR = 4 | _PARAM_ERROR_MASK

# MindSpore Collector error codes.
PARAM_RUN_CONTEXT_ERROR = 0 | _MINDSPORE_COLLECTOR_ERROR
PARAM_OPTIMIZER_ERROR = 1 | _MINDSPORE_COLLECTOR_ERROR
PARAM_LOSS_FN_ERROR = 2 | _MINDSPORE_COLLECTOR_ERROR
PARAM_TRAIN_NETWORK_ERROR = 3 | _MINDSPORE_COLLECTOR_ERROR
PARAM_DATASET_ERROR = 4 | _MINDSPORE_COLLECTOR_ERROR
PARAM_EPOCH_NUM_ERROR = 5 | _MINDSPORE_COLLECTOR_ERROR
PARAM_BATCH_NUM_ERROR = 6 | _MINDSPORE_COLLECTOR_ERROR
PARAM_TRAIN_PARALLEL_ERROR = 7 | _MINDSPORE_COLLECTOR_ERROR
PARAM_DEVICE_NUMBER_ERROR = 8 | _MINDSPORE_COLLECTOR_ERROR
PARAM_FILE_PATH_ERROR = 9 | _MINDSPORE_COLLECTOR_ERROR
PARAM_DATASET_SIZE_ERROR = 10 | _MINDSPORE_COLLECTOR_ERROR
PARAM_LEARNING_RATE_ERROR = 11 | _MINDSPORE_COLLECTOR_ERROR
PARAM_EVAL_METRICS_ERROR = 12 | _MINDSPORE_COLLECTOR_ERROR
PARAM_BATCH_SIZE_ERROR = 13 | _MINDSPORE_COLLECTOR_ERROR
PARAM_NET_OUTPUTS_ERROR = 14 | _MINDSPORE_COLLECTOR_ERROR
PARAM_CALLBACK_LIST_ERROR = 15 | _MINDSPORE_COLLECTOR_ERROR
LINEAGE_GET_MODEL_FILE_ERROR = 16 | _MINDSPORE_COLLECTOR_ERROR
LOG_LINEAGE_INFO_ERROR = 17 | _MINDSPORE_COLLECTOR_ERROR
PARAM_STEP_NUM_ERROR = 18 | _MINDSPORE_COLLECTOR_ERROR

# Model lineage error codes.
LINEAGE_PARAM_OPERATION_ERROR = 0 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_METRIC_ERROR = 1 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_LOSS_FUNCTION_ERROR = 4 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_TRAIN_DATASET_PATH_ERROR = 5 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_TRAIN_DATASET_COUNT_ERROR = 6 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_TEST_DATASET_PATH_ERROR = 7 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_TEST_DATASET_COUNT_ERROR = 8 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_NETWORK_ERROR = 9 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_OPTIMIZER_ERROR = 10 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_LEARNING_RATE_ERROR = 11 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_EPOCH_ERROR = 12 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_BATCH_SIZE_ERROR = 13 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_NOT_SUPPORT_ERROR = 14 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_LOSS_ERROR = 15 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_MODEL_SIZE_ERROR = 16 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_SUMMARY_DIR_ERROR = 17 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_SORTED_NAME_ERROR = 18 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_SORTED_TYPE_ERROR = 19 | _MODEL_LINEAGE_API_ERROR_MASK

LINEAGE_DIR_NOT_EXIST_ERROR = 20 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_SUMMARY_DATA_ERROR = 21 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_FILE_NOT_FOUND_ERROR = 22 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_SUMMARY_PATH_ERROR = 23 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_SEARCH_CONDITION_PARAM_ERROR = 24 | _MODEL_LINEAGE_API_ERROR_MASK
LINEAGE_PARAM_LINEAGE_TYPE_ERROR = 25 | _MODEL_LINEAGE_API_ERROR_MASK


SUMMARY_ANALYZE_ERROR = 0 | _SUMMARY_ANALYZE_ERROR_MASK
SUMMARY_VERIFICATION_ERROR = 1 | _SUMMARY_ANALYZE_ERROR_MASK

# Querier error codes.
EVENT_NOT_EXIST_ERROR = 0 | _QUERIER_ERROR_MASK
QUERIER_PARAM_ERROR = 1 | _QUERIER_ERROR_MASK
SUMMARY_PARSE_FAIL_ERROR = 2 | _QUERIER_ERROR_MASK
EVENT_FIELD_NOT_EXIST_ERROR = 4 | _QUERIER_ERROR_MASK


@unique
class LineageErrorMsg(Enum):
"""Lineage error messages."""
PARAM_TYPE_ERROR = "TypeError. {}"
PARAM_VALUE_ERROR = "ValueError. {}"
PARAM_MISSING_ERROR = "MissingError. {}"
PARAM_LIMIT_ERROR = "Invalid input limit. 0 < limit <= 100"
PARAM_OFFSET_ERROR = "Invalid input offset. 0 <= offset <= 100000"
PARAM_SUMMARY_RECORD_ERROR = "Invalid value for summary_record. It should be an instance of " \
"mindspore.train.summary.SummaryRecord"
PARAM_RAISE_EXCEPTION_ERROR = "Invalid value for raise_exception. It should be True or False."
# Lineage error messages.
LINEAGE_PARAM_SUMMARY_PATH_ERROR = "The parameter summary path error: {}"
LINEAGE_SUMMARY_DATA_ERROR = "Query summary data error: {}"
LINEAGE_FILE_NOT_FOUND_ERROR = "File not found error: {}"
LINEAGE_DIR_NOT_EXIST_ERROR = "Dir not exist error: {}"
LINEAGE_SEARCH_CONDITION_PARAM_ERROR = "Search_condition param error: {}"

# MindSpore Collector error messages.
PARAM_RUN_CONTEXT_ERROR = "The parameter run_context is invalid. It should be an instance of " \
"mindspore.train.callback.RunContext. {}"

PARAM_OPTIMIZER_ERROR = "The parameter optimizer is invalid. It should be an instance of " \
"mindspore.nn.optim.optimizer.Optimizer."

PARAM_LOSS_FN_ERROR = "The parameter loss_fn is invalid. It should be a Function."

PARAM_NET_OUTPUTS_ERROR = "The parameter net_outputs is invalid. It should be a Tensor."

PARAM_TRAIN_NETWORK_ERROR = "The parameter train_network is invalid. It should be an instance of " \
"mindspore.nn.cell.Cell."

PARAM_EPOCH_NUM_ERROR = "The parameter epoch is invalid. It should be a positive integer."

PARAM_STEP_NUM_ERROR = "The parameter step_num is invalid. It should be a positive integer."

PARAM_BATCH_NUM_ERROR = "The parameter batch_num is invalid. It should be a non-negative integer."

PARAM_TRAIN_PARALLEL_ERROR = "The parameter parallel_mode is invalid. It should be an integer" \
"between 0 and 4."

PARAM_DEVICE_NUMBER_ERROR = "The parameter device_number is invalid. It should be a positive integer."

PARAM_LEARNING_RATE_ERROR = "The parameter learning_rate is invalid. It should be a float number or " \
"an instance of mindspore.common.tensor.Tensor."

PARAM_EVAL_METRICS_ERROR = "The parameter metrics is invalid. It should be a dictionary."

PARAM_BATCH_SIZE_ERROR = "The parameter batch_size is invalid. It should be a non-negative integer."

PARAM_CALLBACK_LIST_ERROR = "The parameter list_callback is invalid. It should be an instance of " \
"mindspore.train.callback._ListCallback."

LINEAGE_GET_MODEL_FILE_ERROR = "Error when get model file size. {}"

LINEAGE_METRIC_ERROR = "The parameter {} is invalid. " \
"It should be a dict and the value should be a float or a integer"

LINEAGE_COMPARE_OPERATION_ERROR = "The schema error and compare operation should be" \
" 'eq', 'lt', 'gt', 'ge', 'le', 'in'."

LINEAGE_PARAM_SUMMARY_DIR_ERROR = "The parameter summary_dir is invalid. It should be a dict and the value " \
"should be a string"

LINEAGE_TRAIN_DATASET_PATH_ERROR = "The parameter train_dataset_path is invalid." \
" It should be a dict and the value should be a string"

LINEAGE_TRAIN_DATASET_COUNT_ERROR = "The parameter train_dataset_count is invalid. It should be a dict " \
"and the value should be an integer between 0 and pow(2, 63) - 1."

LINEAGE_TEST_DATASET_PATH_ERROR = "The parameter test_dataset_path is invalid. " \
"It should be a dict and the value should be a string"

LINEAGE_TEST_DATASET_COUNT_ERROR = "The parameter test_dataset_count is invalid. It should be a dict " \
"and the value should be an integer between 0 and pow(2, 63) - 1."

LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string"

LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. It should be a dict and the value should be a string"

LINEAGE_LOSS_FUNCTION_ERROR = "The parameter loss_function is invalid. " \
"It should be a dict and the value should be a string"

LINEAGE_LOSS_ERROR = "The parameter loss is invalid. " \
"It should be a float."

LINEAGE_MODEL_SIZE_ERROR = "The parameter model_size is invalid. " \
"It should be an integer between 0 and pow(2, 63) - 1."

LINEAGE_LEARNING_RATE_ERROR = "The parameter learning_rate is invalid. " \
"It should be a dict and the value should be a float or an integer."

LINEAGE_PARAM_SORTED_NAME_ERROR = "The parameter sorted_name is invalid. " \
"It should be a string."

LINEAGE_PARAM_SORTED_TYPE_ERROR = "The parameter sorted_type is invalid. " \
"It should be a string."

LINEAGE_PARAM_LINEAGE_TYPE_ERROR = "The parameter lineage_type is invalid. " \
"It should be None, 'dataset' or 'model'."

SUMMARY_ANALYZE_ERROR = "Failed to analyze summary log. {}"
SUMMARY_VERIFICATION_ERROR = "Verification failed in summary analysis. {}"

# Querier error messages.
EVENT_NOT_EXIST_ERROR = "Train and evaluation events do not exist in the summary log."
QUERIER_PARAM_ERROR = "Querier param <{}> invalid. {}"
SUMMARY_PARSE_FAIL_ERROR = "All summary logs parsing failed."
EVENT_FIELD_NOT_EXIST_ERROR = 'Event field <{}> does not exist.'

LOG_LINEAGE_INFO_ERROR = "Failed to write lineage information into the log file. {}"
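
For reference, a concrete error code composes as below; the sketch ignores
any base offset contributed by LineageMgrErrors, which is defined elsewhere:

_PARAM_ERROR_MASK = 0b00001 << 7             # 128, the category bits
PARAM_VALUE_ERROR = 1 | _PARAM_ERROR_MASK    # 129, low bits index the error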

+ 191
- 0
mindinsight/lineagemgr/common/exceptions/exceptions.py View File

@@ -0,0 +1,191 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Definition of error code and relative messages in lineage module."""
from mindinsight.utils.exceptions import MindInsightException
from mindinsight.lineagemgr.common.exceptions.error_code import LineageErrors, LineageErrorMsg


class LineageParamTypeError(MindInsightException):
"""The parameter type error in lineage module."""

def __init__(self, msg):
super(LineageParamTypeError, self).__init__(
error=LineageErrors.PARAM_TYPE_ERROR,
message=LineageErrorMsg.PARAM_TYPE_ERROR.value.format(msg)
)


class LineageParamValueError(MindInsightException):
"""The parameter value error in lineage module."""

def __init__(self, msg):
super(LineageParamValueError, self).__init__(
error=LineageErrors.PARAM_VALUE_ERROR,
message=LineageErrorMsg.PARAM_VALUE_ERROR.value.format(msg)
)


class LineageParamMissingError(MindInsightException):
"""The parameter missing error in lineage module."""

def __init__(self, msg):
super(LineageParamMissingError, self).__init__(
error=LineageErrors.PARAM_MISSING_ERROR,
message=LineageErrorMsg.PARAM_MISSING_ERROR.value.format(msg)
)


class LineageParamRunContextError(MindInsightException):
"""The input parameter run_context error in lineage module."""

def __init__(self, msg):
super(LineageParamRunContextError, self).__init__(
error=LineageErrors.PARAM_RUN_CONTEXT_ERROR,
message=LineageErrorMsg.PARAM_RUN_CONTEXT_ERROR.value.format(msg)
)


class LineageGetModelFileError(MindInsightException):
"""The get model file error in lineage module."""

def __init__(self, msg):
super(LineageGetModelFileError, self).__init__(
error=LineageErrors.LINEAGE_GET_MODEL_FILE_ERROR,
message=LineageErrorMsg.LINEAGE_GET_MODEL_FILE_ERROR.value.format(msg)
)


class LineageSearchModelParamError(MindInsightException):
"""The lineage search model param error."""
def __init__(self, msg):
super(LineageSearchModelParamError, self).__init__(
error=LineageErrors.LINEAGE_PARAM_NOT_SUPPORT_ERROR,
message=LineageErrorMsg.LINEAGE_PARAM_NOT_SUPPORT_ERROR.value.format(msg)
)


class LineageSummaryAnalyzeException(MindInsightException):
"""The summary analyze error in lineage module."""

def __init__(self, msg=None):
if msg is None:
msg = ''
super(LineageSummaryAnalyzeException, self).__init__(
error=LineageErrors.SUMMARY_ANALYZE_ERROR,
message=LineageErrorMsg.SUMMARY_ANALYZE_ERROR.value.format(msg)
)


class LineageVerificationException(MindInsightException):
"""The summary verification error in lineage module."""
def __init__(self, msg):
super(LineageVerificationException, self).__init__(
error=LineageErrors.SUMMARY_VERIFICATION_ERROR,
message=LineageErrorMsg.SUMMARY_VERIFICATION_ERROR.value.format(msg)
)


class LineageLogError(MindInsightException):
"""The lineage collector error."""
def __init__(self, msg):
super(LineageLogError, self).__init__(
error=LineageErrors.LOG_LINEAGE_INFO_ERROR,
message=LineageErrorMsg.LOG_LINEAGE_INFO_ERROR.value.format(msg)
)


class LineageEventNotExistException(MindInsightException):
"""The querier error in lineage module."""

def __init__(self):
super(LineageEventNotExistException, self).__init__(
error=LineageErrors.EVENT_NOT_EXIST_ERROR,
message=LineageErrorMsg.EVENT_NOT_EXIST_ERROR.value
)


class LineageQuerierParamException(MindInsightException):
"""The querier error in lineage module."""

def __init__(self, *msg):
super(LineageQuerierParamException, self).__init__(
error=LineageErrors.QUERIER_PARAM_ERROR,
message=LineageErrorMsg.QUERIER_PARAM_ERROR.value.format(*msg)
)


class LineageSummaryParseException(MindInsightException):
"""The querier error in lineage module."""

def __init__(self):
super(LineageSummaryParseException, self).__init__(
error=LineageErrors.SUMMARY_PARSE_FAIL_ERROR,
message=LineageErrorMsg.SUMMARY_PARSE_FAIL_ERROR.value
)


class LineageEventFieldNotExistException(MindInsightException):
"""The querier error in lineage module."""

def __init__(self, msg):
super(LineageEventFieldNotExistException, self).__init__(
error=LineageErrors.EVENT_FIELD_NOT_EXIST_ERROR,
message=LineageErrorMsg.EVENT_FIELD_NOT_EXIST_ERROR.value.format(msg)
)


class LineageParamSummaryPathError(MindInsightException):
"""The lineage parameter summary path error."""
def __init__(self, msg):
super(LineageParamSummaryPathError, self).__init__(
error=LineageErrors.LINEAGE_PARAM_SUMMARY_PATH_ERROR,
message=LineageErrorMsg.LINEAGE_PARAM_SUMMARY_PATH_ERROR.value.format(msg)
)


class LineageQuerySummaryDataError(MindInsightException):
"""Query summary data error in lineage module."""
def __init__(self, msg):
super(LineageQuerySummaryDataError, self).__init__(
error=LineageErrors.LINEAGE_SUMMARY_DATA_ERROR,
message=LineageErrorMsg.LINEAGE_SUMMARY_DATA_ERROR.value.format(msg)
)


class LineageFileNotFoundError(MindInsightException):
"""Summary file not found in lineage module."""
def __init__(self, msg):
super(LineageFileNotFoundError, self).__init__(
error=LineageErrors.LINEAGE_FILE_NOT_FOUND_ERROR,
message=LineageErrorMsg.LINEAGE_FILE_NOT_FOUND_ERROR.value.format(msg)
)


class LineageDirNotExistError(MindInsightException):
"""Directory not exist in lineage module."""
def __init__(self, msg):
super(LineageDirNotExistError, self).__init__(
error=LineageErrors.LINEAGE_DIR_NOT_EXIST_ERROR,
message=LineageErrorMsg.LINEAGE_DIR_NOT_EXIST_ERROR.value.format(msg)
)


class LineageSearchConditionParamError(MindInsightException):
"""Search condition param is invalid in lineage module."""
def __init__(self, msg):
super(LineageSearchConditionParamError, self).__init__(
error=LineageErrors.LINEAGE_SEARCH_CONDITION_PARAM_ERROR,
message=LineageErrorMsg.LINEAGE_SEARCH_CONDITION_PARAM_ERROR.value.format(msg)
)
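
A quick sketch of how these classes render messages, assuming
MindInsightException keeps the formatted message it receives
(the input string is hypothetical):

err = LineageParamValueError("epoch must be a positive integer")
# The message becomes "ValueError. epoch must be a positive integer"
# through LineageErrorMsg.PARAM_VALUE_ERROR.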

+ 20
- 0
mindinsight/lineagemgr/common/log.py View File

@@ -0,0 +1,20 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Import mindinsight unified log module."""
from mindinsight.utils.log import setup_logger

LOG_NAME = "lineage"
LOG_MODULE = "lineage"
logger = setup_logger(sub_module=LOG_MODULE, log_name=LOG_NAME)

+ 149
- 0
mindinsight/lineagemgr/common/path_parser.py View File

@@ -0,0 +1,149 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""This file provides path resolution."""
import os

from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher


class SummaryPathParser:
"""
Summary path parser.

This is a utility class: users can use it to parse the summary dir,
parse the summary log path, get the latest lineage summary log, etc.
"""
LINEAGE_SUMMARY_SUFFIX = '_lineage'
_LINEAGE_SUMMARY_SUFFIX_LEN = len(LINEAGE_SUMMARY_SUFFIX)

@staticmethod
def get_summary_dirs(summary_base_dir):
"""
Get summary dirs according to summary base dir.

Args:
summary_base_dir (str): Summary base dir.

Returns:
list[str], all summary dirs in summary base dir. Each summary dir is
an absolute path.
"""
summary_watcher = SummaryWatcher()
relative_dirs = summary_watcher.list_summary_directories(
summary_base_dir=summary_base_dir
)
summary_dirs = list(
map(
lambda item: os.path.realpath(
os.path.join(summary_base_dir, item.get('relative_path'))
),
relative_dirs
)
)
return summary_dirs

@staticmethod
def get_latest_lineage_summary(summary_dir):
"""
Get latest lineage summary log path according to summary dir.

Args:
summary_dir (str): Summary dir.

Returns:
Union[str, None], if the lineage summary log exists, return the path,
else return None. The lineage summary log path is an absolute path.
"""
summary_watcher = SummaryWatcher()
summaries = summary_watcher.list_summaries(summary_base_dir=summary_dir)
latest_file_name = SummaryPathParser._get_latest_lineage_file(summaries)
return os.path.join(summary_dir, latest_file_name) \
if latest_file_name is not None else None

@staticmethod
def get_latest_lineage_summaries(summary_base_dir):
"""
Get all latest lineage summary logs in summary base dir.

Args:
summary_base_dir (str): Summary base dir.

Returns:
list[str], all latest lineage summary logs in summary base dir. Each
lineage summary log path is an absolute path.
"""
summary_watcher = SummaryWatcher()
relative_dirs = summary_watcher.list_summary_directories(
summary_base_dir=summary_base_dir
)
latest_summaries = []
for item in relative_dirs:
relative_dir = item.get('relative_path')
summaries = summary_watcher.list_summaries(
summary_base_dir=summary_base_dir,
relative_path=relative_dir
)
latest_file_name = SummaryPathParser._get_latest_lineage_file(
summaries
)
if latest_file_name is None:
continue
latest_file = os.path.realpath(
os.path.join(
summary_base_dir,
relative_dir,
latest_file_name
)
)
latest_summaries.append(latest_file)
return latest_summaries

@staticmethod
def _get_latest_lineage_file(summaries):
"""
Get latest lineage summary file.

If there is a file with the suffix `LINEAGE_SUMMARY_SUFFIX`, check
whether there is a file with the same name that does not include the
suffix `LINEAGE_SUMMARY_SUFFIX`. When both exist, the file is considered
to be a lineage summary log.

Args:
summaries (list[dict]): All summary logs info in summary dir.

Returns:
str, the latest lineage summary file name.
"""
try:
latest_summary = max(
summaries,
key=lambda summary: summary.get('create_time')
)
except ValueError:
return None
max_create_time = latest_summary.get('create_time')
summary_file_names = []
for summary in summaries:
if summary.get('create_time') == max_create_time:
summary_file_names.append(summary.get('file_name'))

latest_lineage_name = None
for name in summary_file_names:
if not name.endswith(SummaryPathParser.LINEAGE_SUMMARY_SUFFIX):
continue
ms_name = name[:-SummaryPathParser._LINEAGE_SUMMARY_SUFFIX_LEN]
if ms_name in summary_file_names:
latest_lineage_name = name
return latest_lineage_name
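
For example, under the pairing rule of _get_latest_lineage_file
(hypothetical file names):

summaries = [
    {'file_name': 'train.summary.1', 'create_time': 100},
    {'file_name': 'train.summary.1_lineage', 'create_time': 100},
]
# Both entries share the max create_time and the '_lineage' file has an
# unsuffixed twin, so 'train.summary.1_lineage' is returned.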

+ 56
- 0
mindinsight/lineagemgr/common/utils.py View File

@@ -0,0 +1,56 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Lineage utils."""
from functools import wraps

from mindinsight.lineagemgr.common.exceptions.exceptions import LineageParamRunContextError, \
LineageGetModelFileError, LineageLogError
from mindinsight.utils.exceptions import MindInsightException


def enum_to_list(enum):
return [enum_ele.value for enum_ele in enum]


def try_except(logger):
"""
Catch or raise exceptions while collecting lineage.

Args:
logger (logger): The logger instance which logs the error info.

Returns:
function, the decorator which catches exceptions raised by the decorated function.
"""
def try_except_decorate(func):
@wraps(func)
def wrapper(self, *args, **kwargs):
try:
func(self, *args, **kwargs)
except (AttributeError, MindInsightException,
LineageParamRunContextError, LineageLogError,
LineageGetModelFileError, IOError) as err:
logger.error(err)

try:
raise_except = self.raise_exception
except AttributeError:
raise_except = False

if raise_except is True:
raise

return wrapper
return try_except_decorate
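
A minimal sketch of how a collector might use try_except; the Collector
class is hypothetical, only the `raise_exception` attribute is required,
and `logger` is assumed to be the module logger from
mindinsight.lineagemgr.common.log:

class Collector:
    raise_exception = False   # errors are logged and swallowed when False

    @try_except(logger)
    def record(self):
        raise IOError('summary disk full')   # logged; re-raised only if True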

+ 14
- 0
mindinsight/lineagemgr/common/validator/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 253
- 0
mindinsight/lineagemgr/common/validator/model_parameter.py View File

@@ -0,0 +1,253 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Define schema of model lineage input parameters."""
from marshmallow import Schema, fields, ValidationError, pre_load, validates
from marshmallow.validate import Range, OneOf

from mindinsight.lineagemgr.common.exceptions.error_code import LineageErrorMsg, \
LineageErrors
from mindinsight.lineagemgr.common.exceptions.exceptions import \
LineageParamTypeError, LineageParamValueError
from mindinsight.lineagemgr.common.log import logger
from mindinsight.lineagemgr.common.utils import enum_to_list
from mindinsight.lineagemgr.querier.querier import LineageType
from mindinsight.lineagemgr.querier.query_model import FIELD_MAPPING
from mindinsight.utils.exceptions import MindInsightException

try:
from mindspore.dataset.engine import Dataset
from mindspore.nn import Cell, Optimizer
from mindspore.common.tensor import Tensor
from mindspore.train.callback import _ListCallback
except (ImportError, ModuleNotFoundError):
logger.error('MindSpore Not Found!')


class RunContextArgs(Schema):
"""Define the parameter schema for RunContext."""
optimizer = fields.Function(allow_none=True)
loss_fn = fields.Function(allow_none=True)
net_outputs = fields.Function(allow_none=True)
train_network = fields.Function(allow_none=True)
train_dataset = fields.Function(allow_none=True)
epoch_num = fields.Int(allow_none=True, validate=Range(min=1))
batch_num = fields.Int(allow_none=True, validate=Range(min=0))
cur_step_num = fields.Int(allow_none=True, validate=Range(min=0))
parallel_mode = fields.Str(allow_none=True)
device_number = fields.Int(allow_none=True, validate=Range(min=1))
list_callback = fields.Function(allow_none=True)

@pre_load
def check_optimizer(self, data, **kwargs):
optimizer = data.get("optimizer")
if optimizer and not isinstance(optimizer, Optimizer):
raise ValidationError({'optimizer': [
"Parameter optimizer must be an instance of mindspore.nn.optim.Optimizer."
]})
return data

@pre_load
def check_train_network(self, data, **kwargs):
train_network = data.get("train_network")
if train_network and not isinstance(train_network, Cell):
raise ValidationError({'train_network': [
"Parameter train_network must be an instance of mindspore.nn.Cell."]})
return data

@pre_load
def check_train_dataset(self, data, **kwargs):
train_dataset = data.get("train_dataset")
if train_dataset and not isinstance(train_dataset, Dataset):
raise ValidationError({'train_dataset': [
"Parameter train_dataset must be an instance of "
"mindspore.dataengine.datasets.Dataset"]})
return data

@pre_load
def check_loss(self, data, **kwargs):
net_outputs = data.get("net_outputs")
if net_outputs and not isinstance(net_outputs, Tensor):
raise ValidationError({'net_outputs': [
"The parameter net_outputs is invalid. It should be a Tensor."
]})
return data

@pre_load
def check_list_callback(self, data, **kwargs):
list_callback = data.get("list_callback")
if list_callback and not isinstance(list_callback, _ListCallback):
raise ValidationError({'list_callback': [
"Parameter list_callback must be an instance of "
"mindspore.train.callback._ListCallback."
]})
return data


class EvalParameter(Schema):
"""Define the parameter schema for Evaluation job."""
valid_dataset = fields.Function(allow_none=True)
metrics = fields.Dict(allow_none=True)

@pre_load
def check_valid_dataset(self, data, **kwargs):
valid_dataset = data.get("valid_dataset")
if valid_dataset and not isinstance(valid_dataset, Dataset):
raise ValidationError({'valid_dataset': [
"Parameter valid_dataset must be an instance of "
"mindspore.dataengine.datasets.Dataset"]})
return data


class SearchModelConditionParameter(Schema):
"""Define the search model condition parameter schema."""
summary_dir = fields.Dict()
loss_function = fields.Dict()
train_dataset_path = fields.Dict()
train_dataset_count = fields.Dict()
test_dataset_path = fields.Dict()
test_dataset_count = fields.Dict()
network = fields.Dict()
optimizer = fields.Dict()
learning_rate = fields.Dict()
epoch = fields.Dict()
batch_size = fields.Dict()
loss = fields.Dict()
model_size = fields.Dict()
limit = fields.Int(validate=lambda n: 0 < n <= 100)
offset = fields.Int(validate=lambda n: 0 <= n <= 100000)
sorted_name = fields.Str()
sorted_type = fields.Str(allow_none=True)
lineage_type = fields.Str(
validate=OneOf(enum_to_list(LineageType)),
allow_none=True
)

@staticmethod
def check_dict_value_type(data, value_type):
"""Check dict value type and int scope."""
for key, value in data.items():
if key == "in":
if not isinstance(value, (list, tuple)):
raise ValidationError("In operation's value must be list or tuple.")
else:
if not isinstance(value, value_type):
raise ValidationError("Wrong value type.")
if value_type is int:
if value < 0 or value > pow(2, 63) - 1:
raise ValidationError("Int value should <= pow(2, 63) - 1.")
if isinstance(value, bool):
raise ValidationError("Wrong value type.")

@staticmethod
def check_param_value_type(data):
"""Check input param's value type."""
for key, value in data.items():
if key == "in":
if not isinstance(value, (list, tuple)):
raise ValidationError("In operation's value must be list or tuple.")
else:
if isinstance(value, bool) or \
(not isinstance(value, float) and not isinstance(value, int)):
raise ValidationError("Wrong value type.")

@validates("loss")
def check_loss(self, data):
"""Check loss."""
SearchModelConditionParameter.check_param_value_type(data)

@validates("learning_rate")
def check_learning_rate(self, data):
"""Check learning_rate."""
SearchModelConditionParameter.check_param_value_type(data)

@validates("loss_function")
def check_loss_function(self, data):
SearchModelConditionParameter.check_dict_value_type(data, str)

@validates("train_dataset_path")
def check_train_dataset_path(self, data):
SearchModelConditionParameter.check_dict_value_type(data, str)

@validates("train_dataset_count")
def check_train_dataset_count(self, data):
SearchModelConditionParameter.check_dict_value_type(data, int)

@validates("test_dataset_path")
def check_test_dataset_path(self, data):
SearchModelConditionParameter.check_dict_value_type(data, str)

@validates("test_dataset_count")
def check_test_dataset_count(self, data):
SearchModelConditionParameter.check_dict_value_type(data, int)

@validates("network")
def check_network(self, data):
SearchModelConditionParameter.check_dict_value_type(data, str)

@validates("optimizer")
def check_optimizer(self, data):
SearchModelConditionParameter.check_dict_value_type(data, str)

@validates("epoch")
def check_epoch(self, data):
SearchModelConditionParameter.check_dict_value_type(data, int)

@validates("batch_size")
def check_batch_size(self, data):
SearchModelConditionParameter.check_dict_value_type(data, int)

@validates("model_size")
def check_model_size(self, data):
SearchModelConditionParameter.check_dict_value_type(data, int)

@validates("summary_dir")
def check_summary_dir(self, data):
SearchModelConditionParameter.check_dict_value_type(data, str)

@pre_load
def check_comparison(self, data, **kwargs):
"""Check the comparison conditions for all parameters in schema."""
for attr, condition in data.items():
if attr in ["limit", "offset", "sorted_name", "sorted_type", "lineage_type"]:
continue

if not isinstance(attr, str):
raise LineageParamValueError('The search attribute is not supported.')

if attr not in FIELD_MAPPING and not attr.startswith('metric_'):
raise LineageParamValueError('The search attribute is not supported.')

if not isinstance(condition, dict):
raise LineageParamTypeError("The search_condition element {} should be dict."
.format(attr))

for key in condition.keys():
if key not in ["eq", "lt", "gt", "le", "ge", "in"]:
raise LineageParamValueError("The compare condition should be in "
"('eq', 'lt', 'gt', 'le', 'ge', 'in').")

if attr.startswith('metric_'):
if len(attr) == 7:
raise LineageParamValueError(
'The search attribute is not supported.'
)
try:
SearchModelConditionParameter.check_param_value_type(condition)
except ValidationError:
raise MindInsightException(
error=LineageErrors.LINEAGE_PARAM_METRIC_ERROR,
message=LineageErrorMsg.LINEAGE_METRIC_ERROR.value.format(attr)
)
return data
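
As a sketch of the schema in use (hypothetical values; marshmallow's
Schema.validate returns a dict of errors, empty when the data is valid):

condition = {'learning_rate': {'ge': 0.001, 'lt': 0.01}, 'limit': 10}
errors = SearchModelConditionParameter().validate(condition)   # {} when valid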

+ 395
- 0
mindinsight/lineagemgr/common/validator/validate.py View File

@@ -0,0 +1,395 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Validate the parameters."""
import os

from marshmallow import ValidationError

from mindinsight.lineagemgr.common.exceptions.error_code import LineageErrors, LineageErrorMsg
from mindinsight.lineagemgr.common.exceptions.exceptions import LineageParamMissingError, \
LineageParamTypeError, LineageParamValueError, LineageDirNotExistError
from mindinsight.lineagemgr.common.log import logger as log
from mindinsight.lineagemgr.common.validator.validate_path import safe_normalize_path
from mindinsight.lineagemgr.querier.query_model import FIELD_MAPPING
from mindinsight.utils.exceptions import MindInsightException

try:
from mindspore.nn import Cell
from mindspore.train.summary import SummaryRecord
except (ImportError, ModuleNotFoundError):
log.warning('MindSpore Not Found!')

TRAIN_RUN_CONTEXT_ERROR_MAPPING = {
'optimizer': LineageErrors.PARAM_OPTIMIZER_ERROR,
'loss_fn': LineageErrors.PARAM_LOSS_FN_ERROR,
'net_outputs': LineageErrors.PARAM_NET_OUTPUTS_ERROR,
'train_network': LineageErrors.PARAM_TRAIN_NETWORK_ERROR,
'train_dataset': LineageErrors.PARAM_DATASET_ERROR,
'epoch_num': LineageErrors.PARAM_EPOCH_NUM_ERROR,
'batch_num': LineageErrors.PARAM_BATCH_NUM_ERROR,
'parallel_mode': LineageErrors.PARAM_TRAIN_PARALLEL_ERROR,
'device_number': LineageErrors.PARAM_DEVICE_NUMBER_ERROR,
'list_callback': LineageErrors.PARAM_CALLBACK_LIST_ERROR,
'train_dataset_size': LineageErrors.PARAM_DATASET_SIZE_ERROR,
}

SEARCH_MODEL_ERROR_MAPPING = {
'summary_dir': LineageErrors.LINEAGE_PARAM_SUMMARY_DIR_ERROR,
'loss_function': LineageErrors.LINEAGE_PARAM_LOSS_FUNCTION_ERROR,
'train_dataset_path': LineageErrors.LINEAGE_PARAM_TRAIN_DATASET_PATH_ERROR,
'train_dataset_count': LineageErrors.LINEAGE_PARAM_TRAIN_DATASET_COUNT_ERROR,
'test_dataset_path': LineageErrors.LINEAGE_PARAM_TEST_DATASET_PATH_ERROR,
'test_dataset_count': LineageErrors.LINEAGE_PARAM_TEST_DATASET_COUNT_ERROR,
'network': LineageErrors.LINEAGE_PARAM_NETWORK_ERROR,
'optimizer': LineageErrors.LINEAGE_PARAM_OPTIMIZER_ERROR,
'learning_rate': LineageErrors.LINEAGE_PARAM_LEARNING_RATE_ERROR,
'epoch': LineageErrors.LINEAGE_PARAM_EPOCH_ERROR,
'batch_size': LineageErrors.LINEAGE_PARAM_BATCH_SIZE_ERROR,
'limit': LineageErrors.PARAM_VALUE_ERROR,
'offset': LineageErrors.PARAM_VALUE_ERROR,
'loss': LineageErrors.LINEAGE_PARAM_LOSS_ERROR,
'model_size': LineageErrors.LINEAGE_PARAM_MODEL_SIZE_ERROR,
'sorted_name': LineageErrors.LINEAGE_PARAM_SORTED_NAME_ERROR,
'sorted_type': LineageErrors.LINEAGE_PARAM_SORTED_TYPE_ERROR,
'lineage_type': LineageErrors.LINEAGE_PARAM_LINEAGE_TYPE_ERROR
}


TRAIN_RUN_CONTEXT_ERROR_MSG_MAPPING = {
'optimizer': LineageErrorMsg.PARAM_OPTIMIZER_ERROR.value,
'loss_fn': LineageErrorMsg.PARAM_LOSS_FN_ERROR.value,
'net_outputs': LineageErrorMsg.PARAM_NET_OUTPUTS_ERROR.value,
'train_network': LineageErrorMsg.PARAM_TRAIN_NETWORK_ERROR.value,
'epoch_num': LineageErrorMsg.PARAM_EPOCH_NUM_ERROR.value,
'batch_num': LineageErrorMsg.PARAM_BATCH_NUM_ERROR.value,
'parallel_mode': LineageErrorMsg.PARAM_TRAIN_PARALLEL_ERROR.value,
'device_number': LineageErrorMsg.PARAM_DEVICE_NUMBER_ERROR.value,
'list_callback': LineageErrorMsg.PARAM_CALLBACK_LIST_ERROR.value
}

SEARCH_MODEL_ERROR_MSG_MAPPING = {
'summary_dir': LineageErrorMsg.LINEAGE_PARAM_SUMMARY_DIR_ERROR.value,
'loss_function': LineageErrorMsg.LINEAGE_LOSS_FUNCTION_ERROR.value,
'train_dataset_path': LineageErrorMsg.LINEAGE_TRAIN_DATASET_PATH_ERROR.value,
'train_dataset_count': LineageErrorMsg.LINEAGE_TRAIN_DATASET_COUNT_ERROR.value,
'test_dataset_path': LineageErrorMsg.LINEAGE_TEST_DATASET_PATH_ERROR.value,
'test_dataset_count': LineageErrorMsg.LINEAGE_TEST_DATASET_COUNT_ERROR.value,
'network': LineageErrorMsg.LINEAGE_NETWORK_ERROR.value,
'optimizer': LineageErrorMsg.LINEAGE_OPTIMIZER_ERROR.value,
'learning_rate': LineageErrorMsg.LINEAGE_LEARNING_RATE_ERROR.value,
'epoch': LineageErrorMsg.PARAM_EPOCH_NUM_ERROR.value,
'batch_size': LineageErrorMsg.PARAM_BATCH_SIZE_ERROR.value,
'limit': LineageErrorMsg.PARAM_LIMIT_ERROR.value,
'offset': LineageErrorMsg.PARAM_OFFSET_ERROR.value,
'loss': LineageErrorMsg.LINEAGE_LOSS_ERROR.value,
'model_size': LineageErrorMsg.LINEAGE_MODEL_SIZE_ERROR.value,
'sorted_name': LineageErrorMsg.LINEAGE_PARAM_SORTED_NAME_ERROR.value,
'sorted_type': LineageErrorMsg.LINEAGE_PARAM_SORTED_TYPE_ERROR.value,
'lineage_type': LineageErrorMsg.LINEAGE_PARAM_LINEAGE_TYPE_ERROR.value
}


EVAL_RUN_CONTEXT_ERROR_MAPPING = {
'valid_dataset': LineageErrors.PARAM_DATASET_ERROR,
'metrics': LineageErrors.PARAM_EVAL_METRICS_ERROR
}

EVAL_RUN_CONTEXT_ERROR_MSG_MAPPING = {
'metrics': LineageErrorMsg.PARAM_EVAL_METRICS_ERROR.value,
}


def validate_int_params(int_param, param_name):
"""
Verify that a parameter of integer type is valid.

Args:
int_param (int): The integer parameter to verify,
including epoch, dataset_batch_size and step_num.
param_name (str): The name of the parameter,
including epoch, dataset_batch_size and step_num.

Raises:
MindInsightException: If the parameters are invalid.
"""
if not isinstance(int_param, int) or int_param <= 0 or int_param > pow(2, 63) - 1:
if param_name == 'step_num':
log.error('Invalid step_num. The step number should be a positive integer.')
raise MindInsightException(error=LineageErrors.PARAM_STEP_NUM_ERROR,
message=LineageErrorMsg.PARAM_STEP_NUM_ERROR.value)

if param_name == 'dataset_batch_size':
log.error('Invalid dataset_batch_size. '
'The batch size should be a positive integer.')
raise MindInsightException(error=LineageErrors.PARAM_BATCH_SIZE_ERROR,
message=LineageErrorMsg.PARAM_BATCH_SIZE_ERROR.value)


def validate_network(network):
"""
Verify if the network is valid.

Args:
network (Cell): See mindspore.nn.Cell.

Raises:
LineageParamMissingError: If the network is None.
MindInsightException: If the network is invalid.
"""
if not network:
error_msg = "The input network for TrainLineage should not be None."
log.error(error_msg)
raise LineageParamMissingError(error_msg)

if not isinstance(network, Cell):
log.error("Invalid network. Network should be an instance"
"of mindspore.nn.Cell.")
raise MindInsightException(
error=LineageErrors.PARAM_TRAIN_NETWORK_ERROR,
message=LineageErrorMsg.PARAM_TRAIN_NETWORK_ERROR.value
)


def validate_file_path(file_path, allow_empty=False):
"""
Verify that the file_path is valid.

Args:
file_path (str): Input file path.
allow_empty (bool): Whether file_path can be empty.

Raises:
MindInsightException: If the parameters are invalid.
"""
try:
if allow_empty and not file_path:
return
safe_normalize_path(file_path, raise_key='dataset_path', safe_prefixes=None)
except ValidationError as error:
log.error(str(error))
raise MindInsightException(error=LineageErrors.PARAM_FILE_PATH_ERROR,
message=str(error))


def validate_train_run_context(schema, data):
"""
Validate mindspore train run_context data according to schema.

Args:
schema (Schema): data schema.
data (dict): data to check schema.

Raises:
MindInsightException: If the parameters are invalid.
"""

errors = schema().validate(data)
for error_key, error_msg in errors.items():
if error_key in TRAIN_RUN_CONTEXT_ERROR_MAPPING.keys():
error_code = TRAIN_RUN_CONTEXT_ERROR_MAPPING.get(error_key)
if TRAIN_RUN_CONTEXT_ERROR_MSG_MAPPING.get(error_key):
error_msg = TRAIN_RUN_CONTEXT_ERROR_MSG_MAPPING.get(error_key)
log.error(error_msg)
raise MindInsightException(error=error_code, message=error_msg)


def validate_eval_run_context(schema, data):
"""
Validate mindspore evaluation job run_context data according to schema.

Args:
schema (Schema): data schema.
data (dict): data to check schema.

Raises:
MindInsightException: If the parameters are invalid.
"""
errors = schema().validate(data)
for error_key, error_msg in errors.items():
if error_key in EVAL_RUN_CONTEXT_ERROR_MAPPING.keys():
error_code = EVAL_RUN_CONTEXT_ERROR_MAPPING.get(error_key)
if EVAL_RUN_CONTEXT_ERROR_MSG_MAPPING.get(error_key):
error_msg = EVAL_RUN_CONTEXT_ERROR_MSG_MAPPING.get(error_key)
log.error(error_msg)
raise MindInsightException(error=error_code, message=error_msg)


def validate_search_model_condition(schema, data):
"""
Validate search model condition.

Args:
schema (Schema): Data schema.
data (dict): Data to check schema.

Raises:
MindInsightException: If the parameters are invalid.
"""
error = schema().validate(data)
for error_key in error.keys():
if error_key in SEARCH_MODEL_ERROR_MAPPING.keys():
error_code = SEARCH_MODEL_ERROR_MAPPING.get(error_key)
error_msg = SEARCH_MODEL_ERROR_MSG_MAPPING.get(error_key)
log.error(error_msg)
raise MindInsightException(error=error_code, message=error_msg)


def validate_summary_record(summary_record):
"""
Validate summary_record.

Args:
summary_record (SummaryRecord): An instance of
mindspore.train.summary.SummaryRecord, which is used to record
the summary values.

Raises:
MindInsightException: If the parameters are invalid.
"""
if not isinstance(summary_record, SummaryRecord):
log.error("Invalid summary_record. It should be an instance "
"of mindspore.train.summary.SummaryRecord.")
raise MindInsightException(
error=LineageErrors.PARAM_SUMMARY_RECORD_ERROR,
message=LineageErrorMsg.PARAM_SUMMARY_RECORD_ERROR.value
)


def validate_raise_exception(raise_exception):
"""
Validate raise_exception.

Args:
raise_exception (bool): Whether to raise the exception. If True,
the exception is raised; otherwise, it is caught and execution continues.

Raises:
MindInsightException: If the parameters are invalid.
"""
if not isinstance(raise_exception, bool):
log.error("Invalid raise_exception. It should be True or False.")
raise MindInsightException(
error=LineageErrors.PARAM_RAISE_EXCEPTION_ERROR,
message=LineageErrorMsg.PARAM_RAISE_EXCEPTION_ERROR.value
)


def validate_filter_key(keys):
"""
Verify that the filter keys are valid.

Args:
keys (list): The keys to get the relative lineage info.

Raises:
LineageParamTypeError: If keys is not list.
LineageParamValueError: If the value of keys is invalid.
"""
filter_keys = [
'metric', 'hyper_parameters', 'algorithm',
'train_dataset', 'model', 'valid_dataset',
'dataset_graph'
]

if not isinstance(keys, list):
log.error("Keys must be list.")
raise LineageParamTypeError("Keys must be list.")

for element in keys:
if not isinstance(element, str):
log.error("Element of keys must be str.")
raise LineageParamTypeError("Element of keys must be str.")

if not set(keys).issubset(filter_keys):
err_msg = "Keys must be in {}.".format(filter_keys)
log.error(err_msg)
raise LineageParamValueError(err_msg)


def validate_condition(search_condition):
"""
Verify that the params in search_condition are valid.

Args:
search_condition (dict): The search condition.

Raises:
LineageParamTypeError: If the type of the param in search_condition is invalid.
LineageParamValueError: If the value of the param in search_condition is invalid.
"""
if not isinstance(search_condition, dict):
log.error("Invalid search_condition type, it should be dict.")
raise LineageParamTypeError("Invalid search_condition type, "
"it should be dict.")

if "limit" in search_condition:
if isinstance(search_condition.get("limit"), bool) \
or not isinstance(search_condition.get("limit"), int):
log.error("The limit must be int.")
raise LineageParamTypeError("The limit must be int.")

if "offset" in search_condition:
if isinstance(search_condition.get("offset"), bool) \
or not isinstance(search_condition.get("offset"), int):
log.error("The offset must be int.")
raise LineageParamTypeError("The offset must be int.")

if "sorted_name" in search_condition:
sorted_name = search_condition.get("sorted_name")
err_msg = "The sorted_name must be in {} or start with " \
"`metric_`.".format(list(FIELD_MAPPING.keys()))
if not isinstance(sorted_name, str):
log.error(err_msg)
raise LineageParamValueError(err_msg)
if sorted_name not in FIELD_MAPPING and not (
sorted_name.startswith('metric_') and len(sorted_name) > 7):
log.error(err_msg)
raise LineageParamValueError(err_msg)

sorted_type_param = ['ascending', 'descending', None]
if "sorted_type" in search_condition:
if "sorted_name" not in search_condition:
log.error("The sorted_name have to exist when sorted_type exists.")
raise LineageParamValueError("The sorted_name have to exist when sorted_type exists.")

if search_condition.get("sorted_type") not in sorted_type_param:
err_msg = "The sorted_type must be ascending or descending."
log.error(err_msg)
raise LineageParamValueError(err_msg)


def validate_path(summary_path):
"""
Verify that the summary path is valid.

Args:
summary_path (str): The summary path which is a dir.

Raises:
LineageParamValueError: If the input param value is invalid.
LineageDirNotExistError: If the summary path is invalid.
"""
try:
summary_path = safe_normalize_path(
summary_path, "summary_path", None, check_absolute_path=True
)
except ValidationError:
log.error("The summary path is invalid.")
raise LineageParamValueError("The summary path is invalid.")
if not os.path.isdir(summary_path):
log.error("The summary path does not exist or is not a dir.")
raise LineageDirNotExistError("The summary path does not exist or is not a dir.")

return summary_path
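
For instance, the validators above combine as follows; the condition values
are hypothetical and `loss` is assumed to be a key in FIELD_MAPPING:

search_condition = {'sorted_name': 'loss', 'sorted_type': 'ascending', 'limit': 5}
validate_condition(search_condition)   # raises LineageParam*Error on bad input
validate_search_model_condition(SearchModelConditionParameter, search_condition)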

+ 120
- 0
mindinsight/lineagemgr/common/validator/validate_path.py View File

@@ -0,0 +1,120 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Validate the input path."""
import os
from typing import Union, List
from marshmallow import ValidationError


def safe_normalize_path(
path,
raise_key,
safe_prefixes: Union[None, List[str]],
check_absolute_path=False,
allow_parent_dir=False,
):
"""
Returns safe normalized path.

This func validates the given path and returns its normalized form. If
safe_prefixes is given, this func will check whether the path is safe.

Note:
This func is not compatible with windows.

Caller should check returned path to ensure safety according to
business logic.

File scheme (rfc8089) is currently not supported.

Args:
path (str): Path to be normalized.

raise_key (str): The exception raise key

safe_prefixes (list[str]): If not None, path must start with one of the
safe_prefixes. Setting this arg to [] will cause all paths to be
considered unsafe. Normally, a prefix in this arg should end with "/".

check_absolute_path (bool): Whether to check that the path is absolute.

allow_parent_dir (bool): Whether to allow parent dirs ('..') in the path.

Returns:
str, normalized path.
"""
normalized_path = validate_and_normalize_path(
path,
raise_key=raise_key,
check_absolute_path=check_absolute_path,
allow_parent_dir=allow_parent_dir,
)

if safe_prefixes is None:
return normalized_path

normalized_str = str(normalized_path)
for prefix in safe_prefixes:
if normalized_str.startswith(prefix):
return normalized_path

raise ValidationError({raise_key: {"The path is invalid!"}})


def validate_and_normalize_path(
path,
raise_key,
check_absolute_path=False,
allow_parent_dir=False,
):
"""
Validates path and returns its normalized form.

If the path has a valid scheme, treat it as a URL; otherwise consider it
a unix local path.

Note:
File scheme (rfc8089) is currently not supported.

Args:
path (str): Path to be normalized.
raise_key (str): The exception raise key.
check_absolute_path (bool): Whether to check that the path is absolute.
allow_parent_dir (bool): Whether to allow parent dirs ('..') in the path.


Returns:
str, normalized path.
"""
if not path:
raise ValidationError({raise_key: {"The path is invalid!"}})

path_str = str(path)
if not allow_parent_dir:
path_components = path_str.split("/")
if ".." in path_components:
raise ValidationError({raise_key: {"The path is invalid!"}})

# The path has no valid scheme; treat it as a unix local path.
if check_absolute_path:
if not path_str.startswith("/"):
raise ValidationError({raise_key: {"The path is invalid!"}})
try:
# Most unix systems allow nearly any byte in a path, but realpath
# can still raise ValueError (e.g. on an embedded null byte).
normalized_path = os.path.realpath(path)
except ValueError:
raise ValidationError({raise_key: {"The path is invalid!"}})

return normalized_path
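
A short sketch of the contract (hypothetical paths):

# Accepted: absolute, no '..', and it matches a safe prefix.
safe_normalize_path('/tmp/summary/run1', raise_key='summary_path',
                    safe_prefixes=['/tmp/'], check_absolute_path=True)
# Rejected with ValidationError: parent dir traversal.
# safe_normalize_path('/tmp/summary/../etc', 'summary_path', ['/tmp/'])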

+ 14
- 0
mindinsight/lineagemgr/querier/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 446
- 0
mindinsight/lineagemgr/querier/querier.py View File

@@ -0,0 +1,446 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""This file is used to define lineage info querier."""
import enum
import functools
import operator
import os

from mindinsight.lineagemgr.common.exceptions.exceptions import \
LineageParamTypeError, LineageSummaryAnalyzeException, \
LineageEventNotExistException, LineageQuerierParamException, \
LineageSummaryParseException, LineageEventFieldNotExistException
from mindinsight.lineagemgr.common.log import logger
from mindinsight.lineagemgr.querier.query_model import LineageObj, FIELD_MAPPING
from mindinsight.lineagemgr.summary.lineage_summary_analyzer import \
LineageSummaryAnalyzer


@enum.unique
class ConditionParam(enum.Enum):
"""
Filtering and sorting field names.

`LIMIT` represents the number of lineage info per page. `OFFSET` represents
page number. `SORTED_NAME` means to sort by this field. `SORTED_TYPE` means
ascending or descending.
"""
LIMIT = 'limit'
OFFSET = 'offset'
SORTED_NAME = 'sorted_name'
SORTED_TYPE = 'sorted_type'
LINEAGE_TYPE = 'lineage_type'

@classmethod
def is_condition_type(cls, value):
"""
Judge whether the input param is one of the field names in the class.

Args:
value (str): The input field name.

Returns:
bool, `True` if the input field name in the class, else `False`.
"""
return value in cls._value2member_map_


@enum.unique
class ExpressionType(enum.Enum):
"""
Filter condition name definition.

`EQ` means `==`. `LT` means `<`. `GT` means `>`. `LE` means `<=`. `GE` means
`>=`. `IN` means filter value in the specified list.
"""
EQ = 'eq'
LT = 'lt'
GT = 'gt'
LE = 'le'
GE = 'ge'
IN = 'in'

@classmethod
def is_valid_exp(cls, key):
"""
Judge whether the input param is one of the filter condition names in the class.

Args:
key (str): The input filter condition name.

Returns:
bool, `True` if the input filter condition name in the class,
else `False`.
"""
return key in cls._value2member_map_

    @classmethod
    def is_match(cls, expect_key, expect_value, actual_value):
        """
        Determine whether the actual value meets the expected requirement.

        Args:
            expect_key (str): The expression key.
            expect_value (Union[str, int, float, list, tuple]): The expected
                value.
            actual_value (Union[str, int, float]): The actual value.

        Returns:
            bool, `True` if the actual value meets the expected requirement,
            else `False`.
        """
        # Range comparisons against a missing value can never match.
        if actual_value is None and expect_key in [cls.LT.value, cls.GT.value,
                                                   cls.LE.value, cls.GE.value]:
            return False

        if expect_key == cls.IN.value:
            # `operator.contains` takes the container (the expected value) first.
            state = operator.contains(expect_value, actual_value)
        else:
            state = getattr(operator, expect_key)(actual_value, expect_value)
return state
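
A quick sketch of how `is_match` dispatches to the standard `operator` module (hedged: the values `eq`, `lt`, `gt`, `le` and `ge` name real `operator` functions, while `in` is special-cased because `operator.contains` takes the container first):

import operator

# `le` resolves to operator.le(actual, expected), i.e. actual <= expected.
assert getattr(operator, 'le')(3, 5)

# For `in`, the expected value is the container and the actual value the member.
assert operator.contains(['lenet', 'resnet'], 'resnet')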


@enum.unique
class LineageFilterKey(enum.Enum):
"""Summary lineage information filter key."""
METRIC = 'metric'
HYPER_PARAM = 'hyper_parameters'
ALGORITHM = 'algorithm'
TRAIN_DATASET = 'train_dataset'
VALID_DATASET = 'valid_dataset'
MODEL = 'model'
DATASET_GRAPH = 'dataset_graph'

@classmethod
def is_valid_filter_key(cls, key):
"""
        Check whether the input key is one of the filter key names defined in this class.

Args:
key (str): The input field name.

Returns:
            bool, `True` if the input filter key is defined in this class, else `False`.
"""
return key in cls._value2member_map_

@classmethod
def get_key_list(cls):
"""
Get the filter key name list.

Returns:
list[str], the filter key name list.
"""
return [member.value for member in cls]


@enum.unique
class LineageType(enum.Enum):
"""Lineage search type."""
DATASET = 'dataset'
MODEL = 'model'


class Querier:
"""
The querier of model lineage information.

    The class provides query functions for model lineage information. The
    information includes hyper parameters, train dataset, algorithm, model
    information, metric, valid dataset, and so on.

    The class also provides search and sorting capabilities for model lineage
    information. You can search and sort by the specified conditions, which
    are explained in the `ConditionParam` and `ExpressionType` classes.
    See the method `filter_summary_lineage` for supported fields.

Args:
summary_path (Union[str, list[str]]): The single summary log path or
            a list of summary log paths.

Raises:
LineageParamTypeError: If the input parameter type is invalid.
LineageQuerierParamException: If the input parameter value is invalid.
LineageSummaryParseException: If all summary logs parsing failed.
"""
def __init__(self, summary_path):
self._lineage_objects = []
self._index_map = {}
self._parse_failed_paths = []
self._parse_summary_logs(summary_path)
self._size = len(self._lineage_objects)

def get_summary_lineage(self, summary_dir=None, filter_keys=None):
"""
Get summary lineage information.

        If a summary dir is specified, only the lineage information under that
        dir is returned. If the summary dir is `None`, the lineage information
        of all summaries is returned.

Returns the content corresponding to the specified field in the filter
key. The contents of the filter key include `metric`, `hyper_parameters`,
`algorithm`, `train_dataset`, `valid_dataset` and `model`. You can
specify multiple filter keys in the `filter_keys`. If the parameter is
`None`, complete information will be returned.

Args:
summary_dir (Union[str, None]): Summary log dir. Default: None.
filter_keys (Union[list[str], None]): Filter keys. Default: None.

Returns:
list[dict], summary lineage information.
"""
self._parse_fail_summary_logs()

if filter_keys is None:
filter_keys = LineageFilterKey.get_key_list()
else:
for key in filter_keys:
if not LineageFilterKey.is_valid_filter_key(key):
raise LineageQuerierParamException(
filter_keys, 'The filter key {} is invalid.'.format(key)
)

if summary_dir is None:
result = [
item.get_summary_info(filter_keys) for item in self._lineage_objects
]
else:
index = self._index_map.get(summary_dir)
if index is None:
raise LineageQuerierParamException(
'summary_dir',
'Summary dir {} does not exist.'.format(summary_dir)
)
lineage_obj = self._lineage_objects[index]
result = [lineage_obj.get_summary_info(filter_keys)]
return result

def filter_summary_lineage(self, condition=None):
"""
Filter and sort lineage information based on the specified condition.

        See the `ConditionParam` and `ExpressionType` classes for the
        filtering and sorting rules. The filtering and sorting fields are
        defined in `FIELD_MAPPING` or prefixed with `metric_`.

If the condition is `None`, all model lineage information will be
returned.

Args:
condition (Union[dict, None]): Filter and sort condition.
Default: None.

Returns:
dict, filtered and sorted model lineage information.
"""
def _filter(lineage_obj: LineageObj):
for condition_key, condition_value in condition.items():
if ConditionParam.is_condition_type(condition_key):
continue
                if self._is_invalid_field(condition_key):
                    raise LineageQuerierParamException(
                        'condition',
                        'The field {} is not supported.'.format(condition_key)
                    )

value = lineage_obj.get_value_by_key(condition_key)
for exp_key, exp_value in condition_value.items():
if not ExpressionType.is_valid_exp(exp_key):
raise LineageQuerierParamException(
'condition',
                            'The expression {} is not supported.'.format(exp_key)
)
if not ExpressionType.is_match(exp_key, exp_value, value):
return False
return True

def _cmp(obj1: LineageObj, obj2: LineageObj):
value1 = obj1.get_value_by_key(sorted_name)
value2 = obj2.get_value_by_key(sorted_name)

if value1 is None and value2 is None:
cmp_result = 0
elif value1 is None:
cmp_result = -1
elif value2 is None:
cmp_result = 1
else:
cmp_result = (value1 > value2) - (value1 < value2)

return cmp_result

self._parse_fail_summary_logs()

if condition is None:
condition = {}
result = list(filter(_filter, self._lineage_objects))

if ConditionParam.SORTED_NAME.value in condition:
sorted_name = condition.get(ConditionParam.SORTED_NAME.value)
            if self._is_invalid_field(sorted_name):
                raise LineageQuerierParamException(
                    'condition',
                    'The sorted name {} is not supported.'.format(sorted_name)
                )
sorted_type = condition.get(ConditionParam.SORTED_TYPE.value)
reverse = sorted_type == 'descending'
result = sorted(
result, key=functools.cmp_to_key(_cmp), reverse=reverse
)

offset_result = self._handle_limit_and_offset(condition, result)

search_type = condition.get(ConditionParam.LINEAGE_TYPE.value)
lineage_info = {
'object': [
item.to_dataset_lineage_dict() if search_type == LineageType.DATASET.value
else item.to_filtration_dict() for item in offset_result
],
'count': len(result)
}

return lineage_info

    def _is_invalid_field(self, field_name):
        """
        Check whether a filtering or sorting field name is unsupported.

        A field is supported if it is defined in `FIELD_MAPPING` or prefixed
        with `metric_`.

        Args:
            field_name (str): Field name.

        Returns:
            bool, `True` if the field name is not supported, else `False`.
        """
        return field_name not in FIELD_MAPPING and not field_name.startswith('metric_')

def _handle_limit_and_offset(self, condition, result):
"""
Handling the condition of `limit` and `offset`.

Args:
condition (dict): Filter and sort condition.
result (list[LineageObj]): Filtered and sorted result.

Returns:
list[LineageObj], paginated result.
"""
offset = 0
limit = 10
if ConditionParam.OFFSET.value in condition:
offset = condition.get(ConditionParam.OFFSET.value)
if ConditionParam.LIMIT.value in condition:
limit = condition.get(ConditionParam.LIMIT.value)
        if ConditionParam.OFFSET.value not in condition \
                and ConditionParam.LIMIT.value not in condition:
            offset_result = result
        else:
            # `offset` is a page index: page N covers [N * limit, (N + 1) * limit).
            offset_result = result[offset * limit: limit * (offset + 1)]
return offset_result

def _parse_summary_logs(self, summary_path):
"""
Parse summary logs.

Args:
summary_path (Union[str, list[str]]): The single summary log path or
                a list of summary log paths.
"""
if not summary_path:
raise LineageQuerierParamException(
'summary_path', 'The summary path is empty.'
)
if isinstance(summary_path, str):
self._parse_summary_log(summary_path, 0)
elif isinstance(summary_path, list):
index = 0
for path in summary_path:
parse_result = self._parse_summary_log(path, index)
if parse_result:
index += 1
else:
            raise LineageParamTypeError('The summary path must be a str or a list.')

if self._parse_failed_paths:
logger.info('Parse failed paths: %s', str(self._parse_failed_paths))

if not self._lineage_objects:
raise LineageSummaryParseException()

def _parse_summary_log(self, log_path, index: int, is_save_fail_path=True):
"""
Parse the single summary log.

Args:
log_path (str): The single summary log path.
            index (int): Index of the lineage object in the lineage object list.
is_save_fail_path (bool): Set whether to save the failed summary
path. Default: True.

Returns:
bool, `True` if parse summary log success, else `False`.
"""
log_dir = os.path.dirname(log_path)
try:
lineage_info = LineageSummaryAnalyzer.get_summary_infos(log_path)
lineage_obj = LineageObj(
log_dir,
train_lineage=lineage_info.train_lineage,
evaluation_lineage=lineage_info.eval_lineage,
dataset_graph=lineage_info.dataset_graph
)
self._lineage_objects.append(lineage_obj)
self._add_dataset_mark()
self._index_map[log_dir] = index
return True
except (LineageSummaryAnalyzeException,
LineageEventNotExistException,
LineageEventFieldNotExistException):
if is_save_fail_path:
self._parse_failed_paths.append(log_path)
return False

def _parse_fail_summary_logs(self):
"""Parse fail summary logs."""
if self._parse_failed_paths:
failed_paths = []
for path in self._parse_failed_paths:
parse_result = self._parse_summary_log(path, self._size, False)
if parse_result:
self._size += 1
else:
failed_paths.append(path)
self._parse_failed_paths = failed_paths

def _add_dataset_mark(self):
"""Add dataset mark into LineageObj."""
# give a dataset mark for each dataset graph in lineage information
marked_dataset_group = {'1': None}
for lineage in self._lineage_objects:
dataset_mark = '0'
for dataset_graph_mark, marked_dataset_graph in marked_dataset_group.items():
if marked_dataset_graph == lineage.dataset_graph:
dataset_mark = dataset_graph_mark
break
            # if no match was found, add the new dataset graph to the group
if dataset_mark == '0':
dataset_mark = str(int(max(marked_dataset_group.keys())) + 1)
marked_dataset_group.update({
dataset_mark:
lineage.dataset_graph
})
lineage.dataset_mark = dataset_mark
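
As a usage sketch of the querier above (the summary path and the `learning_rate` / `metric_accuracy` fields below are illustrative; valid field names come from `FIELD_MAPPING` or carry the `metric_` prefix):

from mindinsight.lineagemgr.querier.querier import Querier

condition = {
    'learning_rate': {'ge': 0.001, 'lt': 0.1},  # range filter on a FIELD_MAPPING field
    'metric_accuracy': {'gt': 0.9},             # metric field via the `metric_` prefix
    'sorted_name': 'metric_accuracy',           # ConditionParam.SORTED_NAME
    'sorted_type': 'descending',                # ConditionParam.SORTED_TYPE
    'limit': 10,                                # page size
    'offset': 0,                                # page index
}

querier = Querier(['/path/to/summary.log'])     # illustrative path
result = querier.filter_summary_lineage(condition)
print(result['count'], len(result['object']))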

+ 344
- 0
mindinsight/lineagemgr/querier/query_model.py View File

@@ -0,0 +1,344 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""This file is used to define lineage info model."""
import json
from collections import namedtuple

from google.protobuf.json_format import MessageToDict

from mindinsight.lineagemgr.common.exceptions.exceptions import \
LineageEventFieldNotExistException, LineageEventNotExistException
from mindinsight.lineagemgr.summary._summary_adapter import organize_graph

Field = namedtuple('Field', ['base_name', 'sub_name'])
FIELD_MAPPING = {
"summary_dir": Field('summary_dir', None),
"loss_function": Field("hyper_parameters", 'loss_function'),
"train_dataset_path": Field('train_dataset', 'train_dataset_path'),
"train_dataset_count": Field("train_dataset", 'train_dataset_size'),
"test_dataset_path": Field('valid_dataset', 'valid_dataset_path'),
"test_dataset_count": Field('valid_dataset', 'valid_dataset_size'),
"network": Field('algorithm', 'network'),
"optimizer": Field('hyper_parameters', 'optimizer'),
"learning_rate": Field('hyper_parameters', 'learning_rate'),
"epoch": Field('hyper_parameters', 'epoch'),
"batch_size": Field('hyper_parameters', 'batch_size'),
"loss": Field('algorithm', 'loss'),
"model_size": Field('model', 'size'),
"dataset_mark": Field('dataset_mark', None),
}


class LineageObj:
"""
Lineage information class.

    An instance of the class holds the lineage information of one training session.

Args:
summary_dir (str): Summary log dir.
kwargs (dict): Params to init the instance.

- train_lineage (Event): Train lineage object.

- evaluation_lineage (Event): Evaluation lineage object.

- dataset_graph (Event): Dataset graph object.

Raises:
        LineageEventNotExistException: If none of the train, evaluation and
            dataset graph events exists.
        LineageEventFieldNotExistException: If an expected event field does not exist.
"""
_name_train_lineage = 'train_lineage'
_name_evaluation_lineage = 'evaluation_lineage'
_name_summary_dir = 'summary_dir'
_name_metric = 'metric'
_name_hyper_parameters = 'hyper_parameters'
_name_algorithm = 'algorithm'
_name_train_dataset = 'train_dataset'
_name_model = 'model'
_name_valid_dataset = 'valid_dataset'
_name_dataset_graph = 'dataset_graph'
_name_dataset_mark = 'dataset_mark'

def __init__(self, summary_dir, **kwargs):
self._lineage_info = {
self._name_summary_dir: summary_dir
}
train_lineage = kwargs.get('train_lineage')
evaluation_lineage = kwargs.get('evaluation_lineage')
dataset_graph = kwargs.get('dataset_graph')
if not any([train_lineage, evaluation_lineage, dataset_graph]):
raise LineageEventNotExistException()
self._parse_train_lineage(train_lineage)
self._parse_evaluation_lineage(evaluation_lineage)
self._parse_dataset_graph(dataset_graph)
self._filtration_result = self._organize_filtration_result()

@property
def summary_dir(self):
"""
Get summary log dir.

Returns:
str, the summary log dir.
"""
return self._lineage_info.get(self._name_summary_dir)

@property
def metric(self):
"""
Get metric information.

Returns:
dict, the metric information.
"""
return self._lineage_info.get(self._name_metric)

@property
def hyper_parameters(self):
"""
Get hyperparameters.

Returns:
dict, the hyperparameters.
"""
return self._lineage_info.get(self._name_hyper_parameters)

@property
def algorithm(self):
"""
Get algorithm.

Returns:
dict, the algorithm.
"""
return self._lineage_info.get(self._name_algorithm)

@property
def train_dataset(self):
"""
Get train dataset information.

Returns:
dict, the train dataset information.
"""
return self._lineage_info.get(self._name_train_dataset)

@property
def model(self):
"""
Get model information.

Returns:
dict, the model information.
"""
return self._lineage_info.get(self._name_model)

@property
def valid_dataset(self):
"""
Get valid dataset information.

Returns:
dict, the valid dataset information.
"""
return self._lineage_info.get(self._name_valid_dataset)

@property
def dataset_graph(self):
"""
Get dataset_graph.

Returns:
dict, the dataset graph information.
"""
return self._lineage_info.get(self._name_dataset_graph)

@property
def dataset_mark(self):
"""
Get dataset_mark.

Returns:
            str, the dataset mark.
"""
return self._lineage_info.get(self._name_dataset_mark)

@dataset_mark.setter
def dataset_mark(self, dataset_mark):
"""
Set dataset mark.

Args:
            dataset_mark (str): Dataset mark.
"""
self._lineage_info[self._name_dataset_mark] = dataset_mark
# update dataset_mark into filtration result
self._filtration_result[self._name_dataset_mark] = dataset_mark

def get_summary_info(self, filter_keys: list):
"""
Get the summary lineage information.

Returns the content corresponding to the specified field in the filter
key. The contents of the filter key include `metric`, `hyper_parameters`,
`algorithm`, `train_dataset`, `valid_dataset` and `model`. You can
        specify multiple filter keys in `filter_keys`.

Args:
filter_keys (list): Filter keys.

Returns:
dict, the summary lineage information.
"""
result = {
self._name_summary_dir: self.summary_dir,
}

for key in filter_keys:
result[key] = getattr(self, key)
return result

def to_filtration_dict(self):
"""
        Return the lineage information required by the filtering interface.

        Returns:
            dict, the lineage information required by the filtering interface.
"""
return self._filtration_result

def to_dataset_lineage_dict(self):
"""
        Return the dataset part of the lineage information.

Returns:
dict, the dataset lineage information.
"""
dataset_lineage = {
key: self._filtration_result.get(key)
for key in [self._name_summary_dir, self._name_dataset_graph]
}

return dataset_lineage

def get_value_by_key(self, key):
"""
Get the value based on the key in `FIELD_MAPPING` or the key prefixed with `metric_`.

Args:
key (str): The key in `FIELD_MAPPING` or prefixed with `metric_`.

Returns:
object, the value.
"""
if key.startswith('metric_'):
metric_key = key.split('_', 1)[1]
metric = self._filtration_result.get(self._name_metric)
if metric:
return metric.get(metric_key)
return self._filtration_result.get(key)

def _organize_filtration_result(self):
"""
Organize filtration result.

Returns:
dict, the filtration result.
"""
result = {}
for key, field in FIELD_MAPPING.items():
if field.base_name is not None:
base_attr = getattr(self, field.base_name)
result[key] = base_attr.get(field.sub_name) \
if field.sub_name else base_attr
# add metric into filtration result
result[self._name_metric] = self.metric
# add dataset_graph into filtration result
result[self._name_dataset_graph] = getattr(self, self._name_dataset_graph)
return result

def _parse_train_lineage(self, train_lineage):
"""
Parse train lineage.

Args:
train_lineage (Event): Train lineage.
"""
if train_lineage is None:
self._lineage_info[self._name_model] = {}
self._lineage_info[self._name_algorithm] = {}
self._lineage_info[self._name_hyper_parameters] = {}
self._lineage_info[self._name_train_dataset] = {}
return

event_dict = MessageToDict(
train_lineage, preserving_proto_field_name=True
)
train_dict = event_dict.get(self._name_train_lineage)
if train_dict is None:
raise LineageEventFieldNotExistException(
self._name_train_lineage
)

        # MessageToDict converts int64 fields to strings, so convert the
        # model size back to an int here.
if train_dict.get(self._name_model):
model_size = train_dict.get(self._name_model).get('size')
if model_size:
train_dict[self._name_model]['size'] = int(model_size)

self._lineage_info.update(**train_dict)

def _parse_evaluation_lineage(self, evaluation_lineage):
"""
Parse evaluation lineage.

Args:
evaluation_lineage (Event): Evaluation lineage.
"""
if evaluation_lineage is None:
self._lineage_info[self._name_metric] = {}
self._lineage_info[self._name_valid_dataset] = {}
return

event_dict = MessageToDict(
evaluation_lineage, preserving_proto_field_name=True
)
evaluation_dict = event_dict.get(self._name_evaluation_lineage)
if evaluation_dict is None:
raise LineageEventFieldNotExistException(
self._name_evaluation_lineage
)
self._lineage_info.update(**evaluation_dict)
metric = self._lineage_info.get(self._name_metric)
self._lineage_info[self._name_metric] = json.loads(metric) if metric else {}

def _parse_dataset_graph(self, dataset_graph):
"""
Parse dataset graph.

Args:
dataset_graph (Event): Dataset graph.
"""
if dataset_graph is None:
self._lineage_info[self._name_dataset_graph] = {}
else:
# convert message to dict
event_dict = organize_graph(dataset_graph.dataset_graph)
if event_dict is None:
                raise LineageEventFieldNotExistException(self._name_dataset_graph)
self._lineage_info[self._name_dataset_graph] = event_dict if event_dict else {}
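
A short sketch of how `get_value_by_key` resolves keys; the `lineage_obj` instance is hypothetical and would normally be built by `Querier` from a parsed summary log:

# Flat keys are looked up in the organized filtration result, which
# `FIELD_MAPPING` populated from (base_name, sub_name) pairs, e.g.
# 'learning_rate' -> hyper_parameters['learning_rate'].
lr = lineage_obj.get_value_by_key('learning_rate')

# Keys prefixed with `metric_` are resolved inside the metric dict,
# e.g. 'metric_accuracy' -> metric['accuracy'].
acc = lineage_obj.get_value_by_key('metric_accuracy')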

+ 14
- 0
mindinsight/lineagemgr/summary/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 293
- 0
mindinsight/lineagemgr/summary/_summary_adapter.py View File

@@ -0,0 +1,293 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The converter between proto format event of lineage and dict."""
import time

from mindinsight.datavisual.proto_files.mindinsight_summary_pb2 import Event
from mindinsight.lineagemgr.common.exceptions.exceptions import LineageParamTypeError
from mindinsight.lineagemgr.common.log import logger as log


def package_dataset_graph(graph):
"""
Package dataset graph.

Args:
graph (dict): Dataset graph.

Returns:
Event, the proto message event contains dataset graph.
"""
dataset_graph_event = Event()
dataset_graph_event.wall_time = time.time()

dataset_graph = dataset_graph_event.dataset_graph
if "children" in graph:
children = graph.pop("children")
if children:
_package_children(children=children, message=dataset_graph)
_package_current_dataset(operation=graph, message=dataset_graph)

return dataset_graph_event


def _package_children(children, message):
"""
Package children in dataset operation.

Args:
children (list[dict]): Child operations.
message (DatasetGraph): Children proto message.
"""
for child in children:
if child:
child_graph_message = getattr(message, "children").add()
grandson = child.pop("children")
if grandson:
_package_children(children=grandson, message=child_graph_message)
# package other parameters
_package_current_dataset(operation=child, message=child_graph_message)


def _package_current_dataset(operation, message):
"""
Package operation parameters in event message.

Args:
operation (dict): Operation dict.
message (Operation): Operation proto message.
"""
for key, value in operation.items():
if key == "operations":
for operator in value:
_package_enhancement_operation(
operator,
message.operations.add()
)
elif key == "sampler":
_package_enhancement_operation(
value,
message.sampler
)
else:
_package_parameter(key, value, message.parameter)


def _package_enhancement_operation(operation, message):
"""
Package enhancement operation in MapDataset.

Args:
operation (dict): Enhancement operation.
message (Operation): Enhancement operation proto message.
"""

for key, value in operation.items():
if isinstance(value, list):
if all(isinstance(ele, int) for ele in value):
message.size.extend(value)
else:
message.weights.extend(value)
else:
_package_parameter(key, value, message.operationParam)


def _package_parameter(key, value, message):
"""
Package parameters in operation.

Args:
key (str): Operation name.
value (Union[str, bool, int, float, list, None]): Operation args.
message (OperationParameter): Operation proto message.
"""
if isinstance(value, str):
message.mapStr[key] = value
elif isinstance(value, bool):
message.mapBool[key] = value
elif isinstance(value, int):
message.mapInt[key] = value
elif isinstance(value, float):
message.mapDouble[key] = value
elif isinstance(value, list) and key != "operations":
if value:
replace_value_list = list(map(lambda x: "" if x is None else x, value))
message.mapStrList[key].strValue.extend(replace_value_list)
elif value is None:
message.mapStr[key] = "None"
else:
error_msg = "Parameter {} is not supported " \
"in event package.".format(key)
log.error(error_msg)
raise LineageParamTypeError(error_msg)


def organize_graph(graph_message):
"""
Convert a dataset graph to its dict format.

Args:
graph_message (DatasetGraph): Graph event message.

Returns:
dict, dataset graph.
"""
result = {}
# update current dataset graph dict
result.update(_organize_current_dataset(
parameter=getattr(graph_message, 'parameter'),
operations=getattr(graph_message, 'operations'),
sampler=getattr(graph_message, 'sampler')
))
# update children dataset graph dict
result.update(
_organize_children(getattr(graph_message, 'children'))
)

return result


def _organize_children(children_message):
"""
Convert children message to its dict format.

Args:
children_message (list[DatasetGraph]): Children message.

Returns:
dict, children dict of dataset graph.
"""
children_list = []
children_dict = {'children': children_list}
if children_message:
for child_event in children_message:
child_dict = {}
# update current dataset to child
child_dict.update(
_organize_current_dataset(
parameter=getattr(child_event, 'parameter'),
operations=getattr(child_event, 'operations'),
sampler=getattr(child_event, 'sampler')
)
)
# update child's children
child_dict.update(
_organize_children(getattr(child_event, 'children'))
)
children_list.append(child_dict)
children_dict['children'] = children_list

return children_dict


def _organize_current_dataset(parameter, operations, sampler):
"""
Convert current dataset message to its dict format.

Note:
Current dataset message include parameter, operations,
sampler message of dataset graph event.

Args:
parameter (OperationParameter): Parameter message.
operations (Operation): Operations message.
sampler (Operation): Sampler message.

Returns:
dict, current dataset.
"""
current_dataset = {}
if parameter:
current_dataset.update(
_organize_parameter(parameter)
)
if operations:
operation_list = []
for operation in operations:
operation_list.append(
_organize_operation(operation)
)
current_dataset.update(
{'operations': operation_list}
)
if sampler:
if _organize_operation(sampler):
current_dataset.update({
'sampler':
_organize_operation(sampler)
})
return current_dataset


def _organize_operation(operation):
"""
Convert operation message to its dict format.

Args:
operation (Operation): Operation message.

Returns:
dict, operation.
"""
    operation_dict = {}
    operation_dict.update(_organize_parameter(getattr(operation, 'operationParam')))
    repeated_keys = ['size', 'weights']
    for key in repeated_keys:
        # Reset the list for each key so `size` and `weights` are never merged.
        tmp_list = list(getattr(operation, key))
        if tmp_list:
            operation_dict.update({key: tmp_list})
    return operation_dict


def _organize_parameter(parameter):
"""
Convert operation parameter message to its dict format.

Args:
parameter (OperationParameter): Operation parameter message.

Returns:
dict, operation parameter.
"""
parameter_result = dict()
parameter_keys = [
'mapStr',
'mapBool',
'mapInt',
'mapDouble',
]
for parameter_key in parameter_keys:
base_attr = getattr(parameter, parameter_key)
parameter_value = dict(base_attr)
# convert str 'None' to None
for key, value in parameter_value.items():
if value == 'None':
parameter_value[key] = None
parameter_result.update(parameter_value)
    # unwrap the `mapStrList` values: each entry holds a repeated `strValue` field
str_list_para = dict(getattr(parameter, 'mapStrList'))
result_str_list_para = dict()
for key, value in str_list_para.items():
str_list_para_list = list()
for str_ele in getattr(value, 'strValue'):
str_list_para_list.append(str_ele)
str_list_para_list = list(map(lambda x: None if x == '' else x, str_list_para_list))
result_str_list_para[key] = str_list_para_list
parameter_result.update(result_str_list_para)

return parameter_result
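
A minimal round-trip sketch, assuming a tiny illustrative dataset graph dict (note that `package_dataset_graph` pops the 'children' key from its input in place):

from mindinsight.lineagemgr.summary._summary_adapter import (
    organize_graph, package_dataset_graph)

graph = {
    'op_type': 'BatchDataset',  # str -> mapStr
    'batch_size': 32,           # int -> mapInt
    'drop_remainder': False,    # bool -> mapBool (checked before int)
    'shuffle': None,            # None is stored as the string 'None'
    'children': [],
}

event = package_dataset_graph(graph)            # dict -> proto Event
restored = organize_graph(event.dataset_graph)  # proto -> dict; 'None' -> None again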

+ 95
- 0
mindinsight/lineagemgr/summary/event_writer.py View File

@@ -0,0 +1,95 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Event writer to record lineage message to summary log."""
import os
import stat
import struct

from mindinsight.datavisual.utils import crc32

KMASKDELTA = 0xa282ead8


class EventWriter:
"""
    Event writer for the lineage summary log.

    It records train lineage and evaluation lineage events to the summary log.

Args:
file_path (str): Summary log path.
        override (bool): Whether to overwrite an existing summary log.

Raises:
        IOError: If writing to the summary log fails or `file_path` is not a regular file.

Examples:
>>> content = b'\x01\x02\x03\x04'
>>> event_writer = EventWriter("./test.log", True)
>>> event_writer.write_event_to_file(content)
"""
def __init__(self, file_path, override=False):
"""
        Initialize the EventWriter and determine the write mode.

Args:
            file_path (str): The file path to write to.
            override (bool): Whether to overwrite an existing file.
"""
if os.path.exists(file_path):
if not os.path.isfile(file_path):
raise IOError("The file_path is not a normal file.")

self.file_path = file_path
if override:
self.write_type = 'wb'
else:
self.write_type = 'ab'

def write_event_to_file(self, content):
"""
Write event to file.

Args:
content (bytes): Content to write.
"""
length = struct.pack("<Q", len(content))
header_crc = EventWriter.get_crc(length)
crc = EventWriter.get_crc(content)
content = length + header_crc + content + crc
try:
with open(self.file_path, self.write_type) as log_file:
os.chmod(self.file_path, stat.S_IRUSR | stat.S_IWUSR)
log_file.write(content)
except IOError:
raise IOError("There are some error when writing summary log.")

@staticmethod
def get_crc(content):
"""
        Calculate the masked CRC32C value of the content.

Args:
            content (bytes): Content to calculate the CRC for.

Returns:
bytes, crc of content, 4 bytes.
"""
mask = (1 << 32) - 1

crc_value = crc32.MakeCrc32c(0, content, len(content))
crc_value = ((crc_value >> 15) | (crc_value << 17)) & mask
crc_value = (crc_value + KMASKDELTA) & mask

return struct.pack("<L", crc_value)

+ 95
- 0
mindinsight/lineagemgr/summary/file_handler.py View File

@@ -0,0 +1,95 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""File handler for lineage summary log."""
import os


class FileHandler:
"""
Summary log file handler.

    Summary log file handler provides Python APIs for file IO, including
    read and seek. The whole file is cached in memory, so it is not suitable
    for very large files.

Args:
file_path (str): File path.
"""

def __init__(self, file_path):
self._size = os.path.getsize(file_path)
self._cache = self._read_cache(file_path)
self._offset = 0

@property
def size(self):
"""
The size of file.

Returns:
int, the size of file.
"""
return self._size

def _read_cache(self, file_path):
"""
Read file in cache.

Args:
file_path (str): File path.

Returns:
bytes, the file content.
"""
with open(file_path, 'rb') as log_file:
return log_file.read()

def seek(self, offset):
"""
Set the new offset of file.

Args:
            offset (int): The new offset.
"""
self._offset = offset

def tell(self):
"""
Tell the current offset.

Returns:
int, the offset.
"""
return self._offset

def read(self, size=-1, offset=None):
"""
Read bytes from buffer by size.

Args:
            size (int): Number of bytes to read. If negative, read to the
                end of the file. Default: -1.
offset (int): The start offset to read bytes from. Default: None.

Returns:
bytes, the content.
"""
        if offset is None:
            offset = self._offset

        # A negative size means read to the end of the file.
        new_offset = self._size if size < 0 else offset + size
        result = self._cache[offset:new_offset]
        self._offset = new_offset

return result
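
A brief usage sketch, assuming an existing summary log of at least 8 bytes at an illustrative path (the whole file is cached in memory on construction):

from mindinsight.lineagemgr.summary.file_handler import FileHandler

handler = FileHandler('/path/to/summary.log')  # illustrative path
header = handler.read(8)                       # reads 8 bytes, offset advances to 8
handler.seek(0)                                # rewind
assert handler.read(8, offset=0) == header and handler.tell() == 8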

+ 209
- 0
mindinsight/lineagemgr/summary/lineage_summary_analyzer.py View File

@@ -0,0 +1,209 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""This module provides python APIs to get lineage summary from summary log."""
import struct
from collections import namedtuple
from enum import Enum

from mindinsight.datavisual.proto_files.mindinsight_summary_pb2 import Event
from mindinsight.datavisual.utils import crc32
from mindinsight.lineagemgr.common.exceptions.exceptions import MindInsightException, \
LineageVerificationException, LineageSummaryAnalyzeException
from mindinsight.lineagemgr.common.log import logger as log
from mindinsight.lineagemgr.common.validator.validate_path import safe_normalize_path
from mindinsight.lineagemgr.summary.file_handler import FileHandler

LineageInfo = namedtuple('LineageInfo', ['train_lineage', 'eval_lineage', 'dataset_graph'])


class SummaryTag(Enum):
"""The tag value of lineage fields."""

# the value is `field_number << 3 | wire_type`
WALL_TIME = 'wall_time'
STEP = 'step'
VERSION = 'version'
GRAPH = 'graph'
SUMMARY = 'summary'
TRAIN_LINEAGE = 'train_lineage'
EVAL_LINEAGE = 'evaluation_lineage'
DATASET_GRAPH = 'dataset_graph'


class SummaryAnalyzer:
"""
Summary log Analyzer.

Args:
file_path (str): The path of summary log.

Raises:
LineageVerificationException: Raise when verification failed.
"""
HEADER_SIZE = 8
HEADER_CRC_SIZE = 4
BODY_CRC_SIZE = 4

def __init__(self, file_path):
self.file_handler = FileHandler(file_path)

def load_events(self):
"""
Load events in summary log.

Returns:
generator, the event generator.
"""
while self._has_next():
yield self._read_event()

def _has_next(self):
"""
        Check whether there are more events to read.

        Returns:
            bool, `True` if the end of the file has not been reached.
"""
current_offset = self.file_handler.tell()
if current_offset < self.file_handler.size:
return True

return False

def _read_event(self):
"""
Read event.

Returns:
Event, the event body.
"""
body_size = self._read_header()
body_str = self._read_body(body_size)
event = Event().FromString(body_str)
return event

def _read_header(self):
"""
Read header information.

Returns:
int, the length of event body.
"""
header_str = self.file_handler.read(self.HEADER_SIZE)
header_crc_str = self.file_handler.read(self.HEADER_CRC_SIZE)
SummaryAnalyzer._check_crc(header_str, header_crc_str)

body_len = struct.unpack("<Q", header_str)[0]

return body_len

def _read_body(self, body_size):
"""
Read event body information.

Args:
body_size (int): The size of event body.

Returns:
bytes, the event body in bytes.
"""
body_str = self.file_handler.read(body_size)
body_crc_str = self.file_handler.read(self.BODY_CRC_SIZE)
SummaryAnalyzer._check_crc(body_str, body_crc_str)

return body_str

@staticmethod
def _check_crc(source_str, crc_str):
"""
Check the integrity of source string.

Args:
source_str (bytes): Source string in bytes.
crc_str (bytes): CRC string of source string in bytes.

Raises:
LineageVerificationException: Raise when verification failed.
"""
if crc32.GetValueFromStr(crc_str) != \
crc32.GetMaskCrc32cValue(source_str, len(source_str)):
log.error("The CRC verification failed.")
raise LineageVerificationException("The CRC verification failed.")


class LineageSummaryAnalyzer(SummaryAnalyzer):
"""
Summary log analyzer for lineage information.

Args:
file_path (str): The path of summary log.

Raises:
LineageSummaryAnalyzeException: If failed to get lineage information.
"""

def __init__(self, file_path):
file_path = safe_normalize_path(file_path, 'lineage_summary_path', None)
super(LineageSummaryAnalyzer, self).__init__(file_path)

def get_latest_info(self):
"""
Get latest lineage info in summary log file.

Returns:
LineageInfo, the lineage summary information.
"""
lineage_events = {
SummaryTag.TRAIN_LINEAGE: None,
SummaryTag.EVAL_LINEAGE: None,
SummaryTag.DATASET_GRAPH: None
}
for event in self.load_events():
for tag, _ in lineage_events.items():
if event.HasField(tag.value):
lineage_events[tag] = event
break

lineage_info = LineageInfo(
train_lineage=lineage_events.get(SummaryTag.TRAIN_LINEAGE),
eval_lineage=lineage_events.get(SummaryTag.EVAL_LINEAGE),
dataset_graph=lineage_events.get(SummaryTag.DATASET_GRAPH)
)

return lineage_info

@classmethod
def get_summary_infos(cls, file_path):
"""
Get lineage summary information from summary log file.

Args:
file_path (str): The file path of summary log.

Returns:
LineageInfo, the lineage summary information.

Raises:
LineageSummaryAnalyzeException: If failed to get lineage information.
"""
analyzer = cls(file_path)
try:
lineage_info = analyzer.get_latest_info()
except (MindInsightException, IOError) as err:
log.error("Failed to get lineage information.")
log.exception(err)
raise LineageSummaryAnalyzeException()

return lineage_info
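
A usage sketch of the analyzer (the path is illustrative; `get_summary_infos` raises `LineageSummaryAnalyzeException` when parsing fails):

from mindinsight.lineagemgr.summary.lineage_summary_analyzer import (
    LineageSummaryAnalyzer)

info = LineageSummaryAnalyzer.get_summary_infos('/path/to/summary.log')
# Each field holds the latest matching Event, or None if it never appeared.
print(info.train_lineage is not None,
      info.eval_lineage is not None,
      info.dataset_graph is not None)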

Some files were not shown because too many files changed in this diff
