| @@ -0,0 +1,89 @@ | |||
| # Byte-compiled / optimized / DLL files | |||
| __pycache__/ | |||
| *.py[cod] | |||
| htmlcov | |||
| .trash | |||
| .pytest_cache/ | |||
| # Distribution / packaging | |||
| bin/ | |||
| develop-eggs/ | |||
| dist/ | |||
| eggs/ | |||
| lib/ | |||
| lib64/ | |||
| parts/ | |||
| sdist/ | |||
| var/ | |||
| *.egg-info/ | |||
| .installed.cfg | |||
| *.egg | |||
| # Installer logs | |||
| pip-log.txt | |||
| pip-delete-this-directory.txt | |||
| # Editors/IDEs | |||
| .idea/ | |||
| *.sublime-* | |||
| *.swp | |||
| *.save | |||
# Test artifacts
| .coverage | |||
| .cache | |||
# Project-wide git ignore
| # Compiled artifacts | |||
| *.so | |||
| *.whl | |||
# Python compiled files
| *.pyc | |||
| # Emacs backup files | |||
| *~ | |||
| *# | |||
| .#* | |||
| # Vim file artifacts | |||
| .*.sw* | |||
| # Makefile dummy artifacts | |||
| .*-dummy | |||
| # log files | |||
| *.log | |||
| # code coverage | |||
| *.cov | |||
| # Test result xml files | |||
| report.xml | |||
| *.pprof | |||
| results.xml | |||
| TESTS*.xml | |||
| # local project settings | |||
| .settings | |||
| .project | |||
| .gradle | |||
| .idea | |||
| # tox | |||
| .tox/ | |||
| # vscode settings | |||
| .vscode | |||
| package-lock.json | |||
| build/lib | |||
| build/bdist.* | |||
| output/ | |||
| !output/README.md | |||
| third_party/securec/build | |||
| @@ -0,0 +1,201 @@ | |||
| Apache License | |||
| Version 2.0, January 2004 | |||
| http://www.apache.org/licenses/ | |||
| TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION | |||
| 1. Definitions. | |||
| "License" shall mean the terms and conditions for use, reproduction, | |||
| and distribution as defined by Sections 1 through 9 of this document. | |||
| "Licensor" shall mean the copyright owner or entity authorized by | |||
| the copyright owner that is granting the License. | |||
| "Legal Entity" shall mean the union of the acting entity and all | |||
| other entities that control, are controlled by, or are under common | |||
| control with that entity. For the purposes of this definition, | |||
| "control" means (i) the power, direct or indirect, to cause the | |||
| direction or management of such entity, whether by contract or | |||
| otherwise, or (ii) ownership of fifty percent (50%) or more of the | |||
| outstanding shares, or (iii) beneficial ownership of such entity. | |||
| "You" (or "Your") shall mean an individual or Legal Entity | |||
| exercising permissions granted by this License. | |||
| "Source" form shall mean the preferred form for making modifications, | |||
| including but not limited to software source code, documentation | |||
| source, and configuration files. | |||
| "Object" form shall mean any form resulting from mechanical | |||
| transformation or translation of a Source form, including but | |||
| not limited to compiled object code, generated documentation, | |||
| and conversions to other media types. | |||
| "Work" shall mean the work of authorship, whether in Source or | |||
| Object form, made available under the License, as indicated by a | |||
| copyright notice that is included in or attached to the work | |||
| (an example is provided in the Appendix below). | |||
| "Derivative Works" shall mean any work, whether in Source or Object | |||
| form, that is based on (or derived from) the Work and for which the | |||
| editorial revisions, annotations, elaborations, or other modifications | |||
| represent, as a whole, an original work of authorship. For the purposes | |||
| of this License, Derivative Works shall not include works that remain | |||
| separable from, or merely link (or bind by name) to the interfaces of, | |||
| the Work and Derivative Works thereof. | |||
| "Contribution" shall mean any work of authorship, including | |||
| the original version of the Work and any modifications or additions | |||
| to that Work or Derivative Works thereof, that is intentionally | |||
| submitted to Licensor for inclusion in the Work by the copyright owner | |||
| or by an individual or Legal Entity authorized to submit on behalf of | |||
| the copyright owner. For the purposes of this definition, "submitted" | |||
| means any form of electronic, verbal, or written communication sent | |||
| to the Licensor or its representatives, including but not limited to | |||
| communication on electronic mailing lists, source code control systems, | |||
| and issue tracking systems that are managed by, or on behalf of, the | |||
| Licensor for the purpose of discussing and improving the Work, but | |||
| excluding communication that is conspicuously marked or otherwise | |||
| designated in writing by the copyright owner as "Not a Contribution." | |||
| "Contributor" shall mean Licensor and any individual or Legal Entity | |||
| on behalf of whom a Contribution has been received by Licensor and | |||
| subsequently incorporated within the Work. | |||
| 2. Grant of Copyright License. Subject to the terms and conditions of | |||
| this License, each Contributor hereby grants to You a perpetual, | |||
| worldwide, non-exclusive, no-charge, royalty-free, irrevocable | |||
| copyright license to reproduce, prepare Derivative Works of, | |||
| publicly display, publicly perform, sublicense, and distribute the | |||
| Work and such Derivative Works in Source or Object form. | |||
| 3. Grant of Patent License. Subject to the terms and conditions of | |||
| this License, each Contributor hereby grants to You a perpetual, | |||
| worldwide, non-exclusive, no-charge, royalty-free, irrevocable | |||
| (except as stated in this section) patent license to make, have made, | |||
| use, offer to sell, sell, import, and otherwise transfer the Work, | |||
| where such license applies only to those patent claims licensable | |||
| by such Contributor that are necessarily infringed by their | |||
| Contribution(s) alone or by combination of their Contribution(s) | |||
| with the Work to which such Contribution(s) was submitted. If You | |||
| institute patent litigation against any entity (including a | |||
| cross-claim or counterclaim in a lawsuit) alleging that the Work | |||
| or a Contribution incorporated within the Work constitutes direct | |||
| or contributory patent infringement, then any patent licenses | |||
| granted to You under this License for that Work shall terminate | |||
| as of the date such litigation is filed. | |||
| 4. Redistribution. You may reproduce and distribute copies of the | |||
| Work or Derivative Works thereof in any medium, with or without | |||
| modifications, and in Source or Object form, provided that You | |||
| meet the following conditions: | |||
| (a) You must give any other recipients of the Work or | |||
| Derivative Works a copy of this License; and | |||
| (b) You must cause any modified files to carry prominent notices | |||
| stating that You changed the files; and | |||
| (c) You must retain, in the Source form of any Derivative Works | |||
| that You distribute, all copyright, patent, trademark, and | |||
| attribution notices from the Source form of the Work, | |||
| excluding those notices that do not pertain to any part of | |||
| the Derivative Works; and | |||
| (d) If the Work includes a "NOTICE" text file as part of its | |||
| distribution, then any Derivative Works that You distribute must | |||
| include a readable copy of the attribution notices contained | |||
| within such NOTICE file, excluding those notices that do not | |||
| pertain to any part of the Derivative Works, in at least one | |||
| of the following places: within a NOTICE text file distributed | |||
| as part of the Derivative Works; within the Source form or | |||
| documentation, if provided along with the Derivative Works; or, | |||
| within a display generated by the Derivative Works, if and | |||
| wherever such third-party notices normally appear. The contents | |||
| of the NOTICE file are for informational purposes only and | |||
| do not modify the License. You may add Your own attribution | |||
| notices within Derivative Works that You distribute, alongside | |||
| or as an addendum to the NOTICE text from the Work, provided | |||
| that such additional attribution notices cannot be construed | |||
| as modifying the License. | |||
| You may add Your own copyright statement to Your modifications and | |||
| may provide additional or different license terms and conditions | |||
| for use, reproduction, or distribution of Your modifications, or | |||
| for any such Derivative Works as a whole, provided Your use, | |||
| reproduction, and distribution of the Work otherwise complies with | |||
| the conditions stated in this License. | |||
| 5. Submission of Contributions. Unless You explicitly state otherwise, | |||
| any Contribution intentionally submitted for inclusion in the Work | |||
| by You to the Licensor shall be under the terms and conditions of | |||
| this License, without any additional terms or conditions. | |||
| Notwithstanding the above, nothing herein shall supersede or modify | |||
| the terms of any separate license agreement you may have executed | |||
| with Licensor regarding such Contributions. | |||
| 6. Trademarks. This License does not grant permission to use the trade | |||
| names, trademarks, service marks, or product names of the Licensor, | |||
| except as required for reasonable and customary use in describing the | |||
| origin of the Work and reproducing the content of the NOTICE file. | |||
| 7. Disclaimer of Warranty. Unless required by applicable law or | |||
| agreed to in writing, Licensor provides the Work (and each | |||
| Contributor provides its Contributions) on an "AS IS" BASIS, | |||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| implied, including, without limitation, any warranties or conditions | |||
| of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A | |||
| PARTICULAR PURPOSE. You are solely responsible for determining the | |||
| appropriateness of using or redistributing the Work and assume any | |||
| risks associated with Your exercise of permissions under this License. | |||
| 8. Limitation of Liability. In no event and under no legal theory, | |||
| whether in tort (including negligence), contract, or otherwise, | |||
| unless required by applicable law (such as deliberate and grossly | |||
| negligent acts) or agreed to in writing, shall any Contributor be | |||
| liable to You for damages, including any direct, indirect, special, | |||
| incidental, or consequential damages of any character arising as a | |||
| result of this License or out of the use or inability to use the | |||
| Work (including but not limited to damages for loss of goodwill, | |||
| work stoppage, computer failure or malfunction, or any and all | |||
| other commercial damages or losses), even if such Contributor | |||
| has been advised of the possibility of such damages. | |||
| 9. Accepting Warranty or Additional Liability. While redistributing | |||
| the Work or Derivative Works thereof, You may choose to offer, | |||
| and charge a fee for, acceptance of support, warranty, indemnity, | |||
| or other liability obligations and/or rights consistent with this | |||
| License. However, in accepting such obligations, You may act only | |||
| on Your own behalf and on Your sole responsibility, not on behalf | |||
| of any other Contributor, and only if You agree to indemnify, | |||
| defend, and hold each Contributor harmless for any liability | |||
| incurred by, or claims asserted against, such Contributor by reason | |||
| of your accepting any such warranty or additional liability. | |||
| END OF TERMS AND CONDITIONS | |||
| APPENDIX: How to apply the Apache License to your work. | |||
| To apply the Apache License to your work, attach the following | |||
| boilerplate notice, with the fields enclosed by brackets "[]" | |||
| replaced with your own identifying information. (Don't include | |||
| the brackets!) The text should be enclosed in the appropriate | |||
| comment syntax for the file format. We also recommend that a | |||
| file or class name and description of purpose be included on the | |||
| same "printed page" as the copyright notice for easier | |||
| identification within third-party archives. | |||
| Copyright [yyyy] [name of copyright owner] | |||
| Licensed under the Apache License, Version 2.0 (the "License"); | |||
| you may not use this file except in compliance with the License. | |||
| You may obtain a copy of the License at | |||
| http://www.apache.org/licenses/LICENSE-2.0 | |||
| Unless required by applicable law or agreed to in writing, software | |||
| distributed under the License is distributed on an "AS IS" BASIS, | |||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| See the License for the specific language governing permissions and | |||
| limitations under the License. | |||
| @@ -0,0 +1,7 @@ | |||
| recursive-include mindinsight * | |||
| recursive-exclude * .git | |||
| recursive-exclude * .gitignore | |||
| recursive-exclude * __pycache__ | |||
| recursive-exclude * *.py[co] *.swp | |||
| recursive-exclude mindinsight/ui * | |||
| recursive-include mindinsight/ui/dist * | |||
| @@ -0,0 +1,2 @@ | |||
| MindSpore MindInsight | |||
| Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| @@ -0,0 +1,115 @@ | |||
MindInsight provides MindSpore with easy-to-use debugging and tuning capabilities. It
enables users to visualize their experiments. The features of MindInsight are as follows.
| - Visualization of training process: | |||
| Provide visualization of training process information, | |||
| such as computation graph, training process metrics, etc. | |||
| - Traceability of training result: | |||
  Provide visualization of model parameter information,
| such as training data, model accuracy, etc. | |||
| # Index | |||
| - [More about MindInsight](#more-about-mindinsight) | |||
| - [Installation](#installation) | |||
| - [QuickStart](#quickstart) | |||
| - [Docs](#docs) | |||
| - [Community](#community) | |||
| - [Contributing](#contributing) | |||
| - [Release Notes](#release-notes) | |||
| - [License](#license) | |||
| # More about MindInsight | |||
| The architecture diagram of MindInsight is illustrated as follows: | |||
|  | |||
| ## Summary log file | |||
| The summary log file consists of a series of operation events. Each event contains | |||
| the necessary data for visualization. | |||
MindSpore uses the Callback mechanism to record graph, scalar, image and model
information into the summary log file.
- Scalars and images are recorded by the Summary operator.
- The computation graph is recorded by SummaryRecord after it is compiled.
- The model parameters are recorded by TrainLineage or EvalLineage.
MindInsight provides the capability to analyze summary log files and visualize
the relevant information.
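As a hedged illustration (the `mindinsight start`/`stop` commands and flags follow the project's command-line interface; the port and paths are placeholders), once a training script has produced summary log files, you can launch MindInsight to visualize them:

```bash
# Start the MindInsight service on port 8080, reading logs under ./summary_dir
mindinsight start --port 8080 --summary-base-dir ./summary_dir

# Stop the service when finished
mindinsight stop --port 8080
```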
| ## Visualization | |||
MindInsight provides users with a full-process visualization GUI for
AI development, helping model developers improve model
precision efficiently.
| MindInsight has the following visualization capabilities: | |||
| ### Graph visualization | |||
The GUI of MindInsight displays the structure of the neural network, and the data flow and control
flow of each operator during the entire training process.
| ### Scalar visualization | |||
| The GUI of MindInsight displays the change tendency of a specific scalar during the entire | |||
| training process, such as loss value and accuracy rate of each iteration. | |||
| Two scalar curves can be combined and displayed in one chart. | |||
| ### Image visualization | |||
| The GUI of MindInsight displays both original images and enhanced images during the entire | |||
| training process. | |||
| ### Model lineage visualization | |||
| The GUI of MindInsight displays the parameters and metrics of all models, such as the | |||
| learning rate, the number of samples and the loss function of each model. | |||
| ### Dataset Graph visualization | |||
| The GUI of MindInsight displays the pipeline of dataset processing and augmentation. | |||
| ### Dataset Lineage visualization | |||
| The GUI of MindInsight displays the parameters and operations of the dataset processing and augmentation. | |||
| # Installation | |||
| See [Install MindInsight](https://www.mindspore.cn/install/en). | |||
| # QuickStart | |||
| See [guidance](https://www.mindspore.cn/tutorial/en/0.1.0-alpha/advanced_use/visualization_tutorials.html) | |||
| # Docs | |||
| See [API Reference](https://www.mindspore.cn/api/en/master/index.html) | |||
| # Community | |||
| - [MindSpore Slack](https://join.slack.com/t/mindspore/shared_invite/enQtOTcwMTIxMDI3NjM0LTNkMWM2MzI5NjIyZWU5ZWQ5M2EwMTQ5MWNiYzMxOGM4OWFhZjI4M2E5OGI2YTg3ODU1ODE2Njg1MThiNWI3YmQ) - Communication platform for developers. | |||
| # Contributing | |||
Contributions are welcome. See our [Contributor Wiki](https://gitee.com/mindspore/mindspore/blob/master/CONTRIBUTING.md) for more details.
| # Release Notes | |||
For the release notes, see [RELEASE](RELEASE.md).
| # License | |||
| [Apache License 2.0](LICENSE) | |||
| @@ -0,0 +1,9 @@ | |||
# MindInsight
## Release 0.1.0-alpha
| * Training process observation | |||
| * Provides and displays training process information, including computational graphs and training process indicators. | |||
| * Training result tracing | |||
| * Provides functions of tracing and visualizing model training parameter information, including filtering and sorting of training data, model accuracy and training hyperparameters. | |||
| @@ -0,0 +1,6 @@ | |||
| # MindInsight Application Scenarios and Security Risks | |||
1. MindInsight is a local tool that serves its GUI over plain HTTP, which is insecure. You are advised not to use it in cloud services or other scenarios with security requirements; otherwise, data may be stolen.
2. The MindInsight source code restricts access to localhost. If you modify the source code to cancel the localhost binding restriction, data leakage may occur.
| # MindInsight Security Usage Suggestions | |||
- You are advised to create an independent OS user to install and run the MindInsight service; OS user permissions are isolated, which prevents data theft. In addition, you are advised to set a proper log directory size to prevent logging failures caused by insufficient disk space.
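A minimal sketch of the suggested setup, assuming a Linux host (the user name and paths below are examples, not prescribed values):

```bash
# Create a dedicated, unprivileged OS user for the MindInsight service
sudo useradd --create-home mindinsight-svc

# Run the service under that account so other OS users cannot read its data
sudo -u mindinsight-svc -H mindinsight start --port 8080 \
    --summary-base-dir /home/mindinsight-svc/summary
```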
| @@ -0,0 +1,121 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd. | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| SCRIPT_BASEDIR=$( | |||
| cd "$(dirname "$0")" || exit | |||
| pwd | |||
| ) | |||
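# Retag the pure-Python wheel produced by bdist_wheel with the local CPython
# version and platform (e.g. cp37-cp37m-linux_x86_64), since the package ships
# a compiled crc32 extension and is not actually platform-independent.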
| rename_wheel() { | |||
| VERSION="$1" | |||
| PACKAGE_LIST=$(ls mindinsight-*-any.whl) || exit | |||
| for PACKAGE_ORIG in ${PACKAGE_LIST}; do | |||
| MINDINSIGHT_VERSION=$(echo "${PACKAGE_ORIG}" | awk -F"-" '{print $2}') | |||
| PYTHON_VERSION_NUM=$(echo "${VERSION}" | awk -F"." '{print $1$2}') | |||
| PYTHON_VERSION_TAG="cp${PYTHON_VERSION_NUM}" | |||
| PYTHON_ABI_TAG="cp${PYTHON_VERSION_NUM}m" | |||
| OS_NAME=$(uname | tr '[:upper:]' '[:lower:]') | |||
        # uname -m is the portable machine hardware name (e.g. x86_64); uname -i is unsupported on some systems.
        MACHINE_TAG="${OS_NAME}_$(uname -m)"
| PACKAGE_NEW="mindinsight-${MINDINSIGHT_VERSION}-${PYTHON_VERSION_TAG}-${PYTHON_ABI_TAG}-${MACHINE_TAG}.whl" | |||
| mv "${PACKAGE_ORIG}" "${PACKAGE_NEW}" | |||
| done | |||
| } | |||
| build_wheel() { | |||
| PROJECT_BASEDIR=$(cd "$(dirname "$SCRIPT_BASEDIR")" || exit; pwd) | |||
| cd "${PROJECT_BASEDIR}" || exit | |||
| if [ $# -gt 0 ]; then | |||
| if [ "$1" = "clean" ]; then | |||
| echo "start cleaning mindinsight" | |||
| clean_files | |||
| echo "clean mindinsight done" | |||
| else | |||
| echo "unknown command: $1" | |||
| fi | |||
| exit | |||
| fi | |||
| echo "start building mindinsight" | |||
| clean_files | |||
| PYTHON=$(command -v python3 || command -v python) | |||
| if [ -z "${PYTHON}" ]; then | |||
| echo "Could not find python3 or python command" | |||
| exit 1 | |||
| fi | |||
| PYTHON_VERSION=$(${PYTHON} -c "import platform; print(platform.python_version())" | grep '^3.*') | |||
| if [ -z "${PYTHON_VERSION}" ]; then | |||
| echo "Could not find Python 3" | |||
| exit 1 | |||
| fi | |||
    rm -rf output
| mkdir output | |||
| ${PYTHON} setup.py bdist_wheel | |||
| if [ ! -x "dist" ]; then | |||
| echo "Build failed" | |||
| exit 1 | |||
| fi | |||
| mv dist/mindinsight-*-any.whl output/ | |||
| cd output || exit | |||
| rename_wheel "${PYTHON_VERSION}" | |||
| cd - >/dev/null 2>&1 || exit | |||
| clean_files | |||
| echo "Build success, output directory is: ${PROJECT_BASEDIR}/output" | |||
| } | |||
| clean_files() { | |||
| rm -rf third_party/build | |||
| rm -rf build/lib | |||
| rm -rf build/bdist.* | |||
| rm -rf mindinsight.egg-info | |||
| rm -rf dist | |||
| } | |||
| show_usage() { | |||
| echo "Build mindinsight" | |||
| echo "" | |||
| echo "usage: build.sh [-h] [clean]" | |||
| echo "" | |||
| echo "options:" | |||
| echo " -h show usage info" | |||
| echo " clean clean build files" | |||
| } | |||
| check_opts() { | |||
| while getopts ':h' OPT; do | |||
| case "$OPT" in | |||
| h) | |||
| show_usage | |||
| exit 0 | |||
| ;; | |||
| \?) | |||
| show_usage | |||
| exit 1 | |||
| ;; | |||
| esac | |||
| done | |||
| } | |||
| check_opts "$@" | |||
| cd "${SCRIPT_BASEDIR}" || exit | |||
| build_wheel "$@" | |||
| @@ -0,0 +1,90 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd. | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| SCRIPT_BASEDIR=$( | |||
| cd "$(dirname "$0")" || exit | |||
| pwd | |||
| ) | |||
| THIRD_PARTY_DIR=$(realpath "${SCRIPT_BASEDIR}/../../third_party") | |||
| SECUREC_SOURCE_DIR="${THIRD_PARTY_DIR}/securec" | |||
| build_securec() { | |||
| CMAKE=$(command -v cmake) | |||
| if [ -z "${CMAKE}" ]; then | |||
| echo "Could not find cmake command" | |||
| exit 1 | |||
| fi | |||
| cd "${SECUREC_SOURCE_DIR}" || exit | |||
| rm -rf build | |||
| mkdir build | |||
| cd build || exit | |||
| ${CMAKE} .. | |||
| make | |||
| cd - >/dev/null 2>&1 || exit | |||
| } | |||
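# Compile the crc32 pybind11 extension against the securec static library and
# install it under mindinsight/datavisual/utils.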
| build_crc32() { | |||
| CPP=$(command -v c++) | |||
| if [ -z "${CPP}" ]; then | |||
| echo "Could not find c++ command" | |||
| exit 1 | |||
| fi | |||
| PYTHON=$(command -v python3 || command -v python) | |||
| if [ -z "${PYTHON}" ]; then | |||
| echo "Could not find python3 or python command" | |||
| exit 1 | |||
| fi | |||
| PYTHON_VERSION=$(${PYTHON} -c "import platform; print(platform.python_version())" | grep '^3.*') | |||
| if [ -z "${PYTHON_VERSION}" ]; then | |||
| echo "Could not find Python 3" | |||
| exit 1 | |||
| fi | |||
| DATAVISUAL_DIR=$(realpath "${SCRIPT_BASEDIR}/../../mindinsight/datavisual") | |||
| CRC32_SOURCE_DIR="${DATAVISUAL_DIR}/utils/crc32" | |||
| CRC32_OUTPUT_DIR="${DATAVISUAL_DIR}/utils" | |||
| CRC32_SO_FILE="crc32$(python3-config --extension-suffix)" | |||
| rm -f "${CRC32_SOURCE_DIR}/${CRC32_SO_FILE}" | |||
| rm -f "${CRC32_OUTPUT_DIR}/${CRC32_SO_FILE}" | |||
| cd "${CRC32_SOURCE_DIR}" || exit | |||
| PYBIND11_INCLUDES=$(${PYTHON} -m pybind11 --includes) | |||
| if [ -z "${PYBIND11_INCLUDES}" ]; then | |||
| echo "Could not find pybind11 module" | |||
| exit 1 | |||
| fi | |||
| PYTHON_INCLUDE=$(echo "${PYBIND11_INCLUDES}" | awk '{print $1}' | sed "s/^-I//g") | |||
| PYTHON_HEADERS=$(echo "${PYBIND11_INCLUDES}" | awk '{print $2}' | sed "s/^-I//g") | |||
    ${CPP} -O3 -shared -std=c++11 -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 \
| -Wno-maybe-uninitialized -Wno-unused-parameter -Wall -Wl,-z,relro,-z,now,-z,noexecstack \ | |||
| -I"${THIRD_PARTY_DIR}" -I"${DATAVISUAL_DIR}/utils" -I"${PYTHON_INCLUDE}" -I"${PYTHON_HEADERS}" \ | |||
| -o "${CRC32_SO_FILE}" crc32.cc "${SECUREC_SOURCE_DIR}/build/src/libsecurec.a" | |||
| if [ ! -f "${CRC32_SO_FILE}" ]; then | |||
| echo "crc so file does not exist, build failed" | |||
| exit 1 | |||
| fi | |||
| mv "${CRC32_SO_FILE}" "${CRC32_OUTPUT_DIR}" | |||
| } | |||
| cd "${SCRIPT_BASEDIR}" || exit | |||
| build_securec | |||
| cd "${SCRIPT_BASEDIR}" || exit | |||
| build_crc32 | |||
| @@ -0,0 +1,49 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd. | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| SCRIPT_BASEDIR=$( | |||
| cd "$(dirname "$0")" || exit | |||
| pwd | |||
| ) | |||
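# Build the front-end with npm; the output in mindinsight/ui/dist is what gets
# packaged into the wheel (see MANIFEST.in).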
| build_ui() { | |||
| NPM=$(command -v npm) | |||
| if [ -z "${NPM}" ]; then | |||
| echo "Could not find npm command" | |||
| exit 1 | |||
| fi | |||
| UI_SOURCE_DIR=$(realpath "${SCRIPT_BASEDIR}/../../mindinsight/ui") | |||
| cd "${UI_SOURCE_DIR}" || exit | |||
| rm -rf dist | |||
| ${NPM} config set strict-ssl false | |||
| ${NPM} config set unsafe-perm true | |||
| ${NPM} config set user 0 | |||
| ${NPM} install | |||
| ${NPM} run build | |||
| if [ ! -f "dist/index.html" ]; then | |||
| echo "dist does not have file index.html, build failed" | |||
| exit 1 | |||
| fi | |||
| rm -rf node_modules | |||
| } | |||
| cd "${SCRIPT_BASEDIR}" || exit | |||
| build_ui | |||
| @@ -0,0 +1,3 @@ | |||
| # MindInsight Documentation | |||
| The MindInsight documentation is in the [MindSpore Docs](https://gitee.com/mindspore/docs) repository. | |||
| @@ -0,0 +1,26 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Mindinsight init module.""" | |||
| from mindinsight._version import VERSION | |||
| __version__ = VERSION | |||
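# Note: the version components below are kept as strings, e.g. ('0', '1', '0').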
| __version_info__ = tuple(VERSION.split('.')) | |||
| __all__ = [ | |||
| '__version__', | |||
| '__version_info__' | |||
| ] | |||
| @@ -0,0 +1,19 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Mindinsight main module.""" | |||
| from mindinsight.utils.command import main | |||
if __name__ == '__main__':
    main()
| @@ -0,0 +1,17 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Mindinsight version module.""" | |||
| VERSION = '0.1.0' | |||
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,130 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Web application module.""" | |||
| import os | |||
| from importlib import import_module | |||
| from werkzeug.datastructures import Headers | |||
| from werkzeug.exceptions import HTTPException | |||
| from flask import Flask | |||
| from flask import request | |||
| from flask import Response | |||
| from flask_cors import CORS | |||
| from mindinsight.conf import settings | |||
| from mindinsight.utils.hook import HookUtils | |||
| from mindinsight.datavisual.common.log import logger | |||
| from mindinsight.datavisual.common.exceptions import RequestMethodNotAllowed | |||
| from mindinsight.datavisual.common import error_handler | |||
| from mindinsight.datavisual.utils.tools import find_app_package | |||
| from mindinsight.datavisual.utils.tools import get_img_mimetype | |||
| from mindinsight.utils.exceptions import MindInsightException | |||
| def get_security_headers(): | |||
| """Get security headers.""" | |||
| domain_white_list = [] | |||
| for hook in HookUtils.instance().hooks(): | |||
| domain_white_list += hook.register_secure_domains() | |||
| content_security_policy = { | |||
| 'img-src': ["'self'", 'data:'], | |||
| 'style-src': ["'self'", "'unsafe-inline'"], | |||
| 'frame-src': ["'self'"] + domain_white_list, | |||
| 'frame-ancestors': ["'self'"] + domain_white_list, | |||
| 'default-src': ["'self'"], | |||
| } | |||
| headers = { | |||
| 'X-Frame-Options': 'SAMEORIGIN', | |||
| 'X-XSS-Protection': '1; mode=block', | |||
| 'X-Content-Type-Options': 'nosniff', | |||
| 'Access-Control-Allow-Methods': ', '.join(settings.SUPPORT_REQUEST_METHODS), | |||
| 'Content-Security-Policy': '; '.join([ | |||
| f"{k} {' '.join(v)}" for k, v in content_security_policy.items() | |||
| ]), | |||
| 'X-Download-Options': 'noopen', | |||
| 'Cache-Control': 'no-store', | |||
| 'Pragma': 'no-cache' | |||
| } | |||
| return list(headers.items()) | |||
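# Computed once at import time; each CSP entry renders as "<directive> <sources>",
# joined with "; ", e.g. "img-src 'self' data:; style-src 'self' 'unsafe-inline'".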
| SECURITY_HEADERS = get_security_headers() | |||
| class CustomResponse(Response): | |||
| """Define custom response.""" | |||
    def __init__(self, response=None, **kwargs):
        # Work on a copy: appending to the module-level SECURITY_HEADERS would
        # grow it on every bytes response and leak Content-Type across responses.
        security_headers = list(SECURITY_HEADERS)
        headers = kwargs.get("headers")
        if isinstance(response, bytes):
            mimetype = get_img_mimetype(response)
            security_headers.append(('Content-Type', mimetype))
        if headers is None:
            headers = Headers(security_headers)
        else:
            for header in security_headers:
                headers.add(*header)
        kwargs['headers'] = headers
        super(CustomResponse, self).__init__(response, **kwargs)
| def _init_app_module(app): | |||
| """ | |||
| Init app module. | |||
| Args: | |||
| app (Flask): An instance of Flask. | |||
| """ | |||
| packages = find_app_package() | |||
| for package in packages: | |||
| try: | |||
| app_module = import_module(package) | |||
| app_module.init_module(app) | |||
| except AttributeError: | |||
| logger.debug('[%s].init_module not exists.', package) | |||
| def before_request(): | |||
| """A function to run before each request.""" | |||
| if request.method not in settings.SUPPORT_REQUEST_METHODS: | |||
| raise RequestMethodNotAllowed() | |||
| def create_app(): | |||
| """Set flask APP config, and start the data manager.""" | |||
| static_url_path = "/static" | |||
| static_folder_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.pardir, 'ui', 'dist', 'static')) | |||
| app = Flask(__name__, static_url_path=static_url_path, static_folder=static_folder_path) | |||
| if settings.ENABLE_CORS: | |||
| CORS(app, supports_credentials=True) | |||
| app.before_request(before_request) | |||
| app.register_error_handler(HTTPException, error_handler.handle_http_exception_error) | |||
| app.register_error_handler(MindInsightException, error_handler.handle_mindinsight_error) | |||
| app.register_error_handler(Exception, error_handler.handle_unknown_error) | |||
| app.response_class = CustomResponse | |||
| _init_app_module(app) | |||
| return app | |||
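# WSGI entry point, served by gunicorn as "mindinsight.backend.application:APP"
# (see mindinsight/backend/run.py).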
| APP = create_app() | |||
| @@ -0,0 +1,18 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Config.""" | |||
| import os | |||
| WEB_CONFIG_DIR = os.path.dirname(__file__) | |||
| @@ -0,0 +1,45 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Config file for gunicorn.""" | |||
| import os | |||
| import threading | |||
| from importlib import import_module | |||
| import gunicorn | |||
| gunicorn.SERVER_SOFTWARE = 'unknown' | |||
| worker_class = 'sync' | |||
| workers = 1 | |||
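# A single sync worker with a capped thread pool: 2*CPU+1 threads, at most 30.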
threads = min(30, (os.cpu_count() or 1) * 2 + 1)  # os.cpu_count() may return None
| worker_connections = 1000 | |||
| timeout = 30 | |||
| graceful_timeout = 30 | |||
| daemon = True | |||
| captureoutput = True | |||
# Write gunicorn's default access log to the stream; the MindInsight logger writes gunicorn logs to file.
| accesslog = '-' | |||
| def on_starting(server): | |||
| """Hook function on starting gunicorn process.""" | |||
| hook_module = import_module('mindinsight.utils.hook') | |||
| for hook in hook_module.HookUtils.instance().hooks(): | |||
| threading.Thread(target=hook.on_startup, args=(server.log,)).start() | |||
| @@ -0,0 +1,38 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Datavisual.""" | |||
| from mindinsight.backend.datavisual.static_resource_api import init_module as static_init_module | |||
| from mindinsight.backend.datavisual.task_manager_api import init_module as task_init_module | |||
| from mindinsight.backend.datavisual.train_visual_api import init_module as train_init_module | |||
| from mindinsight.conf import settings | |||
| from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER | |||
| def init_module(app): | |||
| """ | |||
| Interface to init module. | |||
| Args: | |||
| app (Flask): An instance of Flask. | |||
| """ | |||
| static_init_module(app) | |||
| task_init_module(app) | |||
| train_init_module(app) | |||
| DATA_MANAGER.start_load_data(reload_interval=int(settings.RELOAD_INTERVAL), | |||
| max_threads_count=int(settings.MAX_THREADS_COUNT)) | |||
| @@ -0,0 +1,46 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Static resource api.""" | |||
| import os | |||
| import sys | |||
| from flask import current_app | |||
| from flask import send_from_directory | |||
| from flask import Blueprint | |||
| APP_PATH = os.path.realpath(os.path.dirname(sys.argv[0])) | |||
| BLUEPRINT = Blueprint("static_resource", __name__) | |||
| @BLUEPRINT.route("/", methods=["GET"]) | |||
| def index(): | |||
| """Interface to return static index.html.""" | |||
| return send_from_directory(get_index_resource_dir(), "index.html") | |||
| def get_index_resource_dir(): | |||
| """Interface to return index.html resource directory.""" | |||
| return os.path.realpath(os.path.join(APP_PATH, current_app.static_folder, os.pardir)) | |||
| def init_module(app): | |||
| """ | |||
| Init module entry. | |||
| Args: | |||
        app (Flask): The application obj.
| """ | |||
| app.register_blueprint(BLUEPRINT) | |||
| @@ -0,0 +1,94 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Task manager api. | |||
This module provides the interfaces to task management functions.
| """ | |||
| import os | |||
| from flask import Blueprint | |||
| from flask import request | |||
| from flask import jsonify | |||
| from mindinsight.conf import settings | |||
| from mindinsight.datavisual.utils.tools import str_to_bool | |||
| from mindinsight.datavisual.utils.tools import get_train_id | |||
| from mindinsight.datavisual.processors.train_task_manager import TrainTaskManager | |||
| from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher | |||
| from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER | |||
| BLUEPRINT = Blueprint("task_manager", __name__, url_prefix=settings.URL_PREFIX) | |||
| @BLUEPRINT.route("/datavisual/single-job", methods=["GET"]) | |||
| def query_single_train_task(): | |||
| """Query single train task""" | |||
| plugin_name = request.args.get('plugin_name') | |||
| train_id = get_train_id(request) | |||
| processor = TrainTaskManager(DATA_MANAGER) | |||
| tasks = processor.get_single_train_task(train_id=train_id, plugin_name=plugin_name) | |||
| return jsonify(tasks) | |||
| @BLUEPRINT.route("/datavisual/plugins", methods=["GET"]) | |||
| def query_plugins(): | |||
| """Query plugins.""" | |||
| train_id = get_train_id(request) | |||
| manual_update = request.args.get('manual_update', default='false') | |||
| manual_update = str_to_bool(manual_update, "manual_update") | |||
| processor = TrainTaskManager(DATA_MANAGER) | |||
| plugins = processor.get_plugins(train_id, manual_update) | |||
| return jsonify(plugins) | |||
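# Example (illustrative; the URL prefix comes from settings.URL_PREFIX, e.g. /v1/mindinsight):
#   GET http://xxxx/v1/mindinsight/datavisual/train-jobs?offset=0&limit=10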
| @BLUEPRINT.route("/datavisual/train-jobs", methods=["GET"]) | |||
| def query_train_jobs(): | |||
| """Query train jobs.""" | |||
| offset = request.args.get("offset", default=0) | |||
| limit = request.args.get("limit", default=10) | |||
| summary_watcher = SummaryWatcher() | |||
| total, directories = summary_watcher.list_summary_directories_by_pagination( | |||
| settings.SUMMARY_BASE_DIR, offset, limit) | |||
| train_jobs = [{ | |||
| 'train_id': directory['relative_path'], | |||
| 'relative_path': directory['relative_path'], | |||
| 'create_time': directory['create_time'].strftime('%Y-%m-%d %H:%M:%S'), | |||
| 'update_time': directory['update_time'].strftime('%Y-%m-%d %H:%M:%S'), | |||
| } for directory in directories] | |||
| return jsonify({ | |||
| 'name': os.path.basename(os.path.realpath(settings.SUMMARY_BASE_DIR)), | |||
| 'total': total, | |||
| 'train_jobs': train_jobs, | |||
| }) | |||
| def init_module(app): | |||
| """ | |||
| Init module entry. | |||
| Args: | |||
        app (Flask): The application obj.
| """ | |||
| app.register_blueprint(BLUEPRINT) | |||
| @@ -0,0 +1,156 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Backend interface module. | |||
This module provides the interfaces to the train processor functions.
| """ | |||
| from flask import Blueprint | |||
| from flask import request | |||
| from flask import jsonify | |||
| from mindinsight.conf import settings | |||
| from mindinsight.datavisual.utils.tools import get_train_id | |||
| from mindinsight.datavisual.utils.tools import if_nan_inf_to_none | |||
| from mindinsight.datavisual.processors.images_processor import ImageProcessor | |||
| from mindinsight.datavisual.processors.scalars_processor import ScalarsProcessor | |||
| from mindinsight.datavisual.processors.graph_processor import GraphProcessor | |||
| from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER | |||
| BLUEPRINT = Blueprint("train_visual", __name__, url_prefix=settings.URL_PREFIX) | |||
| @BLUEPRINT.route("/datavisual/image/metadata", methods=["GET"]) | |||
| def image_metadata(): | |||
| """ | |||
    Interface to fetch metadata about the images for a particular run, tag, and zero-indexed sample.
| Returns: | |||
| Response, which contains a list in JSON containing image events, each | |||
| one of which is an object containing items wall_time, step, width, | |||
| height, and query. | |||
| """ | |||
| tag = request.args.get("tag") | |||
| train_id = get_train_id(request) | |||
| processor = ImageProcessor(DATA_MANAGER) | |||
| response = processor.get_metadata_list(train_id, tag) | |||
| return jsonify(response) | |||
| @BLUEPRINT.route("/datavisual/image/single-image", methods=["GET"]) | |||
| def single_image(): | |||
| """ | |||
| Interface to fetch raw image data for a particular image. | |||
| Returns: | |||
| Response, which contains a byte string of image. | |||
| """ | |||
| tag = request.args.get("tag") | |||
| step = request.args.get("step") | |||
| train_id = get_train_id(request) | |||
| processor = ImageProcessor(DATA_MANAGER) | |||
| img_data = processor.get_single_image(train_id, tag, step) | |||
| return img_data | |||
| @BLUEPRINT.route("/datavisual/scalar/metadata", methods=["GET"]) | |||
| def scalar_metadata(): | |||
| """ | |||
| Interface to fetch metadata about the scalars for the particular run and tag. | |||
| Returns: | |||
| Response, which contains a list in JSON containing scalar events, each | |||
        one of which is an object containing items wall_time, step and value.
| """ | |||
| tag = request.args.get("tag") | |||
| train_id = request.args.get("train_id") | |||
| processor = ScalarsProcessor(DATA_MANAGER) | |||
| response = processor.get_metadata_list(train_id, tag) | |||
| metadatas = response['metadatas'] | |||
| for metadata in metadatas: | |||
| value = metadata.get("value") | |||
| metadata["value"] = if_nan_inf_to_none('scalar_value', value) | |||
| return jsonify(response) | |||
| @BLUEPRINT.route("/datavisual/graphs/nodes", methods=["GET"]) | |||
| def graph_nodes(): | |||
| """ | |||
| Interface to get graph nodes. | |||
| Returns: | |||
| Response, which contains a JSON object. | |||
| """ | |||
| name = request.args.get('name', default=None) | |||
| node_type = request.args.get('type', default='name_scope') | |||
| tag = request.args.get("tag", default=None) | |||
| train_id = get_train_id(request) | |||
| graph_process = GraphProcessor(train_id, DATA_MANAGER, tag) | |||
| response = graph_process.get_nodes(name=name, node_type=node_type) | |||
| return jsonify(response) | |||
| @BLUEPRINT.route("/datavisual/graphs/nodes/names", methods=["GET"]) | |||
| def graph_node_names(): | |||
| """ | |||
| Interface to query node names. | |||
| Returns: | |||
| Response, which contains a JSON object. | |||
| """ | |||
| search_content = request.args.get("search") | |||
| offset = request.args.get("offset", default=0) | |||
| limit = request.args.get("limit", default=100) | |||
| tag = request.args.get("tag", default=None) | |||
| train_id = get_train_id(request) | |||
| graph_process = GraphProcessor(train_id, DATA_MANAGER, tag) | |||
| resp = graph_process.search_node_names(search_content, offset, limit) | |||
| return jsonify(resp) | |||
| @BLUEPRINT.route("/datavisual/graphs/single-node", methods=["GET"]) | |||
| def graph_search_single_node(): | |||
| """ | |||
| Interface to search single node. | |||
| Returns: | |||
| Response, which contains a JSON object. | |||
| """ | |||
| name = request.args.get("name") | |||
| tag = request.args.get("tag", default=None) | |||
| train_id = get_train_id(request) | |||
| graph_process = GraphProcessor(train_id, DATA_MANAGER, tag) | |||
| resp = graph_process.search_single_node(name) | |||
| return jsonify(resp) | |||
| def init_module(app): | |||
| """ | |||
| Init module entry. | |||
| Args: | |||
| app (Flask): The application obj. | |||
| """ | |||
| app.register_blueprint(BLUEPRINT) | |||
| @@ -0,0 +1,31 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
Module init file.
| """ | |||
| from mindinsight.backend.lineagemgr.lineage_api import init_module as init_query_module | |||
| def init_module(app): | |||
| """ | |||
| Init module entry. | |||
| Args: | |||
        app (Flask): A Flask instance.
    """
| init_query_module(app) | |||
| @@ -0,0 +1,191 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Lineage restful api.""" | |||
| import json | |||
| import os | |||
| from flask import Blueprint, jsonify, request | |||
| from mindinsight.conf import settings | |||
| from mindinsight.datavisual.utils.tools import get_train_id | |||
| from mindinsight.lineagemgr import filter_summary_lineage, get_summary_lineage | |||
| from mindinsight.lineagemgr.common.validator.validate import validate_path | |||
| from mindinsight.utils.exceptions import MindInsightException, ParamValueError | |||
| BLUEPRINT = Blueprint("lineage", __name__, url_prefix=settings.URL_PREFIX.rstrip("/")) | |||
| @BLUEPRINT.route("/models/model_lineage", methods=["POST"]) | |||
| def search_model(): | |||
| """ | |||
| Get model lineage info. | |||
    Query the model info under the summary base dir and return a list of dicts, each of
    which contains the model's parameters and the count of summary logs.
| Returns: | |||
| str, the model lineage information. | |||
| Raises: | |||
| MindInsightException: If method fails to be called. | |||
| ParamValueError: If parsing json data search_condition fails. | |||
| Examples: | |||
| >>> POST http://xxxx/v1/mindinsight/models/model_lineage | |||
| """ | |||
| search_condition = request.stream.read() | |||
| try: | |||
| search_condition = json.loads(search_condition if search_condition else "{}") | |||
| except Exception: | |||
| raise ParamValueError("Json data parse failed.") | |||
| model_lineage_info = _get_lineage_info( | |||
| lineage_type="model", | |||
| search_condition=search_condition | |||
| ) | |||
| return jsonify(model_lineage_info) | |||
| @BLUEPRINT.route("/datasets/dataset_lineage", methods=["POST"]) | |||
| def get_datasets_lineage(): | |||
| """ | |||
| Get dataset lineage. | |||
| Returns: | |||
| str, the dataset lineage information. | |||
| Raises: | |||
| MindInsightException: If method fails to be called. | |||
| ParamValueError: If parsing json data search_condition fails. | |||
| Examples: | |||
| >>> POST http://xxxx/v1/minddata/datasets/dataset_lineage | |||
| """ | |||
| search_condition = request.stream.read() | |||
| try: | |||
| search_condition = json.loads(search_condition if search_condition else "{}") | |||
| except Exception: | |||
| raise ParamValueError("Json data parse failed.") | |||
| dataset_lineage_info = _get_lineage_info( | |||
| lineage_type="dataset", | |||
| search_condition=search_condition | |||
| ) | |||
| return jsonify(dataset_lineage_info) | |||
| def _get_lineage_info(lineage_type, search_condition): | |||
| """ | |||
| Get lineage info for dataset or model. | |||
| Args: | |||
| lineage_type (str): Lineage type, 'dataset' or 'model'. | |||
| search_condition (dict): Search condition. | |||
| Returns: | |||
| dict, lineage info. | |||
| Raises: | |||
| MindInsightException: If method fails to be called. | |||
| """ | |||
| if 'lineage_type' in search_condition: | |||
| raise ParamValueError("Lineage type does not need to be assigned in a specific interface.") | |||
| if lineage_type == 'dataset': | |||
| search_condition.update({'lineage_type': 'dataset'}) | |||
| summary_base_dir = str(settings.SUMMARY_BASE_DIR) | |||
| try: | |||
| lineage_info = filter_summary_lineage( | |||
| summary_base_dir, search_condition) | |||
| lineages = lineage_info['object'] | |||
| summary_base_dir = os.path.realpath(summary_base_dir) | |||
| length = len(summary_base_dir) | |||
| for lineage in lineages: | |||
| summary_dir = lineage['summary_dir'] | |||
| summary_dir = os.path.realpath(summary_dir) | |||
| if summary_base_dir == summary_dir: | |||
| relative_dir = './' | |||
| else: | |||
| relative_dir = os.path.join(os.curdir, summary_dir[length+1:]) | |||
| lineage['summary_dir'] = relative_dir | |||
| except MindInsightException as exception: | |||
| raise MindInsightException(exception.error, exception.message, http_code=400) | |||
| return lineage_info | |||
| @BLUEPRINT.route("/datasets/dataset_graph", methods=["GET"]) | |||
| def get_dataset_graph(): | |||
| """ | |||
| Get dataset graph. | |||
| Returns: | |||
| str, the dataset graph information. | |||
| Raises: | |||
| MindInsightException: If method fails to be called. | |||
| ParamValueError: If summary_dir is invalid. | |||
| Examples: | |||
| >>> GET http://xxxx/v1/mindinsight/datasets/dataset_graph?train_id=xxx | |||
| """ | |||
| summary_base_dir = str(settings.SUMMARY_BASE_DIR) | |||
| summary_dir = get_train_id(request) | |||
| if summary_dir.startswith('/'): | |||
| validate_path(summary_dir) | |||
| elif summary_dir.startswith('./'): | |||
| summary_dir = os.path.join(summary_base_dir, summary_dir[2:]) | |||
| summary_dir = validate_path(summary_dir) | |||
| else: | |||
raise ParamValueError(
"Summary dir should be an absolute path or "
"a relative path relative to the summary base dir."
| ) | |||
| try: | |||
| dataset_graph = get_summary_lineage( | |||
| summary_dir=summary_dir, | |||
| keys=['dataset_graph'] | |||
| ) | |||
| except MindInsightException as exception: | |||
| raise MindInsightException(exception.error, exception.message, http_code=400) | |||
| if dataset_graph: | |||
| summary_dir_result = dataset_graph.get('summary_dir') | |||
| base_dir_len = len(summary_base_dir) | |||
| if summary_base_dir == summary_dir_result: | |||
| relative_dir = './' | |||
| else: | |||
| relative_dir = os.path.join( | |||
| os.curdir, summary_dir[base_dir_len + 1:] | |||
| ) | |||
| dataset_graph['summary_dir'] = relative_dir | |||
| return jsonify(dataset_graph) | |||
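# Editor's sketch (not part of the original module): querying the dataset
# graph endpoint above; host, port and the train_id value are assumptions.
def _example_get_dataset_graph(train_id='./run1'):
    """Hedged example: GET the dataset graph for one training job."""
    import json as _json
    from urllib import request as urllib_request
    from urllib.parse import quote

    url = ('http://127.0.0.1:8080/v1/mindinsight/datasets/dataset_graph'
           '?train_id=' + quote(train_id, safe=''))
    with urllib_request.urlopen(url) as resp:
        return _json.loads(resp.read().decode('utf-8'))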
| def init_module(app): | |||
| """ | |||
| Init module entry. | |||
| Args: | |||
app (Flask): The application object.
| """ | |||
| app.register_blueprint(BLUEPRINT) | |||
| @@ -0,0 +1,256 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Web service entrance.""" | |||
| import os | |||
| import stat | |||
| import re | |||
| import subprocess | |||
| import time | |||
| import shlex | |||
| from gunicorn.glogging import Logger | |||
| from mindinsight.backend.config import gunicorn_conf | |||
| from mindinsight.backend.config import WEB_CONFIG_DIR | |||
| from mindinsight.conf import settings | |||
| from mindinsight.utils.log import setup_logger | |||
| MINDBOARD_APP_MODULE = "mindinsight.backend.application:APP" | |||
| GUNICORN_LOGGER = "mindinsight.backend.run.GunicornLogger" | |||
| _MIN_PORT = 1 | |||
| _MAX_PORT = 65535 | |||
| def _get_file_size(file_path): | |||
| """ | |||
| Get the file size. | |||
| Args: | |||
| file_path (str): The file path. | |||
| Returns: | |||
int, the file size. If the file does not exist, return 0.
| """ | |||
| try: | |||
| file_size = os.path.getsize(file_path) | |||
| except FileNotFoundError: | |||
| file_size = 0 | |||
| return file_size | |||
| def _is_match_one(sub_string_list, src_string): | |||
| """ | |||
Check whether any sub-string in the list occurs in the source string.
| Args: | |||
| sub_string_list (list): The sub-string list. | |||
| src_string (str): The source string. | |||
| Returns: | |||
bool, True if any sub-string matches, otherwise False.
| """ | |||
| for match_info in sub_string_list: | |||
| if match_info in src_string: | |||
| return True | |||
| return False | |||
| def _check_stat_from_log(log_info): | |||
| """ | |||
| Determine the service startup status based on the log information. | |||
| Args: | |||
| log_info (str): The output log of service startup. | |||
| Returns: | |||
str, the state value, which is one of the following: "unknown", "failed" and "success".
| """ | |||
| server_state = "unknown" | |||
| match_success_info = "Listening at: http://%s:%d" % \ | |||
| (settings.HOST, int(settings.PORT)) | |||
| common_failed_info_list = [ | |||
| "[ERROR] Retrying in 1 second", | |||
| "[INFO] Reason: App failed to load", | |||
| "[ERROR] Exception in worker process" | |||
| ] | |||
| re_pattern = "\\[ERROR\\].+%s.+%d" % \ | |||
| (settings.HOST, int(settings.PORT)) | |||
| # matched failed output log by fuzzy match | |||
| if re.search(re_pattern, log_info) or \ | |||
| _is_match_one(common_failed_info_list, log_info): | |||
| server_state = "failed" | |||
| if match_success_info in log_info: | |||
| server_state = "success" | |||
| return server_state | |||
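# Editor's note (illustration only): with the default HOST/PORT settings
# (127.0.0.1:8080), _check_stat_from_log is expected to behave like this.
def _example_check_stat():
    """Hedged example: expected states for sample gunicorn log lines."""
    assert _check_stat_from_log('Listening at: http://127.0.0.1:8080 (1)') == 'success'
    assert _check_stat_from_log('[ERROR] Retrying in 1 second') == 'failed'
    assert _check_stat_from_log('arbitrary unrelated log line') == 'unknown'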
| def _get_error_log_path(): | |||
| """ | |||
| Get gunicorn error log path. | |||
| Returns: | |||
| str, the path of error log. | |||
| """ | |||
| path = os.path.join(settings.WORKSPACE, 'log/gunicorn/error.log') | |||
| errorlog_abspath = os.path.realpath(path) | |||
| return errorlog_abspath | |||
| def _get_access_log_path(): | |||
| """Get gunicorn access log path.""" | |||
| access_log_path = os.path.join(settings.WORKSPACE, 'log/gunicorn/access.log') | |||
| access_log_path = os.path.realpath(access_log_path) | |||
| return access_log_path | |||
| def _check_state_from_log(log_abspath, start_pos=0): | |||
| """ | |||
| Check the service startup status based on the log file. | |||
| Args: | |||
| log_abspath (str): Absolute path of the log file. | |||
| start_pos (int): Offset position of the log file. | |||
| Returns: | |||
dict, a dict with "state" and "prompt_message" keys.
The value of the "state" key is one of the following: "unknown", "failed" and "success".
The value of the "prompt_message" key is a list of prompt messages.
| """ | |||
| server_is_start = False | |||
| state_result = {"state": "unknown", "prompt_message": []} | |||
| prompt_messages = [] | |||
| match_start_log = "Starting gunicorn" | |||
| with open(log_abspath) as f_log: | |||
| f_log.seek(start_pos) | |||
| for line in f_log.readlines(): | |||
| if match_start_log in line: | |||
| if server_is_start: | |||
| break | |||
| server_is_start = True | |||
| continue | |||
| if server_is_start: | |||
| log_result = _check_stat_from_log(line) | |||
| # ignore "unknown" result | |||
| if log_result != "unknown": | |||
| state_result["state"] = log_result | |||
| if log_result == "failed": | |||
| prompt_messages.append(line.strip()) | |||
| prompt_messages.append( | |||
| "more failed details in log: %s" % log_abspath) | |||
| break | |||
| state_result["prompt_message"].append( | |||
| "service start state: %s" % state_result["state"]) | |||
| for prompt_message in prompt_messages: | |||
| state_result["prompt_message"].append(prompt_message) | |||
| return state_result | |||
| def _check_server_start_stat(log_abspath, start_pos=None): | |||
| """ | |||
Check the server startup status.
| Args: | |||
| log_abspath (str): The log file path. | |||
| start_pos (int): The log file start position. | |||
| Returns: | |||
dict, a dict object that contains the state and prompt_message fields.
| The state values are as follows: "unknown", "failed" and "success". | |||
| """ | |||
| state_result = {"state": "unknown", "prompt_message": []} | |||
# return unknown when the gunicorn error log file is not configured
| if not log_abspath: | |||
| return state_result | |||
| log_pos = _get_file_size(log_abspath) if start_pos is None else start_pos | |||
| try_cnt = 0 | |||
| try_cnt_max = 2 | |||
| while try_cnt < try_cnt_max: | |||
| try_cnt += 1 | |||
| time.sleep(1) | |||
| if _get_file_size(log_abspath) > log_pos: | |||
| state_result.update(_check_state_from_log(log_abspath, log_pos)) | |||
| break | |||
| return state_result | |||
| class GunicornLogger(Logger): | |||
| """Rewrite gunicorn default logger.""" | |||
| def __init__(self, cfg): | |||
| self.access_log = setup_logger('gunicorn', 'access') | |||
| self.error_log = setup_logger('gunicorn', 'error') | |||
| super(GunicornLogger, self).__init__(cfg) | |||
| access_log_path = _get_access_log_path() | |||
| error_log_path = _get_error_log_path() | |||
| os.chmod(access_log_path, stat.S_IREAD | stat.S_IWRITE) | |||
| os.chmod(error_log_path, stat.S_IREAD | stat.S_IWRITE) | |||
| def start(): | |||
| """Start web service.""" | |||
| errorlog_abspath = _get_error_log_path() | |||
| gunicorn_conf_file = os.path.join(WEB_CONFIG_DIR, "gunicorn_conf.py") | |||
| cmd = "gunicorn " \ | |||
| "-b {host}:{port} {app_module} " \ | |||
| "-c {conf_file} " \ | |||
| "--logger-class {logger_class} " \ | |||
| "--access-logformat {log_format}"\ | |||
| .format(host=settings.HOST, | |||
| port=settings.PORT, | |||
| conf_file=gunicorn_conf_file, | |||
| app_module=MINDBOARD_APP_MODULE, | |||
| logger_class=GUNICORN_LOGGER, | |||
| log_format=settings.GUNICORN_ACCESS_FORMAT | |||
| ) | |||
| log_size = _get_file_size(errorlog_abspath) | |||
| # start server | |||
| process = subprocess.Popen( | |||
| shlex.split(cmd), | |||
| shell=False, | |||
| stdin=subprocess.PIPE, | |||
| stdout=subprocess.PIPE, | |||
| stderr=subprocess.PIPE | |||
| ) | |||
| _, stderr = process.communicate() | |||
| if stderr: | |||
| print(stderr.decode()) | |||
# wait for the command to finish successfully when gunicorn runs as a daemon.
| if gunicorn_conf.daemon and process.wait() == 0: | |||
| state_result = _check_server_start_stat(errorlog_abspath, log_size) | |||
| # print gunicorn start state to stdout | |||
| print('Web address: http://{}:{}'.format(settings.HOST, settings.PORT)) | |||
| for line in state_result["prompt_message"]: | |||
| print(line) | |||
| if __name__ == '__main__': | |||
| start() | |||
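# Editor's note: with the default settings, the command assembled in start()
# is expected to look roughly like the following (paths and the access log
# format string abbreviated; this is an illustration, not captured output):
#   gunicorn -b 127.0.0.1:8080 mindinsight.backend.application:APP \
#       -c .../gunicorn_conf.py \
#       --logger-class mindinsight.backend.run.GunicornLogger \
#       --access-logformat '...'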
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,89 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Datavisual hook.""" | |||
| import argparse | |||
| import os | |||
| from mindinsight.conf import settings | |||
| from mindinsight.utils.hook import BaseHook | |||
| class ReloadIntervalAction(argparse.Action): | |||
| """Reload interval action class definition.""" | |||
| def __call__(self, parser, namespace, values, option_string=None): | |||
| """ | |||
| Inherited __call__ method from argparse.Action. | |||
| Args: | |||
| parser (ArgumentParser): Passed-in argument parser. | |||
| namespace (Namespace): Namespace object to hold arguments. | |||
| values (object): Argument values with type depending on argument definition. | |||
| option_string (str): Option string for specific argument name. | |||
| """ | |||
| reload_interval = values | |||
| if reload_interval < 0: | |||
| parser.error(f'{option_string} should be greater than or equal to 0') | |||
| setattr(namespace, self.dest, reload_interval) | |||
| class SummaryBaseDirAction(argparse.Action): | |||
| """Summary base dir action class definition.""" | |||
| def __call__(self, parser, namespace, values, option_string=None): | |||
| """ | |||
| Inherited __call__ method from argparse.Action. | |||
| Args: | |||
| parser (ArgumentParser): Passed-in argument parser. | |||
| namespace (Namespace): Namespace object to hold arguments. | |||
| values (object): Argument values with type depending on argument definition. | |||
| option_string (str): Option string for specific argument name. | |||
| """ | |||
| summary_base_dir = os.path.realpath(values) | |||
| setattr(namespace, self.dest, summary_base_dir) | |||
| class Hook(BaseHook): | |||
| """Hook class definition.""" | |||
| def register_startup_arguments(self, parser): | |||
| """ | |||
| Hook function to register startup arguments. | |||
| Args: | |||
| parser (ArgumentParser): Specify parser to which arguments are added. | |||
| """ | |||
| parser.add_argument( | |||
| '--reload-interval', | |||
| type=int, | |||
| action=ReloadIntervalAction, | |||
| help=""" | |||
data reload interval (seconds). It should be greater than or equal to 0.
If it equals 0, data is loaded only once. Default value is %s seconds.
| """ % settings.RELOAD_INTERVAL) | |||
| parser.add_argument( | |||
| '--summary-base-dir', | |||
| type=str, | |||
| action=SummaryBaseDirAction, | |||
| help=""" | |||
directory where MindInsight walks through its direct subdirectories
and looks for summary files whose names match the regex 'summary.\\d+' or '\\.pb$'.
Any direct subdirectory containing summary files is treated as a summary
file directory. A summary file existing in summary-base-dir indicates that
summary-base-dir is one of the summary file directories as well. Default
value is the current directory.""")
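# Editor's sketch (not part of the original module): wiring the hook above
# into a parser. Constructing Hook with no arguments is an assumption; the
# actual BaseHook contract may differ.
def _example_parse_startup_args():
    """Hedged example: register and parse the datavisual startup arguments."""
    parser = argparse.ArgumentParser(prog='mindinsight')
    Hook().register_startup_arguments(parser)
    return parser.parse_args(['--reload-interval', '3',
                              '--summary-base-dir', './summary'])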
| @@ -0,0 +1,150 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Conf module.""" | |||
| import os | |||
| import json | |||
| import types | |||
| from importlib import import_module | |||
| class Settings: | |||
| """ | |||
| Definition of Settings class. | |||
| Examples: | |||
| >>> from mindinsight.conf import settings | |||
| >>> print(settings.PORT) | |||
| """ | |||
| _prefix = 'MINDINSIGHT_' | |||
| _explicit_settings = set() | |||
| _default_settings = set() | |||
| def __init__(self): | |||
| """Initialization of Settings.""" | |||
| self.load_from_defaults() | |||
| self.load_from_constants() | |||
| self.refresh() | |||
| def refresh(self): | |||
| """Refresh settings from config file and environment variables.""" | |||
| self.update_from_file() | |||
| self.update_from_env() | |||
| def load_from_defaults(self): | |||
| """Update settings from defaults module.""" | |||
| default_settings = import_module('mindinsight.conf.defaults') | |||
| for setting in dir(default_settings): | |||
| if setting.isupper(): | |||
| setattr(self, setting, getattr(default_settings, setting)) | |||
| self._default_settings.add(setting) | |||
| def load_from_constants(self): | |||
| """Update settings from constants module""" | |||
| constant_settings = import_module('mindinsight.conf.constants') | |||
| for setting in dir(constant_settings): | |||
| if setting.isupper(): | |||
| setattr(self, setting, getattr(constant_settings, setting)) | |||
| def update_from_file(self): | |||
| """Update settings from config file.""" | |||
| config_path = os.environ.get('MINDINSIGHT_CONFIG', '') | |||
| if not config_path: | |||
| return | |||
| config_module = None | |||
| # python:full.path.for.config.module | |||
| if config_path.startswith('python:'): | |||
| config_module = import_module(config_path[len('python:'):]) | |||
| # file:full/path/for/config.py | |||
| elif config_path.startswith('file:'): | |||
| config_path = config_path[len('file:'):] | |||
| module_name = '__mindinsightconfig__' | |||
| config_module = types.ModuleType(module_name) | |||
| machinery = import_module('importlib.machinery') | |||
| loader = machinery.SourceFileLoader(module_name, config_path) | |||
| loader.exec_module(config_module) | |||
| if config_module is None: | |||
| return | |||
| for setting in dir(config_module): | |||
| if setting.isupper() and setting in self._default_settings: | |||
| setting_value = getattr(config_module, setting) | |||
| setattr(self, setting, setting_value) | |||
| self._explicit_settings.add(setting) | |||
| def update_from_env(self): | |||
| """Update settings from environment variables.""" | |||
| for key, value in os.environ.items(): | |||
| if not key.startswith(self._prefix): | |||
| continue | |||
| setting = key[len(self._prefix):] | |||
| if setting not in self._default_settings: | |||
| continue | |||
| setting_value = getattr(self, setting) | |||
| if isinstance(setting_value, bool): | |||
| value = (value == 'True') | |||
| elif isinstance(setting_value, (int, float)): | |||
| value = type(setting_value)(value) | |||
| elif isinstance(setting_value, (list, dict)): | |||
| value = json.loads(value) | |||
| setattr(self, setting, value) | |||
| self._explicit_settings.add(setting) | |||
| def config_workspace(self, workspace): | |||
| """ | |||
| Config workspace value. | |||
| Args: | |||
| workspace (str): Path of workspace. | |||
| """ | |||
| setattr(self, 'WORKSPACE', workspace) | |||
| self._explicit_settings.add('WORKSPACE') | |||
| def is_overridden(self, setting_name): | |||
| """ | |||
| Check if specified setting is overridden. | |||
| Args: | |||
| setting_name (str): Setting name to be checked. | |||
| Returns: | |||
| bool, indicate whether given setting name is overridden. | |||
| """ | |||
| return setting_name in self._explicit_settings | |||
| def dump(self): | |||
| """ | |||
| Dump settings data. | |||
| Returns: | |||
| dict, json formatted data of settings. | |||
| """ | |||
| config = {} | |||
| for setting in dir(self): | |||
| if setting.isupper(): | |||
| config[setting] = getattr(self, setting) | |||
| return config | |||
| settings = Settings() | |||
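# Editor's note: a hedged illustration of the override order implemented
# above. PORT is defined in mindinsight.conf.defaults, so the MINDINSIGHT_
# prefixed environment variable can override it:
#   $ MINDINSIGHT_PORT=9090 python -c \
#       "from mindinsight.conf import settings; print(settings.PORT)"
#   9090
# and settings.is_overridden('PORT') would then return True.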
| @@ -0,0 +1,58 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Constants module for mindinsight settings.""" | |||
| import logging | |||
| #################################### | |||
| # Global default settings. | |||
| #################################### | |||
| LOG_FORMAT = '[%(levelname)s] MI(%(process)d:%(thread)d,%(processName)s):%(asctime)s ' \ | |||
| '[%(filepath)s:%(lineno)d][%(sub_module)s] %(message)s' | |||
| GUNICORN_ACCESS_FORMAT = "'%(h)s <%(r)s> %(s)s %(b)s <%(f)s> <%(a)s> %(D)s'" | |||
| LOG_LEVEL = logging.INFO | |||
| # rotating max bytes, default is 50M | |||
| LOG_ROTATING_MAXBYTES = 52428800 | |||
| # rotating backup count, default is 30 | |||
| LOG_ROTATING_BACKUPCOUNT = 30 | |||
| #################################### | |||
| # Web default settings. | |||
| #################################### | |||
| HOST = '127.0.0.1' | |||
# Whether cross-origin resource sharing (CORS) is enabled. Disabled by default.
# If CORS is enabled, `SUPPORT_REQUEST_METHODS` should include the 'OPTIONS' method.
| ENABLE_CORS = False | |||
| SUPPORT_REQUEST_METHODS = {'POST', 'GET', 'PUT', 'DELETE'} | |||
| # url prefix should not end with slash, correct format is /v1/url | |||
| URL_PREFIX = '/v1/mindinsight' | |||
| #################################### | |||
| # Datavisual default settings. | |||
| #################################### | |||
| MAX_THREADS_COUNT = 15 | |||
| MAX_TAG_SIZE_PER_EVENTS_DATA = 300 | |||
| DEFAULT_STEP_SIZES_PER_TAG = 500 | |||
| MAX_GRAPH_TAG_SIZE = 10 | |||
| MAX_IMAGE_STEP_SIZE_PER_TAG = 10 | |||
| MAX_SCALAR_STEP_SIZE_PER_TAG = 1000 | |||
| MAX_GRAPH_STEP_SIZE_PER_TAG = 1 | |||
| @@ -0,0 +1,32 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Defaults module for mindinsight settings.""" | |||
| import os | |||
| #################################### | |||
| # Global default settings. | |||
| #################################### | |||
| WORKSPACE = os.path.join(os.environ['HOME'], 'mindinsight') | |||
| #################################### | |||
| # Web default settings. | |||
| #################################### | |||
| PORT = 8080 | |||
| #################################### | |||
| # Datavisual default settings. | |||
| #################################### | |||
| RELOAD_INTERVAL = 3 # Seconds | |||
| SUMMARY_BASE_DIR = os.getcwd() | |||
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,39 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Enums.""" | |||
| from enum import Enum | |||
class BaseEnum(Enum):
"""Base enum class."""
| @classmethod | |||
| def list_members(cls): | |||
| """List all members.""" | |||
| return [member.value for member in cls] | |||
| class DataManagerStatus(BaseEnum): | |||
| """Data manager status.""" | |||
| INIT = 'INIT' | |||
| LOADING = 'LOADING' | |||
| DONE = 'DONE' | |||
| INVALID = 'INVALID' | |||
| class PluginNameEnum(BaseEnum): | |||
| """Plugin Name Enum.""" | |||
| IMAGE = 'image' | |||
| SCALAR = 'scalar' | |||
| GRAPH = 'graph' | |||
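# Editor's note: BaseEnum.list_members simply collects member values, e.g.
#   >>> PluginNameEnum.list_members()
#   ['image', 'scalar', 'graph']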
| @@ -0,0 +1,63 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Handle custom error.""" | |||
| from urllib.parse import quote | |||
| from werkzeug.exceptions import NotFound | |||
| from werkzeug.exceptions import MethodNotAllowed | |||
| from flask import request, jsonify | |||
| from mindinsight.datavisual.common.exceptions import RequestMethodNotAllowed | |||
| from mindinsight.datavisual.common.exceptions import RestfulApiNotExist | |||
| from mindinsight.datavisual.common.log import restful_logger as logger | |||
| from mindinsight.utils.exceptions import UnknownError | |||
| from mindinsight.utils.exceptions import FileSystemPermissionError | |||
| def handle_http_exception_error(ex): | |||
| """Handle http exception error.""" | |||
| logger.warning('%r %r, detail: %r', request.method, quote(request.path), str(ex)) | |||
| if isinstance(ex, NotFound): | |||
| error = RestfulApiNotExist() | |||
| elif isinstance(ex, MethodNotAllowed): | |||
| error = RequestMethodNotAllowed() | |||
| else: | |||
| logger.exception(ex) | |||
| error = UnknownError('System error or http error.') | |||
| res_body = {"error_code": error.error_code, "error_msg": error.message} | |||
| return jsonify(res_body), error.http_code | |||
| def handle_mindinsight_error(ex): | |||
| """Handle mindinsight error.""" | |||
| if int(ex.http_code) < 500: | |||
| logger.warning('%r %r detail: %r', request.method, quote(request.path), ex.message) | |||
| else: | |||
| logger.error('%r %r detail: %r', request.method, quote(request.path), ex.message) | |||
| logger.exception(ex) | |||
| res_body = dict(error_code=ex.error_code, error_msg=ex.message) | |||
| return jsonify(res_body), ex.http_code | |||
| def handle_unknown_error(ex): | |||
| """Handle unknown error.""" | |||
| logger.error('%r %r detail: %r', request.method, quote(request.path), str(ex)) | |||
| logger.exception(ex) | |||
| if isinstance(ex, PermissionError): | |||
| error = FileSystemPermissionError('File System Permission Error') | |||
| else: | |||
| error = UnknownError('System error.') | |||
| res_body = dict(error_code=error.error_code, error_msg=error.message) | |||
| return jsonify(res_body), error.http_code | |||
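# Editor's sketch (not part of the original module): how these handlers might
# be attached to a Flask app. register_error_handler is standard Flask API,
# but where registration actually happens in this project is assumed.
def _example_register_handlers(app):
    """Hedged example: bind the three handlers above to an app."""
    from mindinsight.utils.exceptions import MindInsightException
    app.register_error_handler(NotFound, handle_http_exception_error)
    app.register_error_handler(MethodNotAllowed, handle_http_exception_error)
    app.register_error_handler(MindInsightException, handle_mindinsight_error)
    app.register_error_handler(Exception, handle_unknown_error)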
| @@ -0,0 +1,83 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Define custom exception.""" | |||
| from mindinsight.utils.constant import DataVisualErrors | |||
| from mindinsight.utils.exceptions import MindInsightException | |||
| class RestfulApiNotExist(MindInsightException): | |||
| """404 not found.""" | |||
| def __init__(self): | |||
| error_msg = '404 Not Found.' | |||
| super(RestfulApiNotExist, self).__init__(DataVisualErrors.RESTFUL_API_NOT_EXIST, | |||
| error_msg, | |||
| http_code=404) | |||
| class RequestMethodNotAllowed(MindInsightException): | |||
| """Request method not allowed.""" | |||
| def __init__(self): | |||
| error_msg = '405 Method Not Allowed.' | |||
| super(RequestMethodNotAllowed, self).__init__(DataVisualErrors.REQUEST_METHOD_NOT_ALLOWED, | |||
| error_msg, | |||
| http_code=405) | |||
class PathNotDirectoryError(MindInsightException):
"""Raised when the specified path is not a directory."""
def __init__(self, error_detail):
"""Initialize PathNotDirectoryError."""
| error_msg = 'Specified path is not a directory. Detail: {}'.format(error_detail) | |||
| super(PathNotDirectoryError, self).__init__(DataVisualErrors.PATH_NOT_DIRECTORY_ERROR, | |||
| error_msg, | |||
| http_code=400) | |||
| class SummaryLogPathInvalid(MindInsightException): | |||
| """No valid log file in the path.""" | |||
| def __init__(self): | |||
error_msg = 'No valid summary log file in path.'
| super(SummaryLogPathInvalid, self).__init__(DataVisualErrors.SUMMARY_LOG_PATH_INVALID, | |||
| error_msg, | |||
| http_code=400) | |||
| class CRCFailedError(MindInsightException): | |||
| """CRC fail, record corrupted.""" | |||
| def __init__(self): | |||
| error_msg = 'CRC Failed.' | |||
| super(CRCFailedError, self).__init__(DataVisualErrors.CRC_FAILED, | |||
| error_msg, | |||
| http_code=400) | |||
| class SummaryLogIsLoading(MindInsightException): | |||
| """Data is loading.""" | |||
| def __init__(self, error_detail): | |||
| error_msg = "Data is loading. Detail: %s" % error_detail | |||
| super(SummaryLogIsLoading, self).__init__(DataVisualErrors.SUMMARY_LOG_IS_LOADING, | |||
| error_msg, | |||
| http_code=400) | |||
| class NodeNotInGraphError(MindInsightException): | |||
| """Can not find node in graph error.""" | |||
| def __init__(self): | |||
error_msg = "Cannot find the node in the graph by the given node name."
| super(NodeNotInGraphError, self).__init__(DataVisualErrors.NODE_NOT_IN_GRAPH_ERROR, | |||
| error_msg, | |||
| http_code=400) | |||
| @@ -0,0 +1,19 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Create a logger.""" | |||
| from mindinsight.utils.log import setup_logger | |||
| logger = setup_logger("datavisual", "datavisual") | |||
| restful_logger = setup_logger("restful_api", "restful_api") | |||
| @@ -0,0 +1,102 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Define a validation class which contain all check methods of datavisual module.""" | |||
| from numbers import Number | |||
| from mindinsight.utils.exceptions import ParamValueError | |||
| from mindinsight.utils.exceptions import ParamMissError | |||
| from mindinsight.datavisual.common.enums import PluginNameEnum | |||
| from mindinsight.datavisual.utils.tools import to_int | |||
| class Validation: | |||
| """Validation class, define all check methods.""" | |||
| @classmethod | |||
| def check_offset(cls, offset, default_value=0): | |||
| """ | |||
Check offset parameter; it must be greater than or equal to 0.
| Args: | |||
| offset (Union[str, int]): Value can be string number or int. | |||
| default_value (int): Default value for checked offset. Default: 0. | |||
| Returns: | |||
| int, offset. | |||
| """ | |||
| if offset is None: | |||
| return default_value | |||
| offset = to_int(offset, 'offset') | |||
| if offset < 0: | |||
| raise ParamValueError("'offset' should be greater than or equal to 0.") | |||
| return offset | |||
| @classmethod | |||
| def check_limit(cls, limit, min_value=1, max_value=1000, default_value=100): | |||
| """ | |||
Check limit parameter; it should be between min_value and max_value.
| Args: | |||
| limit (Union[str, int]): Value can be string number or int. | |||
min_value (int): Limit should be greater than or equal to this value. Default: 1.
max_value (int): Limit should be less than or equal to this value. Default: 1000.
| default_value (int): Default value for limit. Default: 100. | |||
| Returns: | |||
| int, limit. | |||
| """ | |||
| if limit is None: | |||
| return default_value | |||
| limit = to_int(limit, 'limit') | |||
| if limit < min_value or limit > max_value: | |||
raise ParamValueError("'limit' should be in [{}, {}].".format(min_value, max_value))
| return limit | |||
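# Editor's note (illustration only): typical results of the two numeric
# validators above, with made-up inputs:
#   >>> Validation.check_offset('5')
#   5
#   >>> Validation.check_limit(None)
#   100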
| @classmethod | |||
| def check_param_empty(cls, **kwargs): | |||
| """ | |||
Check whether params are empty.
Args:
kwargs (Any): Params to check for emptiness.
| Raises: | |||
| ParamMissError: When param missing. | |||
| """ | |||
| for key, value in kwargs.items(): | |||
| # When value is 0, 0.0 or False, it is not empty. | |||
| if isinstance(value, Number): | |||
| continue | |||
| if not value: | |||
| raise ParamMissError(key) | |||
| @classmethod | |||
| def check_plugin_name(cls, plugin_name): | |||
| """ | |||
| Check plugin name. | |||
| Args: | |||
| plugin_name (str): The plugin name. | |||
| Raises: | |||
| ParamValueError: When plugin name is not valid. | |||
| """ | |||
| plugin_name_list = PluginNameEnum.list_members() | |||
| if plugin_name not in plugin_name_list: | |||
raise ParamValueError("'plugin_name' can only be one of {}".format(plugin_name_list))
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,68 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Base file system.""" | |||
| from abc import ABC, abstractmethod | |||
| from collections import namedtuple | |||
| StatInfo = namedtuple("Info", ["size", "mtime"]) | |||
| class BaseFileSystem(ABC): | |||
| """Base class for file systems.""" | |||
| @abstractmethod | |||
| def list_dir(self, path): | |||
| """ | |||
Abstract method for listing entries in a directory.
| Args: | |||
| path (str): Directory path or file path. | |||
| """ | |||
| @abstractmethod | |||
| def is_dir(self, path): | |||
| """ | |||
| Abstract method for determining if it is a directory. | |||
| Args: | |||
| path (str): Directory path or file path. | |||
| """ | |||
| @abstractmethod | |||
| def exists(self, path): | |||
| """ | |||
| Abstract method for determining if it exists. | |||
| Args: | |||
| path (str): Directory path or file path. | |||
| """ | |||
| @abstractmethod | |||
| def file_stat(self, file_path): | |||
| """ | |||
| Abstract method for getting file stat information. | |||
| Args: | |||
| file_path (str): File path. | |||
| """ | |||
| @abstractmethod | |||
| def join(self, path, *paths): | |||
| """ | |||
| Abstract method for combining paths. | |||
| Args: | |||
| path (str): Directory path. | |||
| *paths (str): Path or paths. | |||
| """ | |||
| @@ -0,0 +1,290 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """File handler for file operations.""" | |||
| from mindinsight.utils.exceptions import PathNotExistError | |||
| from mindinsight.datavisual.common.log import logger | |||
| from mindinsight.datavisual.utils.tools import to_str | |||
| from mindinsight.datavisual.data_access.local_file_system import LocalFileSystem | |||
| _DEFAULT_BUFFER_SIZE = 24 * 1024 * 1024 | |||
| # _FILE_SYSTEMS, key: FileProtocolHead, value: FileSystem | |||
| _FILE_SYSTEMS = dict() | |||
| _FILE_SYSTEMS[""] = LocalFileSystem() | |||
| class FileHandler: | |||
| """File handler.""" | |||
| def __init__(self, file_path, mode='rb'): | |||
| """ | |||
| Init FileHandler. | |||
| Args: | |||
| file_path (str): File path. | |||
| mode (Literal['r', 'rb', 'br', 'w', 'wb', 'bw']): It must be | |||
| in ['r', 'rb', 'br', 'w', 'wb', 'bw']. | |||
| """ | |||
logger.debug("The __init__ method enter, param: file_path=%s, "
"mode=%s", file_path, mode)
| if mode not in ('r', 'rb', 'br', 'w', 'wb', 'bw'): | |||
| raise ValueError("mode %s is not supported by FileHandler." % mode) | |||
| self._file_path = to_str(file_path) | |||
| self._file_system = self.get_file_system(self._file_path) | |||
| self._buff_chunk_size = _DEFAULT_BUFFER_SIZE | |||
| self._buff = None | |||
| self._buff_offset = 0 | |||
| self._offset = 0 | |||
| self._binary_mode = 'b' in mode | |||
| @staticmethod | |||
| def get_file_system(path): | |||
| """ | |||
| Get file system object from path. | |||
| Args: | |||
| path (str): Directory path or file path. | |||
| Returns: | |||
| BaseFileSystem, a file system object. | |||
| """ | |||
| path = to_str(path) | |||
| prefix_index = path.find("://") | |||
| prefix = path[:prefix_index] if prefix_index >= 0 else "" | |||
| file_system = _FILE_SYSTEMS.get(prefix, None) | |||
| if file_system is None: | |||
| raise ValueError("No filesystem can be found for prefix %s" % prefix) | |||
| return file_system | |||
| @staticmethod | |||
| def walk(node, forward=True, onerror=None): | |||
| """ | |||
| Traverse path for directory and file tree. | |||
| Args: | |||
| node (str): Current path. | |||
| forward (bool): If True, it will return the sub-directories and files in the top-level | |||
| directory first and then iterate the files in the sub-directories. Default: True. | |||
| onerror (Optional[Callable]): If None, it indicates that errors during file traversal | |||
| will be ignored. Default: None. | |||
| Yields: | |||
| Tuple, (node, sub_dirs, files). | |||
| """ | |||
| logger.debug("The walk method enter, param: node=%s, " | |||
| "forward=%s, onerror=%s.", node, forward, type(onerror)) | |||
| file_system = FileHandler.get_file_system(node) | |||
| node = to_str(node) | |||
| dirs = [] | |||
| try: | |||
| dirs = file_system.list_dir(node) | |||
| except PathNotExistError as err: | |||
| if onerror: | |||
| onerror(err) | |||
| else: | |||
| logger.warning("Get dir list error, dir_path=%s error=%s.", node, str(err)) | |||
| return | |||
| sub_dirs, files = [], [] | |||
| for item in dirs: | |||
| full_path = file_system.join(node, to_str(item)) | |||
| if file_system.is_dir(full_path): | |||
| sub_dirs.append(item) | |||
| else: | |||
| files.append(item) | |||
| result = (node, sub_dirs, files) | |||
| if forward: | |||
| logger.debug("The walk method return, pre result=%s.", result) | |||
| yield result | |||
| for subdir in sub_dirs: | |||
| joined_subdir = file_system.join(node, to_str(subdir)) | |||
| for sub_results in FileHandler.walk(joined_subdir, forward, onerror): | |||
| yield sub_results | |||
| if not forward: | |||
| logger.debug("The walk method return, post result=%s.", result) | |||
| yield result | |||
| def read(self, size=None): | |||
| """ | |||
Read bytes from buffer or file by size. Read from the buffer first; if
there is not enough data in the buffer, data is read from the file system.
| Args: | |||
size (Union[None, int]): Number of bytes to read. If None, read the whole file. Default: None.
Returns:
bytes or str, the content read.
| """ | |||
| if size is None: | |||
| result = self._file_system.read(self._file_path, self._binary_mode) | |||
| self._offset = len(result) | |||
| return result | |||
| result = None | |||
| if self._buff and len(self._buff) > self._buff_offset: | |||
| read_offset = self._buff_offset + size if size is not None else len(self._buff) | |||
| result = self._read_buffer_by_offset(read_offset) | |||
| if size is not None: | |||
| if len(result) == size: | |||
| return result | |||
| size -= len(result) | |||
| read_size = max(self._buff_chunk_size, size) if size is not None else None | |||
| self._buff = self._file_system.read(self._file_path, self._binary_mode, | |||
| read_size, self._offset) | |||
| self._buff_offset = 0 | |||
| read_offset = size if size is not None else len(self._buff) | |||
| chunk = self._read_buffer_by_offset(read_offset) | |||
| result = result + chunk if result else chunk | |||
| return result | |||
| def _read_buffer_by_offset(self, new_buff_offset): | |||
| """ | |||
| Read buffer by offset. | |||
| Args: | |||
| new_buff_offset (int): Ending offset to read. | |||
| Returns: | |||
| str, bytes from old offset to new offset. | |||
| """ | |||
| old_buff_offset = self._buff_offset | |||
| read_size = min(len(self._buff), new_buff_offset) - old_buff_offset | |||
| self._offset += read_size | |||
| self._buff_offset += read_size | |||
| return self._buff[old_buff_offset:old_buff_offset + read_size] | |||
| def reset_offset(self, offset): | |||
| """ | |||
| Reset offset and buff_offset, clean buff. | |||
| Args: | |||
| offset (int): Offset. | |||
| """ | |||
| self._offset = offset | |||
| self._buff = None | |||
| self._buff_offset = 0 | |||
| @staticmethod | |||
| def list_dir(path): | |||
| """ | |||
List entries in a directory.
| Args: | |||
| path (str): Directory path or file path. | |||
| Returns: | |||
list[str], directory entries (sub-directories and files).
| """ | |||
| file_system = FileHandler.get_file_system(path) | |||
| return file_system.list_dir(path) | |||
| @staticmethod | |||
| def is_dir(path): | |||
| """ | |||
| Determine if it is a directory. | |||
| Args: | |||
| path (str): Directory path or file path. | |||
| Returns: | |||
| bool, if it is a directory path, return True. | |||
| """ | |||
| file_system = FileHandler.get_file_system(path) | |||
| return file_system.is_dir(path) | |||
| @staticmethod | |||
| def is_file(path): | |||
| """ | |||
| Determine if it is a file. | |||
| Args: | |||
| path (str): Directory path or file path. | |||
| Returns: | |||
| bool, if it is a file path, return True. | |||
| """ | |||
| file_system = FileHandler.get_file_system(path) | |||
| return file_system.is_file(path) | |||
| @staticmethod | |||
| def exists(path): | |||
| """ | |||
| Determine if it exists. | |||
| Args: | |||
| path (str): Directory path or file path. | |||
| Returns: | |||
| bool, if it exists, return True. | |||
| """ | |||
| file_system = FileHandler.get_file_system(path) | |||
| return file_system.exists(path) | |||
| @staticmethod | |||
| def file_stat(file_path): | |||
| """ | |||
| Get file stat information. | |||
| Args: | |||
| file_path (str): File path. | |||
| Returns: | |||
Namedtuple, the (size, mtime) of the file.
| """ | |||
| file_system = FileHandler.get_file_system(file_path) | |||
| return file_system.file_stat(file_path) | |||
| @staticmethod | |||
| def join(path, *paths): | |||
| """ | |||
| Join paths. | |||
| Args: | |||
| path (str): Directory path. | |||
| paths (str): Path or paths. | |||
| Returns: | |||
| str, the joined path. | |||
| """ | |||
| file_system = FileHandler.get_file_system(path) | |||
| return file_system.join(path, *paths) | |||
| @property | |||
| def offset(self): | |||
| """Get offset.""" | |||
| return self._offset | |||
| @property | |||
| def file_path(self): | |||
| """Get file path.""" | |||
| return self._file_path | |||
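# Editor's sketch (not part of the original module): walking a summary tree
# and reading the head of the first file found; the path is a made-up example.
def _example_walk_and_read(base_dir='/tmp/summary'):
    """Hedged example: traverse a tree, then buffer-read one file."""
    for node, _, files in FileHandler.walk(base_dir):
        if files:
            handler = FileHandler(FileHandler.join(node, files[0]), mode='rb')
            return handler.read(size=16)  # first 16 bytes through the buffer
    return None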
| @@ -0,0 +1,143 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Local File System.""" | |||
| import io | |||
| import os | |||
| from mindinsight.datavisual.common import exceptions | |||
| from mindinsight.datavisual.utils.tools import to_str | |||
| from mindinsight.datavisual.data_access.base_file_system import BaseFileSystem | |||
| from mindinsight.datavisual.data_access.base_file_system import StatInfo | |||
| from mindinsight.utils.exceptions import PathNotExistError | |||
| class LocalFileSystem(BaseFileSystem): | |||
| """Local file system.""" | |||
| def list_dir(self, path): | |||
| """ | |||
List entries in a directory.
| Args: | |||
| path (str): Directory path or file path. | |||
| Returns: | |||
list[str], directory entries (sub-directories and files).
| """ | |||
| path = to_str(path) | |||
| if not self.is_dir(path): | |||
| raise exceptions.PathNotDirectoryError("Path is %s." % path) | |||
| return os.listdir(path) | |||
| def is_dir(self, path): | |||
| """ | |||
| Determine if it is a directory. | |||
| Args: | |||
| path (str): Directory path or file path. | |||
| Returns: | |||
| bool, if it is a directory path, return True. | |||
| """ | |||
| return os.path.isdir(to_str(path)) | |||
| def is_file(self, path): | |||
| """ | |||
| Determine if it is a file. | |||
| Args: | |||
| path (str): Directory path or file path. | |||
| Returns: | |||
| bool, if it is a file path, return True. | |||
| """ | |||
| return os.path.isfile(to_str(path)) | |||
| def exists(self, path): | |||
| """ | |||
| Determine if it exists. | |||
| Args: | |||
| path (str): Directory path or file path. | |||
| Returns: | |||
| bool, if it exists, return True. | |||
| """ | |||
| return os.path.exists(to_str(path)) | |||
| def file_stat(self, file_path): | |||
| """ | |||
| Get file stat information. | |||
| Args: | |||
| file_path (str): File path. | |||
| Returns: | |||
Namedtuple, the (size, mtime) of the file.
| """ | |||
| try: | |||
| file_info = os.stat(to_str(file_path)) | |||
| except OSError: | |||
raise PathNotExistError("File %s does not exist." % file_path)
| return StatInfo(size=file_info.st_size, mtime=file_info.st_mtime) | |||
| @staticmethod | |||
| def read_access(file_path): | |||
| """ | |||
| Determine if it has read permission. | |||
| Args: | |||
| file_path (str): File path. | |||
| Returns: | |||
| bool, if it has read permission, return True. | |||
| """ | |||
| return os.access(to_str(file_path), os.R_OK) | |||
| def join(self, path, *paths): | |||
| """ | |||
| Join paths. | |||
| Args: | |||
| path (str): Directory path. | |||
| paths (str): Path or paths. | |||
| Returns: | |||
| str, the joined path. | |||
| """ | |||
| return os.path.join(path, *paths) | |||
| @staticmethod | |||
| def read(file_path, binary_mode=False, size=None, offset=None): | |||
| """ | |||
| Read file. | |||
| Args: | |||
| file_path (str): File path. | |||
binary_mode (bool): If True, mode will be 'rb'; otherwise 'r'.
| size (int): Size of bytes to read. | |||
| offset (int): Offset of file to read. | |||
| Returns: | |||
bytes or str, the content read.
| """ | |||
| mode = "rb" if binary_mode else "r" | |||
| encoding = None if binary_mode else "utf8" | |||
| with io.open(file_path, mode, encoding=encoding) as file: | |||
| if offset is not None: | |||
| file.seek(offset) | |||
| if size is not None: | |||
| return file.read(size) | |||
| return file.read() | |||
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,70 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| DataLoader is an adapter for all other loaders. | |||
This module can identify which loader should be used to load data.
| """ | |||
| from mindinsight.datavisual.common.log import logger | |||
| from mindinsight.datavisual.data_transform.ms_data_loader import MSDataLoader | |||
| from mindinsight.datavisual.common import exceptions | |||
| class DataLoader: | |||
| """ | |||
| The adapter of all kinds of loaders. | |||
| Args: | |||
| summary_dir (str): A directory path. | |||
| """ | |||
| def __init__(self, summary_dir): | |||
| self._summary_dir = summary_dir | |||
| self._loader = None | |||
def load(self):
"""Load the data, selecting a suitable loader on first call."""
| if self._loader is None: | |||
| ms_dataloader = MSDataLoader(self._summary_dir) | |||
| loaders = [ms_dataloader] | |||
| for loader in loaders: | |||
| if loader.filter_valid_files(): | |||
| self._loader = loader | |||
| break | |||
| if self._loader is None: | |||
| logger.warning("No valid files can be loaded, summary_dir: %s.", self._summary_dir) | |||
| raise exceptions.SummaryLogPathInvalid() | |||
| self._loader.load() | |||
| def get_events_data(self): | |||
| """ | |||
| Get events data from log file. | |||
| Returns: | |||
| Optional[EventsData], None or events data. | |||
| """ | |||
| return self._loader.get_events_data() | |||
| def has_valid_files(self): | |||
| """ | |||
| Check the directory for valid files. | |||
| Returns: | |||
| bool, if the directory has valid files, return True. | |||
| """ | |||
| ms_dataloader = MSDataLoader(self._summary_dir) | |||
| return bool(ms_dataloader.filter_valid_files()) | |||
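# Editor's sketch (not part of the original module): the typical life cycle
# of a DataLoader; the directory is a made-up example, and load() raises
# SummaryLogPathInvalid when no loader accepts the files inside it.
def _example_load_events(summary_dir='/tmp/summary/run1'):
    """Hedged example: load a run directory and fetch its events data."""
    loader = DataLoader(summary_dir)
    if not loader.has_valid_files():
        return None
    loader.load()
    return loader.get_events_data()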
| @@ -0,0 +1,514 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Management of all events data. | |||
| This module provides access to all loaders. | |||
| It can read events data through the DataLoader. | |||
| This module also acts as a thread pool manager. | |||
| """ | |||
| import threading | |||
| import time | |||
| from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED | |||
| from mindinsight.conf import settings | |||
| from mindinsight.datavisual.common import exceptions | |||
| from mindinsight.datavisual.common.log import logger | |||
| from mindinsight.datavisual.common.enums import DataManagerStatus | |||
| from mindinsight.datavisual.common.enums import PluginNameEnum | |||
| from mindinsight.datavisual.data_transform.loader_generators.loader_generator import MAX_DATA_LOADER_SIZE | |||
| from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator | |||
| from mindinsight.utils.exceptions import MindInsightException | |||
| from mindinsight.utils.exceptions import ParamValueError | |||
| class DataManager: | |||
| """ | |||
| DataManager manages a pool of loaders which help access events data. | |||
| Each loader handles the data of its events; | |||
| a loader corresponds to an events_data. | |||
| The DataManager builds a pool including all the data loaders, | |||
| and each data loader provides extracting | |||
| methods to get the information of events. | |||
| """ | |||
| def __init__(self, loader_generators): | |||
| """ | |||
| Initialize the pool of loader and the dict of name-to-path. | |||
| Args: | |||
| loader_generators (list[LoaderGenerator]): Loader generators help generate loaders. | |||
| self._status: Refer `datavisual.common.enums.DataManagerStatus`. | |||
| self._loader_pool: {'loader_id': <LoaderStruct>}. | |||
| """ | |||
| self._loader_pool = {} | |||
| self._deleted_id_list = [] | |||
| self._status = DataManagerStatus.INIT.value | |||
| self._status_mutex = threading.Lock() | |||
| self._loader_pool_mutex = threading.Lock() | |||
| self._max_threads_count = 30 | |||
| self._reload_interval = 3 | |||
| self._loader_generators = loader_generators | |||
| def _add_loader(self, loader): | |||
| """ | |||
| Add a loader to load data. | |||
| Args: | |||
| loader (LoaderStruct): An object of `Loader`. | |||
| """ | |||
| if len(self._loader_pool) >= MAX_DATA_LOADER_SIZE: | |||
| delete_number = len(self._loader_pool) - MAX_DATA_LOADER_SIZE + 1 | |||
| sorted_loaders = sorted(self._loader_pool.items(), | |||
| key=lambda loader: loader[1].latest_update_time) | |||
| for index in range(delete_number): | |||
| delete_loader_id = sorted_loaders[index][0] | |||
| self._delete_loader(delete_loader_id) | |||
| self._loader_pool.update({loader.loader_id: loader}) | |||
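| # Eviction sketch (illustrative comment with assumed values): if | |||
| # MAX_DATA_LOADER_SIZE were 2 and the pool already held loaders 'a' and 'b', | |||
| # adding loader 'c' would first delete whichever of 'a' and 'b' has the | |||
| # smaller latest_update_time, keeping only the most recently updated loaders. | |||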
| def _delete_loader(self, loader_id): | |||
| """ | |||
| Delete loader from loader pool by loader id. | |||
| Args: | |||
| loader_id (str): ID of loader. | |||
| """ | |||
| if self._loader_pool.get(loader_id) is not None: | |||
| logger.debug("delete loader %s", loader_id) | |||
| self._loader_pool.pop(loader_id) | |||
| def _execute_loader(self, loader_id): | |||
| """ | |||
| Load data from data_loader. | |||
| If anything goes wrong while loading, log a warning and delete the loader. | |||
| Args: | |||
| loader_id (str): An ID for `Loader`. | |||
| """ | |||
| try: | |||
| with self._loader_pool_mutex: | |||
| loader = self._loader_pool.get(loader_id, None) | |||
| if loader is None: | |||
| logger.debug("Loader %r has been deleted, will not load data.", loader_id) | |||
| return | |||
| loader.data_loader.load() | |||
| except MindInsightException as ex: | |||
| logger.warning("Data loader %r load data failed. " | |||
| "Delete data_loader. Detail: %s", loader_id, ex) | |||
| with self._loader_pool_mutex: | |||
| self._delete_loader(loader_id) | |||
| def start_load_data(self, | |||
| reload_interval=settings.RELOAD_INTERVAL, | |||
| max_threads_count=MAX_DATA_LOADER_SIZE): | |||
| """ | |||
| Start threads for loading data. | |||
| Args: | |||
| reload_interval (int): Interval in seconds between reloads; 0 means load only once. | |||
| max_threads_count (int): Max number of threads of execution. | |||
| """ | |||
| logger.info("Start to load data, reload_interval: %s, " | |||
| "max_threads_count: %s.", reload_interval, max_threads_count) | |||
| DataManager.check_reload_interval(reload_interval) | |||
| DataManager.check_max_threads_count(max_threads_count) | |||
| self._reload_interval = reload_interval | |||
| self._max_threads_count = max_threads_count | |||
| thread = threading.Thread(target=self._reload_data, | |||
| name='start_load_data_thread') | |||
| thread.daemon = True | |||
| thread.start() | |||
| def _reload_data(self): | |||
| """This function periodically loads the data.""" | |||
| # Let gunicorn load other modules first. | |||
| time.sleep(1) | |||
| while True: | |||
| self._load_data() | |||
| if not self._reload_interval: | |||
| break | |||
| time.sleep(self._reload_interval) | |||
| def reload_data(self): | |||
| """ | |||
| Reload the data once. | |||
| This function should be called after the `start_load_data` function. | |||
| """ | |||
| logger.debug("start to reload data") | |||
| thread = threading.Thread(target=self._load_data, | |||
| name='reload_data_thread') | |||
| thread.daemon = False | |||
| thread.start() | |||
| def _load_data(self): | |||
| """This function will load data once and ignore it if the status is loading.""" | |||
| logger.info("Start to load data, reload interval: %r.", self._reload_interval) | |||
| with self._status_mutex: | |||
| if self.status == DataManagerStatus.LOADING.value: | |||
| logger.debug("Current status is %s , will ignore to load data.", self.status) | |||
| return | |||
| self.status = DataManagerStatus.LOADING.value | |||
| self._generate_loaders() | |||
| self._execute_load_data() | |||
| if not self._loader_pool: | |||
| self.status = DataManagerStatus.INVALID.value | |||
| else: | |||
| self.status = DataManagerStatus.DONE.value | |||
| logger.info("Load event data end, status: %r, and loader pool size is %r.", | |||
| self.status, len(self._loader_pool)) | |||
| def _generate_loaders(self): | |||
| """This function generates the loader from given path.""" | |||
| loader_dict = {} | |||
| for generator in self._loader_generators: | |||
| loader_dict.update(generator.generate_loaders(self._loader_pool)) | |||
| sorted_loaders = sorted(loader_dict.items(), key=lambda loader: loader[1].latest_update_time) | |||
| latest_loaders = sorted_loaders[-MAX_DATA_LOADER_SIZE:] | |||
| self._deal_loaders(latest_loaders) | |||
| def _deal_loaders(self, latest_loaders): | |||
| """ | |||
| This function determines which loaders to keep, remove, or add. | |||
| It is based on the given list of loaders. | |||
| Args: | |||
| latest_loaders (list[tuple]): A list of (loader_id, LoaderStruct) pairs. | |||
| """ | |||
| with self._loader_pool_mutex: | |||
| for loader_id, loader in latest_loaders: | |||
| if self._loader_pool.get(loader_id, None) is None: | |||
| self._add_loader(loader) | |||
| continue | |||
| # If this loader was updated manually before, its latest_update_time | |||
| # may be bigger than the update time recorded in the summary. | |||
| if self._loader_pool[loader_id].latest_update_time < loader.latest_update_time: | |||
| self._update_loader_latest_update_time(loader_id, loader.latest_update_time) | |||
| def _execute_load_data(self): | |||
| """Load data through multiple threads.""" | |||
| threads_count = self._get_threads_count() | |||
| if not threads_count: | |||
| logger.info("Can not find any valid train log path to load, loader pool is empty.") | |||
| return | |||
| logger.info("Start to execute load data. threads_count: %s.", threads_count) | |||
| with ThreadPoolExecutor(max_workers=threads_count) as executor: | |||
| futures = [] | |||
| loader_pool = self._get_snapshot_loader_pool() | |||
| for loader_id in loader_pool: | |||
| future = executor.submit(self._execute_loader, loader_id) | |||
| futures.append(future) | |||
| wait(futures, return_when=ALL_COMPLETED) | |||
| @staticmethod | |||
| def check_reload_interval(reload_interval): | |||
| """ | |||
| Check whether the reload interval is valid. | |||
| Args: | |||
| reload_interval (int): Reload interval, should be >= 0. | |||
| """ | |||
| if not isinstance(reload_interval, int): | |||
| raise ParamValueError("The value of reload interval should be integer.") | |||
| if reload_interval < 0: | |||
| raise ParamValueError("The value of reload interval should be >= 0.") | |||
| @staticmethod | |||
| def check_max_threads_count(max_threads_count): | |||
| """ | |||
| Check whether the max threads count is valid. | |||
| Args: | |||
| max_threads_count (int): Max threads count, should be > 0. | |||
| """ | |||
| if not isinstance(max_threads_count, int): | |||
| raise ParamValueError("The value of max threads count should be integer.") | |||
| if max_threads_count <= 0: | |||
| raise ParamValueError("The value of max threads count should be > 0.") | |||
| def _get_threads_count(self): | |||
| """ | |||
| Compute the number of threads to use, capped by the loader pool size. | |||
| Returns: | |||
| int, number of threads. | |||
| """ | |||
| threads_count = min(self._max_threads_count, len(self._loader_pool)) | |||
| return threads_count | |||
| def get_train_job_by_plugin(self, train_id, plugin_name): | |||
| """ | |||
| Get a train job by train job id. | |||
| If the given train job does not have the given plugin data, the tag list will be empty. | |||
| Args: | |||
| train_id (str): Get train job info by the given id. | |||
| plugin_name (str): Get tags by given plugin. | |||
| Returns: | |||
| TypedDict('TrainJobEntity', {'id': str, 'name': str, 'tags': List[str]}), | |||
| a train job object. | |||
| """ | |||
| self._check_status_valid() | |||
| self._check_train_job_exist(train_id, self._loader_pool) | |||
| loader = self._get_loader(train_id) | |||
| if loader is None: | |||
| logger.warning("No valid summary log in train job %s, " | |||
| "or it is not in the cache.", train_id) | |||
| return None | |||
| name = loader.name | |||
| data_loader = loader.data_loader | |||
| tags = [] | |||
| try: | |||
| events_data = data_loader.get_events_data() | |||
| tags = events_data.list_tags_by_plugin(plugin_name) | |||
| except KeyError: | |||
| logger.debug("Plugin name %r does not exist " | |||
| "in train job %r, and set tags to empty list.", plugin_name, name) | |||
| except AttributeError: | |||
| logger.debug("Train job %r has been deleted or it has not loaded data, " | |||
| "and set tags to empty list.", name) | |||
| result = dict(id=train_id, name=name, tags=tags) | |||
| return result | |||
| def delete_train_job(self, train_id): | |||
| """ | |||
| Delete train job with a train id. | |||
| Args: | |||
| train_id (str): ID for train job. | |||
| """ | |||
| with self._loader_pool_mutex: | |||
| self._delete_loader(train_id) | |||
| def list_tensors(self, train_id, tag): | |||
| """ | |||
| List tensors of the given train job and tag. | |||
| If no tensor can be found for the given tag, an exception will be raised. | |||
| Args: | |||
| train_id (str): ID for train job. | |||
| tag (str): The tag name. | |||
| Returns: | |||
| list[NamedTuple], the tuple format is `collections.namedtuple('_Tensor', ['wall_time', 'step', 'value'])`; | |||
| the value field will contain the given tag data. | |||
| """ | |||
| self._check_status_valid() | |||
| loader_pool = self._get_snapshot_loader_pool() | |||
| if not self._is_loader_in_loader_pool(train_id, loader_pool): | |||
| raise ParamValueError("Can not find any data in loader pool about the train job.") | |||
| data_loader = loader_pool[train_id].data_loader | |||
| events_data = data_loader.get_events_data() | |||
| try: | |||
| tensors = events_data.tensors(tag) | |||
| except KeyError: | |||
| error_msg = "Can not find any data in this train job by given tag." | |||
| raise ParamValueError(error_msg) | |||
| return tensors | |||
| def _check_train_job_exist(self, train_id, loader_pool): | |||
| """ | |||
| Check whether the train job exists; if it does not, an exception will be raised. | |||
| Args: | |||
| train_id (str): The given train job id. | |||
| loader_pool (dict[str, LoaderStruct]): Refer to self._loader_pool. | |||
| Raises: | |||
| ParamValueError: If the train job can not be found in data manager. | |||
| """ | |||
| is_exist = False | |||
| if train_id in loader_pool: | |||
| return | |||
| for generator in self._loader_generators: | |||
| if generator.check_train_job_exist(train_id): | |||
| is_exist = True | |||
| break | |||
| if not is_exist: | |||
| raise ParamValueError("Can not find the train job in data manager.") | |||
| def _is_loader_in_loader_pool(self, train_id, loader_pool): | |||
| """ | |||
| Check whether the loader of a train job is in the loader pool. | |||
| Args: | |||
| train_id (str): The given train job id. | |||
| loader_pool (dict): See self._loader_pool. | |||
| Returns: | |||
| bool, if loader in loader pool, return True. | |||
| """ | |||
| if train_id in loader_pool: | |||
| return True | |||
| return False | |||
| def _get_snapshot_loader_pool(self): | |||
| """ | |||
| Create a snapshot of data loader pool to avoid concurrent mutation and iteration issues. | |||
| Returns: | |||
| dict, a copy of `self._loader_pool`. | |||
| """ | |||
| with self._loader_pool_mutex: | |||
| return dict(self._loader_pool) | |||
| def _check_status_valid(self): | |||
| """Check if the status is valid to load data.""" | |||
| if self.status == DataManagerStatus.INIT.value: | |||
| raise exceptions.SummaryLogIsLoading("Data is being loaded, " | |||
| "current status: %s." % self._status) | |||
| def get_single_train_job(self, train_id, manual_update=False): | |||
| """ | |||
| Get train job by train ID. | |||
| Args: | |||
| train_id (str): Train ID for train job. | |||
| manual_update (bool): True if the update is triggered manually. | |||
| Returns: | |||
| dict, a single train job; if no data can be found, returns None. | |||
| """ | |||
| self._check_status_valid() | |||
| self._check_train_job_exist(train_id, self._loader_pool) | |||
| loader = self._get_loader(train_id, manual_update) | |||
| if loader is None: | |||
| logger.warning("No valid summary log in train job %s, " | |||
| "or it is not in the cache.", train_id) | |||
| return None | |||
| train_job = loader.to_dict() | |||
| train_job.pop('data_loader') | |||
| plugin_data = {} | |||
| for plugin_name in PluginNameEnum.list_members(): | |||
| job = self.get_train_job_by_plugin(train_id, plugin_name=plugin_name) | |||
| if job is None: | |||
| plugin_data[plugin_name] = [] | |||
| else: | |||
| plugin_data[plugin_name] = job['tags'] | |||
| train_job.update({'tag_mapping': plugin_data}) | |||
| return train_job | |||
| def _get_loader(self, train_id, manual_update=False): | |||
| """ | |||
| Get loader by train id. | |||
| Args: | |||
| train_id (str): Train ID. | |||
| manual_update (bool): True if the update is triggered manually, else False. | |||
| Returns: | |||
| LoaderStruct, the loader. | |||
| """ | |||
| loader = None | |||
| is_reload = False | |||
| with self._loader_pool_mutex: | |||
| if self._is_loader_in_loader_pool(train_id, self._loader_pool): | |||
| loader = self._loader_pool.get(train_id) | |||
| if manual_update and loader is None: | |||
| for generator in self._loader_generators: | |||
| tmp_loader = generator.generate_loader_by_train_id(train_id) | |||
| if loader and loader.latest_update_time > tmp_loader.latest_update_time: | |||
| continue | |||
| loader = tmp_loader | |||
| if loader is None: | |||
| return None | |||
| self._add_loader(loader) | |||
| is_reload = True | |||
| if manual_update: | |||
| self._update_loader_latest_update_time(loader.loader_id) | |||
| if is_reload: | |||
| self.reload_data() | |||
| return loader | |||
| def _update_loader_latest_update_time(self, loader_id, latest_update_time=None): | |||
| """ | |||
| Update loader with latest_update_time. | |||
| Args: | |||
| loader_id (str): ID of loader. | |||
| latest_update_time (float): Timestamp. | |||
| """ | |||
| if latest_update_time is None: | |||
| latest_update_time = time.time() | |||
| self._loader_pool[loader_id].latest_update_time = latest_update_time | |||
| @property | |||
| def status(self): | |||
| """ | |||
| Get the status of data manager. | |||
| Returns: | |||
| DataManagerStatus, the status of data manager. | |||
| """ | |||
| return self._status | |||
| @status.setter | |||
| def status(self, status): | |||
| """Set data manger status.""" | |||
| self._status = status | |||
| _loader_generators = [DataLoaderGenerator(settings.SUMMARY_BASE_DIR)] | |||
| DATA_MANAGER = DataManager(_loader_generators) | |||
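| # A minimal usage sketch (hedged: the train id below is illustrative; the | |||
| # summary base directory comes from `settings.SUMMARY_BASE_DIR`): | |||
| # | |||
| #     DATA_MANAGER.start_load_data(reload_interval=3, max_threads_count=3) | |||
| #     # ... later, once loading is done: | |||
| #     train_job = DATA_MANAGER.get_single_train_job('./train_job_01') | |||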
| @@ -0,0 +1,216 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Takes a generator of values, and collects them for a frontend.""" | |||
| import collections | |||
| import threading | |||
| from mindinsight.datavisual.common.enums import PluginNameEnum | |||
| from mindinsight.datavisual.data_transform import reservoir | |||
| from mindinsight.conf import settings | |||
| # Type of the tensor event from external component | |||
| _Tensor = collections.namedtuple('_Tensor', ['wall_time', 'step', 'value']) | |||
| TensorEvent = collections.namedtuple( | |||
| 'TensorEvent', ['wall_time', 'step', 'tag', 'plugin_name', 'value']) | |||
| # config for `EventsData` | |||
| _DEFAULT_STEP_SIZES_PER_TAG = settings.DEFAULT_STEP_SIZES_PER_TAG | |||
| CONFIG = { | |||
| 'max_total_tag_sizes': settings.MAX_TAG_SIZE_PER_EVENTS_DATA, | |||
| 'max_tag_sizes_per_plugin': | |||
| { | |||
| PluginNameEnum.GRAPH.value: settings.MAX_GRAPH_TAG_SIZE, | |||
| }, | |||
| 'max_step_sizes_per_tag': | |||
| { | |||
| PluginNameEnum.SCALAR.value: settings.MAX_SCALAR_STEP_SIZE_PER_TAG, | |||
| PluginNameEnum.IMAGE.value: settings.MAX_IMAGE_STEP_SIZE_PER_TAG, | |||
| PluginNameEnum.GRAPH.value: settings.MAX_GRAPH_STEP_SIZE_PER_TAG, | |||
| } | |||
| } | |||
| class EventsData: | |||
| """ | |||
| EventsData is an event data manager. | |||
| It manages the log events generated during a training process. | |||
| The log event records information such as graph, tag, and tensor. | |||
| Data such as tensor can be retrieved based on its tag. | |||
| """ | |||
| def __init__(self): | |||
| self._config = CONFIG | |||
| self._max_step_sizes_per_tag = self._config['max_step_sizes_per_tag'] | |||
| self._tags = list() | |||
| self._reservoir_by_tag = {} | |||
| self._reservoir_mutex_lock = threading.Lock() | |||
| self._tags_by_plugin = collections.defaultdict(list) | |||
| self._tags_by_plugin_mutex_lock = collections.defaultdict(threading.Lock) | |||
| def add_tensor_event(self, tensor_event): | |||
| """ | |||
| Add a new tensor event to the tensors_data. | |||
| Args: | |||
| tensor_event (TensorEvent): Refer to `TensorEvent` object. | |||
| """ | |||
| if not isinstance(tensor_event, TensorEvent): | |||
| raise TypeError('Expect to get data of type `TensorEvent`.') | |||
| tag = tensor_event.tag | |||
| plugin_name = tensor_event.plugin_name | |||
| if tag not in self._tags: | |||
| deleted_tag = self._check_tag_out_of_spec(plugin_name) | |||
| if deleted_tag is not None: | |||
| self.delete_tensor_event(deleted_tag) | |||
| self._tags.append(tag) | |||
| with self._tags_by_plugin_mutex_lock[plugin_name]: | |||
| if tag not in self._tags_by_plugin[plugin_name]: | |||
| self._tags_by_plugin[plugin_name].append(tag) | |||
| with self._reservoir_mutex_lock: | |||
| if tag not in self._reservoir_by_tag: | |||
| reservoir_size = self._get_reservoir_size(tensor_event.plugin_name) | |||
| self._reservoir_by_tag[tag] = reservoir.Reservoir(reservoir_size) | |||
| tensor = _Tensor(wall_time=tensor_event.wall_time, | |||
| step=tensor_event.step, | |||
| value=tensor_event.value) | |||
| if self._is_out_of_order_step(tensor_event.step, tensor_event.tag): | |||
| self.purge_reservoir_data(tensor_event.step, self._reservoir_by_tag[tag]) | |||
| self._reservoir_by_tag[tag].add_sample(tensor) | |||
| def delete_tensor_event(self, tag): | |||
| """ | |||
| This function will delete the tensor events of the given tag from the in-memory records. | |||
| Args: | |||
| tag (str): The tag name. | |||
| """ | |||
| self._tags.remove(tag) | |||
| for plugin_name, lock in self._tags_by_plugin_mutex_lock.items(): | |||
| with lock: | |||
| if tag in self._tags_by_plugin[plugin_name]: | |||
| self._tags_by_plugin[plugin_name].remove(tag) | |||
| break | |||
| with self._reservoir_mutex_lock: | |||
| if tag in self._reservoir_by_tag: | |||
| self._reservoir_by_tag.pop(tag) | |||
| def list_tags_by_plugin(self, plugin_name): | |||
| """ | |||
| Return all the tag names of the plugin. | |||
| Args: | |||
| plugin_name (str): The Plugin name. | |||
| Returns: | |||
| list[str], tags of the plugin. | |||
| Raises: | |||
| KeyError: when plugin name could not be found. | |||
| """ | |||
| if plugin_name not in self._tags_by_plugin: | |||
| raise KeyError('Plugin %r could not be found.' % plugin_name) | |||
| with self._tags_by_plugin_mutex_lock[plugin_name]: | |||
| # Return a snapshot to avoid concurrent mutation and iteration issues. | |||
| return list(self._tags_by_plugin[plugin_name]) | |||
| def tensors(self, tag): | |||
| """ | |||
| Return all tensors of the tag. | |||
| Args: | |||
| tag (str): The tag name. | |||
| Returns: | |||
| list[_Tensor], the list of tensors for the tag. | |||
| """ | |||
| if tag not in self._reservoir_by_tag: | |||
| raise KeyError('TAG %r could not be found.' % tag) | |||
| return self._reservoir_by_tag[tag].samples() | |||
| def _is_out_of_order_step(self, step, tag): | |||
| """ | |||
| If the current step is not larger than the latest one, it is an out-of-order step. | |||
| Args: | |||
| step (int): The step to check for being out of order. | |||
| tag (str): The tag whose tensors are checked. | |||
| Returns: | |||
| bool, True if the step is out of order. | |||
| """ | |||
| tensors = self.tensors(tag) | |||
| if tensors: | |||
| last_step = tensors[-1].step | |||
| if step <= last_step: | |||
| return True | |||
| return False | |||
| @staticmethod | |||
| def purge_reservoir_data(start_step, tensor_reservoir): | |||
| """ | |||
| Purge all tensor events whose step is out of order relative to the given start step. | |||
| Args: | |||
| start_step (int): The purge start step. All previously seen events with | |||
| a step greater than or equal to this will be purged. | |||
| tensor_reservoir (Reservoir): A `Reservoir` object. | |||
| Returns: | |||
| int, the number of items removed. | |||
| """ | |||
| cnt_out_of_order = tensor_reservoir.remove_sample(lambda x: x.step < start_step) | |||
| return cnt_out_of_order | |||
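| # Worked example (illustrative steps, assuming `remove_sample` keeps the | |||
| # samples matching the predicate): if a reservoir holds steps [1, 2, 5] and | |||
| # an event with step 3 arrives, step 3 is out of order (3 <= 5), so | |||
| # purge_reservoir_data(3, reservoir) drops the step-5 sample; adding the new | |||
| # sample then leaves steps [1, 2, 3]. | |||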
| def _get_reservoir_size(self, plugin_name): | |||
| max_step_sizes_per_tag = self._config['max_step_sizes_per_tag'] | |||
| return max_step_sizes_per_tag.get(plugin_name, _DEFAULT_STEP_SIZES_PER_TAG) | |||
| def _check_tag_out_of_spec(self, plugin_name): | |||
| """ | |||
| Check whether the tag is out of specification. | |||
| Args: | |||
| plugin_name (str): The given plugin name. | |||
| Returns: | |||
| Union[str, None], if out of specification, will return the first tag, else return None. | |||
| """ | |||
| tag_specifications = self._config['max_tag_sizes_per_plugin'].get(plugin_name) | |||
| if tag_specifications is not None and len(self._tags_by_plugin[plugin_name]) >= tag_specifications: | |||
| deleted_tag = self._tags_by_plugin[plugin_name][0] | |||
| return deleted_tag | |||
| if len(self._tags) >= self._config['max_total_tag_sizes']: | |||
| deleted_tag = self._tags[0] | |||
| return deleted_tag | |||
| return None | |||
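| # A minimal usage sketch (hedged: the tag and values are made up): | |||
| # | |||
| #     events = EventsData() | |||
| #     event = TensorEvent(wall_time=1577000000.0, step=1, tag='loss', | |||
| #                         plugin_name=PluginNameEnum.SCALAR.value, value=0.5) | |||
| #     events.add_tensor_event(event) | |||
| #     samples = events.tensors('loss')  # [_Tensor(wall_time=..., step=1, value=0.5)] | |||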
| @@ -0,0 +1,20 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """This file is used to define the graph.""" | |||
| from .msgraph import MSGraph | |||
| from .node import NodeTypeEnum | |||
| __all__ = ['MSGraph', 'NodeTypeEnum'] | |||
| @@ -0,0 +1,455 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| This file is used to define the basic graph. | |||
| """ | |||
| import copy | |||
| import time | |||
| from mindinsight.datavisual.common.log import logger | |||
| from mindinsight.datavisual.common import exceptions | |||
| from .node import NodeTypeEnum | |||
| from .node import Node | |||
| class EdgeTypeEnum: | |||
| """Node edge type enum.""" | |||
| control = 'control' | |||
| data = 'data' | |||
| class DataTypeEnum: | |||
| """Data type enum.""" | |||
| DT_TENSOR = 13 | |||
| class Graph: | |||
| """The `Graph` object is used to describe a graph file.""" | |||
| MIN_POLYMERIC_NODE_COUNT = 5 | |||
| def __init__(self): | |||
| # Store nodes containing leaf nodes and name scope nodes, except polymeric nodes. | |||
| self._normal_nodes = {} | |||
| # Store polymeric nodes. | |||
| self._polymeric_nodes = {} | |||
| # Store all leaf nodes resolved from the file. | |||
| self._leaf_nodes = {} | |||
| # The format of node groups is {'group_name': {'node_name': <Node>}} | |||
| self._node_groups = {} | |||
| def exist_node(self, name): | |||
| """ | |||
| Check whether the node exists in the graph. | |||
| Args: | |||
| name (str): The node name. | |||
| Returns: | |||
| bool, True if the node exists. | |||
| """ | |||
| if self._normal_nodes.get(name) is None: | |||
| return False | |||
| return True | |||
| def get_normal_nodes(self, namescope=None): | |||
| """ | |||
| Get nodes by namescope. | |||
| Args: | |||
| namescope (str): A namescope of nodes. | |||
| Returns: | |||
| list[dict], a list of serialized `Node` objects. | |||
| """ | |||
| nodes = [] | |||
| if namescope is None: | |||
| for name, node in self._normal_nodes.items(): | |||
| if '/' not in name: | |||
| # Get first layer nodes | |||
| nodes.append(node.to_dict()) | |||
| return nodes | |||
| namescope = namescope + '/' | |||
| for name, node in self._normal_nodes.items(): | |||
| if name.startswith(namescope) and '/' not in name.split(namescope)[1]: | |||
| nodes.append(node.to_dict()) | |||
| return nodes | |||
| def get_polymeric_nodes(self, polymeric_scope): | |||
| """ | |||
| Get polymeric nodes by polymeric scope. | |||
| Args: | |||
| polymeric_scope (str): The polymeric scope name of nodes. | |||
| Returns: | |||
| list[dict], a list of serialized `Node` objects. | |||
| """ | |||
| nodes = [] | |||
| for node in self._polymeric_nodes.values(): | |||
| if node.polymeric_scope_name == polymeric_scope: | |||
| nodes.append(node.to_dict()) | |||
| return nodes | |||
| def search_node_names(self, content, offset, limit): | |||
| """ | |||
| Search node names by content. | |||
| Args: | |||
| content (Union[str, None]): The keyword to search for within node names; | |||
| if None, all node names will be returned. | |||
| offset (int): The page offset. For example, offset 0 means the first page. | |||
| limit (int): The max number of node names returned per page. | |||
| Returns: | |||
| list[str], a list of node names. | |||
| """ | |||
| all_names = [] | |||
| all_names.extend(list(self._normal_nodes.keys())) | |||
| all_names.extend(list(self._polymeric_nodes.keys())) | |||
| if content is not None: | |||
| content = content.lower() | |||
| catch_names = [name for name in all_names if content in name.lower()] | |||
| else: | |||
| catch_names = all_names | |||
| catch_names = sorted(catch_names) | |||
| real_offset = offset * limit | |||
| return catch_names[real_offset:real_offset+limit] | |||
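| # Worked example (illustrative): with 25 matching node names after sorting, | |||
| # search_node_names(content, offset=1, limit=10) computes real_offset = 10 | |||
| # and returns the 11th through 20th names. | |||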
| def search_single_node(self, node_name): | |||
| """ | |||
| Search for a node, and return the nodes of every layer down to this node. | |||
| Args: | |||
| node_name (str): The name of node. | |||
| Returns: | |||
| dict, a dict object, format is : | |||
| item_object = {'nodes': [<Node object>], | |||
| 'scope_name': '<Node scope>', | |||
| 'children': {<item_object>}} | |||
| """ | |||
| if node_name and self._polymeric_nodes.get(node_name) is None \ | |||
| and self._normal_nodes.get(node_name) is None: | |||
| raise exceptions.NodeNotInGraphError() | |||
| response = {} | |||
| nodes = self.get_normal_nodes() | |||
| response.update({ | |||
| 'nodes': nodes, | |||
| 'scope_name': '', | |||
| 'children': {} | |||
| }) | |||
| names = node_name.split('/') | |||
| children = response['children'] | |||
| for i in range(1, len(names)+1): | |||
| if i == len(names): | |||
| polymeric_node = self._polymeric_nodes.get(node_name) | |||
| if polymeric_node: | |||
| polymeric_scope = polymeric_node.polymeric_scope_name | |||
| nodes = self.get_polymeric_nodes(polymeric_scope) | |||
| children.update({'nodes': nodes, | |||
| 'scope_name': polymeric_scope, | |||
| 'children': {}}) | |||
| break | |||
| name_scope = '/'.join(names[:i]) | |||
| nodes = self.get_normal_nodes(name_scope) | |||
| children.update({ | |||
| 'nodes': nodes, | |||
| 'scope_name': name_scope, | |||
| 'children': {} | |||
| }) | |||
| children = children['children'] | |||
| return response | |||
| def _build_polymeric_nodes(self): | |||
| """Build polymeric node.""" | |||
| logger.debug("Start to build polymeric nodes") | |||
| self._find_polymeric_nodes() | |||
| group_count_map = {} | |||
| for group_name, group in self._node_groups.items(): | |||
| name = group_name.split('/')[-1] | |||
| count = group_count_map.get(name, 0) | |||
| count += 1 | |||
| group_count_map[name] = count | |||
| polymeric_node_name = group_name + '_{}_[{}]'.format(count, len(group)) | |||
| polymeric_node = Node(polymeric_node_name, node_id=polymeric_node_name) | |||
| polymeric_node.node_type = NodeTypeEnum.POLYMERIC_SCOPE.value | |||
| polymeric_node.name_scope = '/'.join(group_name.split('/')[:-1]) | |||
| polymeric_node.subnode_count = len(group) | |||
| for name_tmp, node_tmp in group.items(): | |||
| node_tmp.polymeric_scope_name = polymeric_node_name | |||
| self._polymeric_nodes.update({name_tmp: node_tmp}) | |||
| polymeric_node.update_input(node_tmp.input) | |||
| polymeric_node.update_output(node_tmp.output) | |||
| self._normal_nodes.update({polymeric_node_name: polymeric_node}) | |||
| self._update_input_output() | |||
| def _find_polymeric_nodes(self): | |||
| """Find polymeric nodes from node groups.""" | |||
| node_groups = copy.deepcopy(self._node_groups) | |||
| for group_name, group in node_groups.items(): | |||
| if len(group) < self.MIN_POLYMERIC_NODE_COUNT: | |||
| self._normal_nodes.update(group) | |||
| self._node_groups.pop(group_name) | |||
| continue | |||
| move_node_names = [] | |||
| is_move_group = False | |||
| for node_name, group_node in group.items(): | |||
| node_list = [] | |||
| is_in_group = False | |||
| for dst_name in group_node.output: | |||
| node_tmp = self._leaf_nodes[dst_name] | |||
| node_list.append(node_tmp) | |||
| start = time.time() | |||
| run_count = 0 | |||
| visit_nodes = {} | |||
| while node_list: | |||
| # Iterate to find whether the outputs of a node in the group lead back into the group. | |||
| # Example: there is a group A, and node_a is a node in the group. | |||
| # If there is a path from node_a back into A, like A/node_a -> B/node_b -> A/node_b, | |||
| # we will remove node_a from group A. | |||
| node_tmp = node_list[0] | |||
| node_list = node_list[1:] | |||
| visit_nodes.update({node_tmp.name: True}) | |||
| if node_tmp in group.values(): | |||
| is_in_group = True | |||
| break | |||
| for dst_name_tmp in node_tmp.output: | |||
| run_count += 1 | |||
| node_tmp = self._leaf_nodes[dst_name_tmp] | |||
| if visit_nodes.get(dst_name_tmp): | |||
| continue | |||
| node_list.append(node_tmp) | |||
| logger.debug("Find group %s node end, is_in_group: %s, use time: %s, " | |||
| "run count: %s.", group_name, is_in_group, | |||
| time.time() - start, run_count) | |||
| if is_in_group: | |||
| move_node_names.append(node_name) | |||
| if (len(group) - len(move_node_names)) < self.MIN_POLYMERIC_NODE_COUNT: | |||
| is_move_group = True | |||
| break | |||
| if is_move_group: | |||
| self._normal_nodes.update(group) | |||
| self._node_groups.pop(group_name) | |||
| else: | |||
| for name_tmp in move_node_names: | |||
| node_tmp = self._node_groups[group_name].pop(name_tmp) | |||
| self._normal_nodes.update({name_tmp: node_tmp}) | |||
| def _update_input_output(self): | |||
| """We need to update input and output attribute after build polymeric node.""" | |||
| for node in self._normal_nodes.values(): | |||
| for src_name, input_attr in node.input.items(): | |||
| if self._polymeric_nodes.get(src_name): | |||
| input_attr['scope'] = NodeTypeEnum.POLYMERIC_SCOPE.value | |||
| node.update_input({src_name: input_attr}) | |||
| for dst_name, output_attr in node.output.items(): | |||
| if self._polymeric_nodes.get(dst_name): | |||
| output_attr['scope'] = NodeTypeEnum.POLYMERIC_SCOPE.value | |||
| node.update_output({dst_name: output_attr}) | |||
| for node in self._polymeric_nodes.values(): | |||
| for src_name, input_attr in node.input.items(): | |||
| if self._polymeric_nodes.get(src_name): | |||
| input_attr['scope'] = NodeTypeEnum.POLYMERIC_SCOPE.value | |||
| node.update_input({src_name: input_attr}) | |||
| for dst_name, output_attr in node.output.items(): | |||
| if self._polymeric_nodes.get(dst_name): | |||
| output_attr['scope'] = NodeTypeEnum.POLYMERIC_SCOPE.value | |||
| node.update_output({dst_name: output_attr}) | |||
| def _calc_polymeric_input_output(self): | |||
| """Calc polymeric input and output after build polymeric node.""" | |||
| for name, node in self._normal_nodes.items(): | |||
| polymeric_input = {} | |||
| for src_name in node.input: | |||
| src_node = self._polymeric_nodes.get(src_name) | |||
| if node.node_type == NodeTypeEnum.POLYMERIC_SCOPE.value: | |||
| src_name = src_name if not src_node else src_node.polymeric_scope_name | |||
| output_name = self._calc_dummy_node_name(name, src_name) | |||
| polymeric_input.update({output_name: {'edge_type': EdgeTypeEnum.data}}) | |||
| continue | |||
| if not src_node: | |||
| continue | |||
| if not node.name_scope and src_node.name_scope: | |||
| # If the current node is in the first layer and the src node is not, | |||
| # the src node will not be a polymeric input of the current node. | |||
| continue | |||
| if node.name_scope == src_node.name_scope \ | |||
| or node.name_scope.startswith(src_node.name_scope): | |||
| polymeric_input.update( | |||
| {src_node.polymeric_scope_name: {'edge_type': EdgeTypeEnum.data}}) | |||
| node.update_polymeric_input(polymeric_input) | |||
| polymeric_output = {} | |||
| for dst_name in node.output: | |||
| dst_node = self._polymeric_nodes.get(dst_name) | |||
| if node.node_type == NodeTypeEnum.POLYMERIC_SCOPE.value: | |||
| dst_name = dst_name if not dst_node else dst_node.polymeric_scope_name | |||
| output_name = self._calc_dummy_node_name(name, dst_name) | |||
| polymeric_output.update({output_name: {'edge_type': EdgeTypeEnum.data}}) | |||
| continue | |||
| if not dst_node: | |||
| continue | |||
| if not node.name_scope and dst_node.name_scope: | |||
| continue | |||
| if node.name_scope == dst_node.name_scope \ | |||
| or node.name_scope.startswith(dst_node.name_scope): | |||
| polymeric_output.update( | |||
| {dst_node.polymeric_scope_name: {'edge_type': EdgeTypeEnum.data}}) | |||
| node.update_polymeric_output(polymeric_output) | |||
| for name, node in self._polymeric_nodes.items(): | |||
| polymeric_input = {} | |||
| for src_name in node.input: | |||
| output_name = self._calc_dummy_node_name(name, src_name) | |||
| polymeric_input.update({output_name: {'edge_type': EdgeTypeEnum.data}}) | |||
| node.update_polymeric_input(polymeric_input) | |||
| polymeric_output = {} | |||
| for dst_name in node.output: | |||
| output_name = self._calc_dummy_node_name(name, dst_name) | |||
| polymeric_output.update({output_name: {'edge_type': EdgeTypeEnum.data}}) | |||
| node.update_polymeric_output(polymeric_output) | |||
| def _calc_dummy_node_name(self, current_node_name, other_node_name): | |||
| """ | |||
| Calc dummy node name. | |||
| Args: | |||
| current_node_name (str): The name of current node. | |||
| other_node_name (str): The name of the other node. | |||
| Returns: | |||
| str, the dummy node name. | |||
| """ | |||
| name_tmp = other_node_name | |||
| if self._polymeric_nodes.get(other_node_name): | |||
| name_tmp = self._polymeric_nodes[other_node_name].polymeric_scope_name | |||
| name_tmp_list = name_tmp.split('/') | |||
| current_name_list = current_node_name.split('/') | |||
| index = 0 | |||
| min_len = min(len(name_tmp_list), len(current_name_list)) | |||
| for i in range(min_len): | |||
| index = i | |||
| if name_tmp_list[index] != current_name_list[index]: | |||
| break | |||
| dummy_node_name = '/'.join(name_tmp_list[:index+1]) | |||
| return dummy_node_name | |||
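| # Worked example (illustrative names): for current_node_name 'A/B/node1' and | |||
| # other_node_name 'A/C/node2', the split name lists diverge at index 1, so | |||
| # the dummy node name is 'A/C', i.e. the other node's path cut one level | |||
| # past the common prefix. | |||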
| def _build_name_scope_nodes(self): | |||
| """Build name scope node by every node name.""" | |||
| normal_nodes = dict(self._normal_nodes) | |||
| rename_node_names = {} | |||
| for name, node in normal_nodes.items(): | |||
| name_list = name.split('/') | |||
| for i in range(1, len(name_list)): | |||
| name_scope = '/'.join(name_list[:i]) | |||
| name_scope_node = self._normal_nodes.get(name_scope) | |||
| if name_scope_node is None: | |||
| name_scope_node = Node(name_scope, node_id=name_scope) | |||
| name_scope_node.node_type = NodeTypeEnum.NAME_SCOPE.value | |||
| name_scope_node.name_scope = '/'.join(name_list[:i-1]) | |||
| elif name_scope_node.node_type != NodeTypeEnum.NAME_SCOPE.value: | |||
| # The name of this node conflicts with a namescope, so rename this node. | |||
| old_name = name_scope_node.name | |||
| old_names = name_scope_node.name.split('/') | |||
| old_names[-1] = f'({old_names[-1]})' | |||
| new_name = '/'.join(old_names) | |||
| name_scope_node.name = new_name | |||
| self._normal_nodes.pop(old_name) | |||
| self._normal_nodes.update({new_name: name_scope_node}) | |||
| rename_node_names.update({old_name: new_name}) | |||
| # create new namescope | |||
| name_scope_node = Node(name_scope, node_id=name_scope) | |||
| name_scope_node.node_type = NodeTypeEnum.NAME_SCOPE.value | |||
| name_scope_node.name_scope = '/'.join(name_list[:i-1]) | |||
| # Update the input and output of this node onto the namescope node. | |||
| name_scope_with_slash = name_scope + '/' | |||
| for src_name, input_attr in node.input.items(): | |||
| if src_name.startswith(name_scope_with_slash): | |||
| continue | |||
| name_scope_node.update_input({src_name: input_attr}) | |||
| for dst_name, output_attr in node.output.items(): | |||
| if dst_name.startswith(name_scope_with_slash): | |||
| continue | |||
| name_scope_node.update_output({dst_name: output_attr}) | |||
| self._normal_nodes.update({name_scope: name_scope_node}) | |||
| if rename_node_names: | |||
| # If existing nodes are renamed, the inputs and outputs of all nodes need to be refreshed | |||
| nodes = [] | |||
| nodes.extend(self._normal_nodes.values()) | |||
| nodes.extend(self._polymeric_nodes.values()) | |||
| for node in nodes: | |||
| attrs = ['input', 'output', 'polymeric_input', 'polymeric_output'] | |||
| for item in attrs: | |||
| tmp_dict = dict(getattr(node, item)) | |||
| for name, value in tmp_dict.items(): | |||
| new_name = rename_node_names.get(name, False) | |||
| if new_name: | |||
| getattr(node, item).pop(name) | |||
| getattr(node, f'update_{item}')({new_name: value}) | |||
| self._calc_subnode_count() | |||
| def _calc_subnode_count(self): | |||
| """Calc the sub node count of scope node.""" | |||
| name_scope_mapping = {} | |||
| for node in self._normal_nodes.values(): | |||
| if node.name_scope: | |||
| count = name_scope_mapping.get(node.name_scope, 0) | |||
| name_scope_mapping[node.name_scope] = count + 1 | |||
| for name_scope, count in name_scope_mapping.items(): | |||
| node = self._normal_nodes[name_scope] | |||
| node.subnode_count = count | |||
| @@ -0,0 +1,274 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """This file is used to define the MindSpore graph.""" | |||
| import re | |||
| import copy | |||
| from mindinsight.datavisual.common.log import logger | |||
| from .node import Node | |||
| from .node import NodeTypeEnum | |||
| from .graph import Graph | |||
| from .graph import EdgeTypeEnum | |||
| from .graph import DataTypeEnum | |||
| class MSGraph(Graph): | |||
| """The object describes the MindSpore graph, and it is defined in the anf_if proto file.""" | |||
| def build_graph(self, graph_proto): | |||
| """ | |||
| Build graph by graph proto which refers to `anf_ir_pb2.GraphProto`, and set status to loading. | |||
| Args: | |||
| graph_proto (anf_ir_pb2.GraphProto): Refer to `anf_ir_pb2.GraphProto`. | |||
| """ | |||
| logger.info("Start to build graph.") | |||
| self._build_leaf_nodes(graph_proto) | |||
| self._build_polymeric_nodes() | |||
| self._build_name_scope_nodes() | |||
| self._calc_polymeric_input_output() | |||
| logger.info("Build graph end, normal node count: %s, polymeric node " | |||
| "count: %s.", len(self._normal_nodes), len(self._polymeric_nodes)) | |||
| def _build_leaf_nodes(self, graph_proto): | |||
| """ | |||
| Build leaf nodes from graph proto. | |||
| Leaf nodes contain operation nodes, parameter nodes, and const nodes. | |||
| Args: | |||
| graph_proto (anf_ir_pb2.model_proto.graph): Refer to anf_ir_pb2.model_proto.graph. | |||
| """ | |||
| logger.info("Start to build leaf nodes.") | |||
| leaf_node_id_map_name = {} | |||
| const_nodes_map = {} | |||
| for node_def in graph_proto.node: | |||
| node = self._parse_graph_proto_node(node_def) | |||
| leaf_node_id_map_name.update({node.node_id: node.name}) | |||
| for parameter in graph_proto.parameters: | |||
| node = self._parse_graph_proto_parameter(parameter) | |||
| const_nodes_map.update({node.name: node}) | |||
| for i, const in enumerate(graph_proto.const_vals): | |||
| node_id = 'const_{}'.format(i) | |||
| node = self._parse_graph_proto_const(const, node_id) | |||
| const_nodes_map.update({const.key: node}) | |||
| self._calc_input(leaf_node_id_map_name, graph_proto, const_nodes_map) | |||
| self._calc_output() | |||
| logger.info("Build leaf nodes end, normal nodes count: %s, group count: %s, " | |||
| "left node count: %s.", len(self._normal_nodes), len(self._node_groups), | |||
| len(self._leaf_nodes)) | |||
| def _calc_input(self, leaf_node_id_map_name, graph_proto, const_nodes_map): | |||
| """ | |||
| Calc input for every leaf node. | |||
| Args: | |||
| leaf_node_id_map_name (dict[str, str]): Format is {'node_id': 'node_name'}. | |||
| graph_proto (anf_ir_pb2.model_proto.graph): See anf_ir_pb2.model_proto.graph. | |||
| const_nodes_map (dict[str, Node]): Format is {'node name': <Const node>}. | |||
| """ | |||
| logger.debug("Start to calc input.") | |||
| for node_def in graph_proto.node: | |||
| node_name = leaf_node_id_map_name[node_def.name] | |||
| node = self._leaf_nodes[node_name] | |||
| for input_def in node_def.input: | |||
| edge_type = EdgeTypeEnum.data | |||
| if input_def.type == "CONTROL_EDGE": | |||
| edge_type = EdgeTypeEnum.control | |||
| if const_nodes_map.get(input_def.name): | |||
| const_node = copy.deepcopy(const_nodes_map[input_def.name]) | |||
| src_name = '{}/{}'.format(node.name_scope, input_def.name) | |||
| if not self._normal_nodes.get(src_name): | |||
| const_node.name = src_name | |||
| const_node.name_scope = node.name_scope | |||
| self._normal_nodes.update({src_name: const_node}) | |||
| self._leaf_nodes.update({src_name: const_node}) | |||
| src_node = self._leaf_nodes.get(src_name) | |||
| else: | |||
| src_name = leaf_node_id_map_name.get(input_def.name) | |||
| if not src_name: | |||
| logger.warning("The input_def name '%s' in node '%s' is invalid, " | |||
| "will be ignore.", input_def.name, node_name) | |||
| continue | |||
| src_node = self._leaf_nodes.get(src_name) | |||
| if src_node is None: | |||
| logger.warning("The input '%s' in node '%s' is not in " | |||
| "leaf nodes.", src_name, node_name) | |||
| continue | |||
| input_item = { | |||
| src_name: { | |||
| "shape": src_node.shape, | |||
| "edge_type": edge_type, | |||
| "scope": NodeTypeEnum.NAME_SCOPE.value | |||
| } | |||
| } | |||
| node.update_input(input_item) | |||
| if self._normal_nodes.get(node_name): | |||
| self._normal_nodes[node_name] = node | |||
| else: | |||
| group_name = self._create_group_name(node.name_scope, node.node_type, node.name) | |||
| self._node_groups[group_name][node.name] = node | |||
| def _calc_output(self): | |||
| """Calc output of every node.""" | |||
| logger.debug("Start to calc output.") | |||
| for name, node in self._leaf_nodes.items(): | |||
| if node.node_type == NodeTypeEnum.CONST.value: | |||
| continue | |||
| for src_name, input_attr in node.input.items(): | |||
| src_node = self._leaf_nodes[src_name] | |||
| if src_node.node_type == NodeTypeEnum.CONST.value: | |||
| continue | |||
| if self._normal_nodes.get(src_name): | |||
| self._normal_nodes[src_name].update_output({name: input_attr}) | |||
| else: | |||
| group_name = self._create_group_name(src_node.name_scope, | |||
| src_node.node_type, src_node.name) | |||
| self._node_groups[group_name][src_name].update_output({name: input_attr}) | |||
| def _parse_graph_proto_node(self, node_def): | |||
| """ | |||
| Parse `anf_ir_pb2.model_proto.graph.node_def`, and create a node. | |||
| Args: | |||
| node_def (anf_ir_pb2.model_proto.graph.node_def): Refer to anf_ir_pb2.model_proto.graph.node_def. | |||
| Returns: | |||
| Node, a `Node` object. | |||
| """ | |||
| node_name = '/'.join([node_def.scope, node_def.op_type]) + node_def.name | |||
| node = Node(name=node_name, node_id=node_def.name) | |||
| node.node_type = node_def.op_type | |||
| logger.debug("Foreach graph proto nodes, node id: %s, node name: %s, node def name: %s, " | |||
| "input count: %s", node.node_id, node.name, node_def.name, len(node_def.input)) | |||
| for attr in node_def.attribute: | |||
| node.update_attr({attr.name: str(attr.value)}) | |||
| node.output_i = node_def.output_i | |||
| node.name_scope = node_def.scope | |||
| output_type = node_def.output_type | |||
| shape = self._parse_type_proto(output_type) | |||
| node.shape = shape | |||
| self._leaf_nodes.update({node.name: node}) | |||
| group_name = self._create_group_name(node.name_scope, node.node_type, node.name) | |||
| if group_name is not None: | |||
| node_dict = self._node_groups.get(group_name, {}) | |||
| node_dict.update({node.name: node}) | |||
| self._node_groups.update({group_name: node_dict}) | |||
| else: | |||
| self._normal_nodes.update({node.name: node}) | |||
| return node | |||
| def _parse_graph_proto_parameter(self, parameter): | |||
| """ | |||
| Parse anf_ir_pb2.model_proto.graph.parameter, and create a parameter node. | |||
| Args: | |||
| parameter (anf_ir_pb2.model_proto.graph.parameter): Refer to anf_ir_pb2.model_proto.graph.parameter. | |||
| Returns: | |||
| Node, a `Node` object. | |||
| """ | |||
| node = Node(name=parameter.name, node_id=parameter.name) | |||
| node.node_type = NodeTypeEnum.PARAMETER.value | |||
| node.shape = self._parse_type_proto(parameter.type) | |||
| logger.debug("Foreach graph proto parameters, node id: %s, node name: %s, " | |||
| "node def name: %s", node.node_id, node.name, parameter.name) | |||
| return node | |||
| def _parse_graph_proto_const(self, const, const_node_id): | |||
| """ | |||
| Parse anf_ir_pb2.model_proto.graph.const, and create a const node. | |||
| Args: | |||
| const (anf_ir_pb2.model_proto.graph.const): Refer to anf_ir_pb2.model_proto.graph.const | |||
| const_node_id (str): The id of the new const node, it should be unique in graph. | |||
| Returns: | |||
| Node, a `Node` object. | |||
| """ | |||
| node = Node(name=const.key, node_id=const_node_id) | |||
| node.node_type = NodeTypeEnum.CONST.value | |||
| node.update_attr({const.key: str(const.value)}) | |||
| if const.value.dtype == DataTypeEnum.DT_TENSOR: | |||
| shape = [] | |||
| for dim in const.value.tensor_val.dims: | |||
| shape.append(dim) | |||
| node.shape = shape | |||
| return node | |||
| def _parse_type_proto(self, type_proto): | |||
| """ | |||
| Parse proto's `message TypeProto` to get shape information. | |||
| Args: | |||
| type_proto (anf_ir_pb2.TypeProto): Refer to anf_ir_pb2.TypeProto. | |||
| Returns: | |||
| list, a list of shape. | |||
| """ | |||
| shapes = [] | |||
| if type_proto.HasField('tensor_type'): | |||
| tensor_type = type_proto.tensor_type | |||
| tensor_shape_proto = tensor_type.shape | |||
| for dim in tensor_shape_proto.dim: | |||
| shapes.append(dim.size) | |||
| if type_proto.HasField('sequence_type'): | |||
| for elem_type in type_proto.sequence_type.elem_types: | |||
| shapes.append(self._parse_type_proto(elem_type)) | |||
| return shapes | |||
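| # Worked example (illustrative proto contents): for a tensor type with | |||
| # dimensions 32x3x224x224 this returns [32, 3, 224, 224]; for a sequence of | |||
| # such tensor types it returns a nested list with one shape per element. | |||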
| def _create_group_name(self, name_scope, node_type, node_name): | |||
| """ | |||
| Create group name by node name, name scope, node type. | |||
| Only nodes that conform to the rules are aggregated. | |||
| Args: | |||
| name_scope (str): The node name scope. | |||
| node_type (str): The node type. | |||
| node_name (str): The node name. | |||
| Returns: | |||
| Optional[str], if match the rules will return a group name, else return None. | |||
| """ | |||
| group_types = ['Reshape', 'Variable'] | |||
| pattern_names = r'.*?/Cast-op\d+' | |||
| if node_type in group_types: | |||
| group_name = name_scope + '/' + node_type if name_scope else node_type | |||
| return group_name | |||
| if node_type == 'FrameworkOp' and re.search(pattern_names, node_name): | |||
| group_name = name_scope + '/' + 'Cast-op' if name_scope else 'Cast-op' | |||
| return group_name | |||
| return None | |||
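| # Worked examples (illustrative names): _create_group_name('net', 'Reshape', | |||
| # 'net/Reshape-op1') returns 'net/Reshape'; _create_group_name('net', | |||
| # 'FrameworkOp', 'net/Cast-op12') returns 'net/Cast-op'; other combinations | |||
| # return None, so those nodes are not grouped. | |||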
| @@ -0,0 +1,211 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| This file is used to define the node of graph and associated base types. | |||
| """ | |||
| from enum import Enum | |||
| class NodeTypeEnum(Enum): | |||
| """Node type enum. The following types are new to our custom.""" | |||
| NAME_SCOPE = 'name_scope' | |||
| POLYMERIC_SCOPE = 'polymeric_scope' | |||
| PARAMETER = 'Parameter' | |||
| CONST = 'Const' | |||
| class Node: | |||
| """ | |||
| Define a node object. | |||
| Args: | |||
| name (str): Name of new node. | |||
| node_id (str): The id of this node, and node id is unique in graph. | |||
| """ | |||
| def __init__(self, name, node_id): | |||
| self._node_id = node_id | |||
| self._name = name | |||
| self._type = "" | |||
| self._attr = dict() | |||
| self._input = dict() | |||
| self._output_i = -1 | |||
| self._output = {} | |||
| self._polymeric_input = {} | |||
| self._polymeric_output = {} | |||
| self._polymeric_scope_name = "" | |||
| self._subnode_count = 0 | |||
| self._name_scope = "" | |||
| self.shape = [] | |||
| def to_dict(self): | |||
| """Converts the node object to dictionary format.""" | |||
| return { | |||
| 'name': self._name, | |||
| 'type': self._type, | |||
| 'attr': self._attr, | |||
| 'input': self._input, | |||
| 'output_i': self._output_i, | |||
| 'output': self._output, | |||
| 'polymeric_input': self._polymeric_input, | |||
| 'polymeric_output': self._polymeric_output, | |||
| 'subnode_count': self._subnode_count, | |||
| 'polymeric_scope_name': self._polymeric_scope_name | |||
| } | |||
| @property | |||
| def node_id(self): | |||
| """The id of this node, and id is unique in graph.""" | |||
| return self._node_id | |||
| @property | |||
| def name(self): | |||
| """Get node name.""" | |||
| return self._name | |||
| @name.setter | |||
| def name(self, name): | |||
| """Set node name.""" | |||
| self._name = name | |||
| @property | |||
| def node_type(self): | |||
| """Get node type.""" | |||
| return self._type | |||
| @node_type.setter | |||
| def node_type(self, node_type): | |||
| """Set node type.""" | |||
| self._type = node_type | |||
| @property | |||
| def attr(self): | |||
| """Get node attr.""" | |||
| return self._attr | |||
| def update_attr(self, attr_dict): | |||
| """ | |||
| Update node attr. | |||
| Args: | |||
| attr_dict (dict[str, str]): Format is {'<key>': '<value>'}. | |||
| """ | |||
| self._attr.update(attr_dict) | |||
| @property | |||
| def input(self): | |||
| """ | |||
| Get all input of current node. | |||
| Returns: | |||
| dict[str, dict], format is {'<src_name>': {'shape': [], 'edge_type': str, 'scope': str}}. | |||
| """ | |||
| return self._input | |||
| def update_input(self, input_dict): | |||
| """ | |||
| Update input. | |||
| Args: | |||
| input_dict (dict[str, dict]): Format is {'<src_name>': {'shape': [], 'edge_type': str, 'scope': str}}. | |||
| """ | |||
| self._input.update(input_dict) | |||
| @property | |||
| def output_i(self): | |||
| """The memory address of this node when it is in run time.""" | |||
| return self._output_i | |||
| @output_i.setter | |||
| def output_i(self, output_i): | |||
| """Set memory address.""" | |||
| self._output_i = output_i | |||
| @property | |||
| def polymeric_input(self): | |||
| """ | |||
| The polymeric input is the input of the polymeric nodes. | |||
| Returns: | |||
| dict[str, dict], format is {'<src_name>': {'edge_type': '<value>'}}. | |||
| """ | |||
| return self._polymeric_input | |||
| def update_polymeric_input(self, polymeric_input): | |||
| """The polymeric input is the input of the polymeric nodes.""" | |||
| self._polymeric_input.update(polymeric_input) | |||
| @property | |||
| def output(self): | |||
| """The output node of this node.""" | |||
| return self._output | |||
| def update_output(self, output): | |||
| """ | |||
| Update output node. | |||
| Args: | |||
| output (dict[str, TypedDict('NodeType', {'type': str})]): Format | |||
| is {"<node_name>": {"type": "<node type>"}}. | |||
| """ | |||
| self._output.update(output) | |||
| @property | |||
| def polymeric_output(self): | |||
| """Get polymeric output.""" | |||
| return self._polymeric_output | |||
| def update_polymeric_output(self, polymeric_output): | |||
| """ | |||
| Update polymeric output. | |||
| Args: | |||
| polymeric_output (dict[str, dict]): Format is {dst_node.polymeric_scope_name: | |||
| {'edge_type': EdgeTypeEnum.data}}. | |||
| """ | |||
| self._polymeric_output.update(polymeric_output) | |||
| @property | |||
| def polymeric_scope_name(self): | |||
| """Get polymeric scope name.""" | |||
| return self._polymeric_scope_name | |||
| @polymeric_scope_name.setter | |||
| def polymeric_scope_name(self, name): | |||
| """Set polymeric scope name.""" | |||
| self._polymeric_scope_name = name | |||
| @property | |||
| def subnode_count(self): | |||
| """The sub node count of this node, if this node is a scope node, this count will not be zero.""" | |||
| return self._subnode_count | |||
| @subnode_count.setter | |||
| def subnode_count(self, count): | |||
| """Set sub node count.""" | |||
| self._subnode_count = count | |||
| @property | |||
| def name_scope(self): | |||
| """Get name scope of this node.""" | |||
| return self._name_scope | |||
| @name_scope.setter | |||
| def name_scope(self, name_scope): | |||
| """Set name scope.""" | |||
| self._name_scope = name_scope | |||
| def __str__(self): | |||
| return f'<Node, name: {self._name}, type: {self._type}>' | |||
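| # A hedged usage sketch (not part of the original file); the values below | |||
| # are illustrative only. | |||
| # | |||
| #     node = Node(name='Default/TensorAdd-op17', node_id='17') | |||
| #     node.node_type = NodeTypeEnum.PARAMETER.value | |||
| #     node.update_attr({'index': 'i: 0'}) | |||
| #     node.to_dict()['type']  # -> 'Parameter' | |||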
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,246 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Data Loader Generator. | |||
| This module generates loaders from summary logs. | |||
| """ | |||
| import os | |||
| from mindinsight.datavisual.common.log import logger | |||
| from mindinsight.datavisual.data_access.file_handler import FileHandler | |||
| from mindinsight.datavisual.data_transform.data_loader import DataLoader | |||
| from mindinsight.datavisual.data_transform.loader_generators.loader_generator import MAX_DATA_LOADER_SIZE | |||
| from mindinsight.datavisual.data_transform.loader_generators.loader_struct import LoaderStruct | |||
| from mindinsight.datavisual.data_transform.loader_generators.loader_generator import LoaderGenerator | |||
| from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher | |||
| from mindinsight.utils.exceptions import ParamValueError | |||
| class DataLoaderGenerator(LoaderGenerator): | |||
| """ | |||
| DataLoaderGenerator generates a dict of loaders from summary logs. | |||
| Each loader handles the event data of one summary directory. | |||
| It helps DataManager to generate loaders. | |||
| """ | |||
| def __init__(self, summary_path): | |||
| """ | |||
| Init DataLoaderGenerator. | |||
| Args: | |||
| summary_path (str): A directory path, e.g. '/data/ImageNet/'. | |||
| """ | |||
| self._summary_path = self._check_and_normalize_summary_path(summary_path) | |||
| self._summary_watcher = SummaryWatcher() | |||
| def _check_and_normalize_summary_path(self, summary_path): | |||
| """ | |||
| Check and normalize summary path. | |||
| Args: | |||
| summary_path (str): A directory path, e.g. '/data/ImageNet/'. | |||
| Returns: | |||
| str, normalized summary path. | |||
| """ | |||
| if summary_path is None: | |||
| logger.warning("Summary path is None. It will not init data loader generator.") | |||
| raise ParamValueError("Summary path is None.") | |||
| summary_path = os.path.realpath(summary_path) | |||
| return summary_path | |||
| def generate_loaders(self, loader_pool): | |||
| """ | |||
| Generate loaders from the summary path. If the summary path is empty, an empty dict is returned. | |||
| Args: | |||
| loader_pool (dict[str, LoaderStruct]): Current loader pool in data_manager. | |||
| Returns: | |||
| dict[str, LoaderStruct], a dict of `Loader`. | |||
| """ | |||
| loader_dict = {} | |||
| if not FileHandler.exists(self._summary_path): | |||
| logger.warning("Summary path does not exist. It will not start loading events data. " | |||
| "Current path is %r.", self._summary_path) | |||
| return loader_dict | |||
| dir_map_mtime_dict = {} | |||
| min_modify_time = None | |||
| summaries_info = self._summary_watcher.list_summary_directories(self._summary_path) | |||
| for item in summaries_info: | |||
| relative_path = item.get("relative_path") | |||
| current_dir = FileHandler.join(self._summary_path, relative_path) | |||
| dataloader = DataLoader(current_dir) | |||
| if not dataloader.has_valid_files(): | |||
| logger.debug("Can not find valid train log file in folder %s , " | |||
| "will ignore.", relative_path) | |||
| continue | |||
| modify_time = item.get("update_time").timestamp() | |||
| # If the loader already exists in the loader pool with a newer update time, use its time. | |||
| loader_id = self._generate_loader_id(relative_path) | |||
| loader = loader_pool.get(loader_id) | |||
| if loader is not None and loader.latest_update_time > modify_time: | |||
| modify_time = loader.latest_update_time | |||
| if not min_modify_time: | |||
| # On the first iteration, initialize the minimum modify time. | |||
| min_modify_time = modify_time | |||
| # We need to find the `MAX_DATA_LOADER_SIZE` most recently modified folders. | |||
| if len(dir_map_mtime_dict) < MAX_DATA_LOADER_SIZE: | |||
| if modify_time < min_modify_time: | |||
| min_modify_time = modify_time | |||
| dir_map_mtime_dict.update({relative_path: modify_time}) | |||
| else: | |||
| if modify_time >= min_modify_time: | |||
| dir_map_mtime_dict.update({relative_path: modify_time}) | |||
| sorted_dir_tuple = sorted(dir_map_mtime_dict.items(), | |||
| key=lambda d: d[1])[-MAX_DATA_LOADER_SIZE:] | |||
| for relative_path, modify_time in sorted_dir_tuple: | |||
| loader_id = self._generate_loader_id(relative_path) | |||
| loader = self._generate_loader_by_relative_path(relative_path) | |||
| loader_dict.update({loader_id: loader}) | |||
| return loader_dict | |||
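| # A minimal, self-contained sketch (illustrative only) of the selection | |||
| # logic above: keep the MAX_DATA_LOADER_SIZE most recently modified dirs. | |||
| # | |||
| #     mtimes = {'./log1': 3.0, './log2': 1.0, './log3': 2.0} | |||
| #     sorted(mtimes.items(), key=lambda d: d[1])[-2:] | |||
| #     # with MAX_DATA_LOADER_SIZE == 2 -> [('./log3', 2.0), ('./log1', 3.0)] | |||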
| def _generate_loader_by_relative_path(self, relative_path): | |||
| """ | |||
| Generate loader by relative path. | |||
| Args: | |||
| relative_path (str): Relative path of a summary directory, e.g. './log1'. | |||
| Returns: | |||
| LoaderStruct, a `Loader` instance for the given relative path. | |||
| """ | |||
| current_dir = os.path.realpath(FileHandler.join(self._summary_path, relative_path)) | |||
| data_loader = DataLoader(current_dir) | |||
| loader_id = self._generate_loader_id(relative_path) | |||
| loader = LoaderStruct(loader_id=loader_id, | |||
| name=self._generate_loader_name(relative_path), | |||
| path=current_dir, | |||
| latest_update_time=FileHandler.file_stat(current_dir).mtime, | |||
| data_loader=data_loader) | |||
| return loader | |||
| def _generate_loader_id(self, relative_path): | |||
| """ | |||
| Generate loader id from relative path. | |||
| Args: | |||
| relative_path (str): Relative path of a summary directory, e.g. './log1'. | |||
| Returns: | |||
| str, loader_id for `Loader`. | |||
| """ | |||
| loader_id = relative_path | |||
| return loader_id | |||
| def _generate_loader_name(self, relative_path): | |||
| """ | |||
| Generate loader name from relative path. | |||
| Args: | |||
| relative_path (str): Relative path of a summary directory, e.g. './log1'. | |||
| Returns: | |||
| str, loader_name for `Loader`. | |||
| """ | |||
| loader_name = relative_path | |||
| return loader_name | |||
| def _get_relative_path_from_train_id(self, train_id): | |||
| """ | |||
| Get relative path from train_id. | |||
| Args: | |||
| train_id (str): Train ID of a summary directory, e.g. './log1'. | |||
| Returns: | |||
| str, relative path of `Loader`. | |||
| """ | |||
| relative_path = train_id | |||
| return relative_path | |||
| def check_train_job_exist(self, train_id): | |||
| """ | |||
| Check if train job exists. | |||
| Args: | |||
| train_id (str): Train ID of a summary directory, e.g. './log1'. | |||
| Returns: | |||
| bool, if train job exists, return True. | |||
| """ | |||
| if not self._is_train_id_valid(train_id): | |||
| return False | |||
| relative_path = self._get_relative_path_from_train_id(train_id) | |||
| if self._summary_watcher.is_summary_directory(self._summary_path, relative_path): | |||
| return True | |||
| return False | |||
| def _is_train_id_valid(self, train_id): | |||
| """ | |||
| Check if train_id is valid. | |||
| Args: | |||
| train_id (str): Train ID of a summary directory, e.g. './log1'. | |||
| Returns: | |||
| bool, if train id is valid, return True. | |||
| """ | |||
| if not train_id.startswith('./'): | |||
| logger.warning("The train_id does not start with './'.") | |||
| return False | |||
| if len(train_id.split("/")) > 2: | |||
| logger.warning("The train_id contains multiple '/'.") | |||
| return False | |||
| return True | |||
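| # Illustrative examples of the validation above (assuming a generator | |||
| # instance named `gen`): | |||
| # | |||
| #     gen._is_train_id_valid('./log1')   # -> True | |||
| #     gen._is_train_id_valid('log1')     # -> False, missing './' prefix | |||
| #     gen._is_train_id_valid('./a/b')    # -> False, contains multiple '/' | |||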
| def generate_loader_by_train_id(self, train_id): | |||
| """ | |||
| Generate loader by train_id. | |||
| Args: | |||
| train_id (str): Train ID of a summary directory, e.g. './log1'. | |||
| Returns: | |||
| LoaderStruct, a `Loader` instance for the given train ID. | |||
| """ | |||
| relative_path = self._get_relative_path_from_train_id(train_id) | |||
| loader = self._generate_loader_by_relative_path(relative_path) | |||
| return loader | |||
| @@ -0,0 +1,60 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Base loader generator.""" | |||
| from abc import abstractmethod | |||
| MAX_DATA_LOADER_SIZE = 15 | |||
| class LoaderGenerator: | |||
| """Base loader generator for loader generators.""" | |||
| @abstractmethod | |||
| def generate_loaders(self, loader_pool): | |||
| """ | |||
| Abstract method for generating loaders. | |||
| Args: | |||
| loader_pool (dict[str, LoaderStruct]): Current loader pool in data_manager. | |||
| Returns: | |||
| dict[str, LoaderStruct], a dict of `Loader`. | |||
| """ | |||
| @abstractmethod | |||
| def check_train_job_exist(self, train_id): | |||
| """ | |||
| Abstract method for checking if train job exists. | |||
| Args: | |||
| train_id (str): Train ID. | |||
| Returns: | |||
| bool, if train job exists, return True. | |||
| """ | |||
| @abstractmethod | |||
| def generate_loader_by_train_id(self, train_id): | |||
| """ | |||
| Abstract method for generating loader by train id. | |||
| Args: | |||
| train_id (str): Train ID. | |||
| Returns: | |||
| dict[str, LoaderStruct], a dict of `Loader`. | |||
| """ | |||
| @@ -0,0 +1,64 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Loader struct.""" | |||
| class LoaderStruct: | |||
| """ | |||
| Loader to save summary info. | |||
| LoaderStruct contains: loader_id, name, path, latest_update_time, data_loader. | |||
| """ | |||
| def __init__(self, loader_id, name, path, latest_update_time, data_loader): | |||
| self._loader_id = loader_id | |||
| self._name = name | |||
| self._path = path | |||
| self._latest_update_time = latest_update_time | |||
| self._data_loader = data_loader | |||
| @property | |||
| def loader_id(self): | |||
| """Get loader ID.""" | |||
| return self._loader_id | |||
| @property | |||
| def name(self): | |||
| """Get loader name.""" | |||
| return self._name | |||
| @property | |||
| def latest_update_time(self): | |||
| """Get the latest update time of loader.""" | |||
| return self._latest_update_time | |||
| @property | |||
| def data_loader(self): | |||
| """Get data loader.""" | |||
| return self._data_loader | |||
| @latest_update_time.setter | |||
| def latest_update_time(self, latest_update_time): | |||
| """Set the latest update time of loader.""" | |||
| self._latest_update_time = latest_update_time | |||
| def to_dict(self): | |||
| """Transform LoaderStruct to dict.""" | |||
| return dict( | |||
| loader_id=self._loader_id, | |||
| name=self._name, | |||
| path=self._path, | |||
| latest_update_time=self._latest_update_time, | |||
| data_loader=self._data_loader | |||
| ) | |||
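| # A hedged usage sketch (illustrative; `data_loader` stands for any | |||
| # DataLoader-like object): | |||
| # | |||
| #     loader = LoaderStruct(loader_id='./log1', name='./log1', path='/data/log1', | |||
| #                           latest_update_time=1587000000.0, data_loader=data_loader) | |||
| #     loader.latest_update_time = 1587000001.0  # settable property | |||
| #     loader.to_dict()['name']  # -> './log1' | |||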
| @@ -0,0 +1,373 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| DataLoader for MindSpore data. | |||
| This module is used to load MindSpore training log files. | |||
| Each instance reads an entire run; a run can contain one or | |||
| more log files. | |||
| """ | |||
| import re | |||
| import struct | |||
| from google.protobuf.message import DecodeError | |||
| from google.protobuf.text_format import ParseError | |||
| from mindinsight.datavisual.common import exceptions | |||
| from mindinsight.datavisual.common.enums import PluginNameEnum | |||
| from mindinsight.datavisual.common.log import logger | |||
| from mindinsight.datavisual.data_access.file_handler import FileHandler | |||
| from mindinsight.datavisual.data_transform.events_data import EventsData | |||
| from mindinsight.datavisual.data_transform.events_data import TensorEvent | |||
| from mindinsight.datavisual.data_transform.graph import MSGraph | |||
| from mindinsight.datavisual.proto_files import mindinsight_summary_pb2 as summary_pb2 | |||
| from mindinsight.datavisual.proto_files import mindinsight_anf_ir_pb2 as anf_ir_pb2 | |||
| from mindinsight.datavisual.utils import crc32 | |||
| from mindinsight.utils.exceptions import UnknownError | |||
| HEADER_SIZE = 8 | |||
| CRC_STR_SIZE = 4 | |||
| class MSDataLoader: | |||
| """ | |||
| MSDataLoader class, load MindSpore event data. | |||
| Args: | |||
| summary_dir (str): Log directory. | |||
| """ | |||
| def __init__(self, summary_dir): | |||
| self._init_instance(summary_dir) | |||
| def _init_instance(self, summary_dir): | |||
| self._summary_dir = summary_dir | |||
| self._valid_filenames = [] | |||
| self._events_data = EventsData() | |||
| self._latest_summary_filename = '' | |||
| self._latest_summary_file_size = 0 | |||
| self._summary_file_handler = None | |||
| self._latest_pb_file_mtime = 0 | |||
| def get_events_data(self): | |||
| """Return events data read from log file.""" | |||
| return self._events_data | |||
| def _check_files_deleted(self, filenames, old_filenames): | |||
| """ | |||
| Check whether any files have been deleted from the file list. | |||
| Args: | |||
| filenames (list[str]): The latest files list. | |||
| old_filenames (list[str]): List of old files. | |||
| """ | |||
| deleted_files = set(old_filenames) - set(filenames) | |||
| if deleted_files: | |||
| logger.warning("There are some files has been deleted, " | |||
| "we will reload all files in path %s.", self._summary_dir) | |||
| self._init_instance(self._summary_dir) | |||
| def load(self): | |||
| """ | |||
| Load all valid log files. | |||
| When the file is reloaded, it will continue to load from where it left off. | |||
| """ | |||
| logger.debug("Start to load data in ms data loader.") | |||
| filenames = self.filter_valid_files() | |||
| if not filenames: | |||
| logger.warning("No valid files can be loaded, summary_dir: %s.", self._summary_dir) | |||
| raise exceptions.SummaryLogPathInvalid() | |||
| old_filenames = list(self._valid_filenames) | |||
| self._valid_filenames = filenames | |||
| self._check_files_deleted(filenames, old_filenames) | |||
| self._load_summary_files(self._valid_filenames) | |||
| self._load_pb_files(self._valid_filenames) | |||
| def _load_summary_files(self, filenames): | |||
| """ | |||
| Load summary file and parse file content. | |||
| Args: | |||
| filenames (list[str]): File name list. | |||
| """ | |||
| summary_files = self._filter_summary_files(filenames) | |||
| summary_files = self._sorted_summary_files(summary_files) | |||
| for filename in summary_files: | |||
| if self._latest_summary_filename and \ | |||
| (self._compare_summary_file(self._latest_summary_filename, filename)): | |||
| continue | |||
| file_path = FileHandler.join(self._summary_dir, filename) | |||
| if filename != self._latest_summary_filename: | |||
| self._summary_file_handler = FileHandler(file_path, 'rb') | |||
| self._latest_summary_filename = filename | |||
| self._latest_summary_file_size = 0 | |||
| new_size = FileHandler.file_stat(file_path).size | |||
| if new_size == self._latest_summary_file_size: | |||
| continue | |||
| self._latest_summary_file_size = new_size | |||
| try: | |||
| self._load_single_file(self._summary_file_handler) | |||
| except UnknownError as ex: | |||
| logger.warning("Parse summary file failed, detail: %r," | |||
| "file path: %s.", str(ex), file_path) | |||
| def _load_single_file(self, file_handler): | |||
| """ | |||
| Load data from a single log file. | |||
| Args: | |||
| file_handler (FileHandler): A file handler. | |||
| """ | |||
| logger.debug("Load single summary file, file path: %s.", file_handler.file_path) | |||
| while True: | |||
| start_offset = file_handler.offset | |||
| try: | |||
| event_str = self._event_load(file_handler) | |||
| if event_str is None: | |||
| file_handler.reset_offset(start_offset) | |||
| break | |||
| event = summary_pb2.Event.FromString(event_str) | |||
| self._event_parse(event) | |||
| except exceptions.CRCFailedError: | |||
| file_handler.reset_offset(start_offset) | |||
| logger.warning("Check crc faild and ignore this file, file_path=%s, " | |||
| "offset=%s.", file_handler.file_path, file_handler.offset) | |||
| break | |||
| except (OSError, DecodeError, exceptions.MindInsightException) as ex: | |||
| logger.warning("Parse log file fail, and ignore this file, detail: %r," | |||
| "file path: %s.", str(ex), file_handler.file_path) | |||
| break | |||
| except Exception as ex: | |||
| logger.exception(ex) | |||
| raise UnknownError(str(ex)) | |||
| def _event_load(self, file_handler): | |||
| """ | |||
| Load a binary record into an event string. | |||
| Args: | |||
| file_handler (FileHandler): A file handler. | |||
| Returns: | |||
| bytes, MindSpore event in bytes. | |||
| """ | |||
| # read the header | |||
| header_str = file_handler.read(HEADER_SIZE) | |||
| if not header_str: | |||
| logger.info("End of file, file_path=%s.", file_handler.file_path) | |||
| return None | |||
| header_crc_str = file_handler.read(CRC_STR_SIZE) | |||
| if not header_crc_str: | |||
| header_crc_str = '' | |||
| if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE: | |||
| logger.warning("Check header size and crc, record truncated at offset %s, " | |||
| "file_path=%s.", file_handler.offset, file_handler.file_path) | |||
| return None | |||
| if crc32.GetValueFromStr(header_crc_str) != crc32.GetMaskCrc32cValue(header_str, HEADER_SIZE): | |||
| raise exceptions.CRCFailedError() | |||
| # read the event body if integrity of header is verified | |||
| header = struct.unpack('Q', header_str) | |||
| event_len = int(header[0]) | |||
| event_str = file_handler.read(event_len) | |||
| if not event_str: | |||
| event_str = '' | |||
| event_crc_str = file_handler.read(CRC_STR_SIZE) | |||
| if not event_crc_str: | |||
| event_crc_str = '' | |||
| if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE: | |||
| logger.warning("Check event crc, record truncated at offset %d, file_path: %s.", | |||
| file_handler.offset, file_handler.file_path) | |||
| return None | |||
| if crc32.GetValueFromStr(event_crc_str) != crc32.GetMaskCrc32cValue(event_str, event_len): | |||
| raise exceptions.CRCFailedError() | |||
| return event_str | |||
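| # Record layout assumed by _event_load above, based on the reads it | |||
| # performs (byte order follows struct's native 'Q' format): | |||
| # | |||
| #     [8-byte body length][4-byte CRC of header][body][4-byte CRC of body] | |||
| # | |||
| # For example, a header declaring a 256-byte event body could be built | |||
| # (illustratively) with: | |||
| # | |||
| #     header_str = struct.pack('Q', 256) | |||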
| def _event_parse(self, event): | |||
| """ | |||
| Transform `Event` data into tensor events and add them to EventsData. | |||
| Args: | |||
| event (Event): Message event in summary proto, data read from file handler. | |||
| """ | |||
| if event.HasField('summary'): | |||
| for value in event.summary.value: | |||
| if value.HasField('scalar_value'): | |||
| tag = '{}/{}'.format(value.tag, PluginNameEnum.SCALAR.value) | |||
| tensor_event = TensorEvent(wall_time=event.wall_time, | |||
| step=event.step, | |||
| tag=tag, | |||
| plugin_name=PluginNameEnum.SCALAR.value, | |||
| value=value.scalar_value) | |||
| self._events_data.add_tensor_event(tensor_event) | |||
| if value.HasField('image'): | |||
| tag = '{}/{}'.format(value.tag, PluginNameEnum.IMAGE.value) | |||
| tensor_event = TensorEvent(wall_time=event.wall_time, | |||
| step=event.step, | |||
| tag=tag, | |||
| plugin_name=PluginNameEnum.IMAGE.value, | |||
| value=value.image) | |||
| self._events_data.add_tensor_event(tensor_event) | |||
| if event.HasField('graph_def'): | |||
| graph_proto = event.graph_def | |||
| graph = MSGraph() | |||
| graph.build_graph(graph_proto) | |||
| tensor_event = TensorEvent(wall_time=event.wall_time, | |||
| step=event.step, | |||
| tag=self._latest_summary_filename, | |||
| plugin_name=PluginNameEnum.GRAPH.value, | |||
| value=graph) | |||
| try: | |||
| graph_tags = self._events_data.list_tags_by_plugin(PluginNameEnum.GRAPH.value) | |||
| except KeyError: | |||
| graph_tags = [] | |||
| summary_tags = self._filter_summary_files(graph_tags) | |||
| for tag in summary_tags: | |||
| self._events_data.delete_tensor_event(tag) | |||
| self._events_data.add_tensor_event(tensor_event) | |||
| def filter_valid_files(self): | |||
| """ | |||
| Gets a list of valid files from the given file path. | |||
| Returns: | |||
| list[str], file name list. | |||
| """ | |||
| filenames = [] | |||
| for filename in FileHandler.list_dir(self._summary_dir): | |||
| if FileHandler.is_file(FileHandler.join(self._summary_dir, filename)): | |||
| filenames.append(filename) | |||
| valid_filenames = [] | |||
| valid_filenames.extend(self._filter_summary_files(filenames)) | |||
| valid_filenames.extend(self._filter_pb_files(filenames)) | |||
| return list(set(valid_filenames)) | |||
| @staticmethod | |||
| def _filter_summary_files(filenames): | |||
| """ | |||
| Gets a list of summary files. | |||
| Args: | |||
| filenames (list[str]): File name list, like [filename1, filename2]. | |||
| Returns: | |||
| list[str], filename list. | |||
| """ | |||
| return list(filter( | |||
| lambda filename: (re.search(r'summary\.\d+', filename) | |||
| and not filename.endswith("_lineage")), filenames)) | |||
| @staticmethod | |||
| def _compare_summary_file(current_file, dst_file): | |||
| """ | |||
| Compare the creation times of the two summary log files. | |||
| Args: | |||
| current_file (str): Must be the summary log file path. | |||
| dst_file (str): Must be the summary log file path. | |||
| Returns: | |||
| bool, True if the current file is newer than dst_file, False otherwise. | |||
| """ | |||
| current_time = int(re.search(r'summary\.(\d+)', current_file)[1]) | |||
| dst_time = int(re.search(r'summary\.(\d+)', dst_file)[1]) | |||
| if current_time > dst_time or (current_time == dst_time and current_file > dst_file): | |||
| return True | |||
| return False | |||
| @staticmethod | |||
| def _sorted_summary_files(summary_files): | |||
| """Sort by creating time increments and filenames decrement.""" | |||
| filenames = sorted(summary_files, | |||
| key=lambda filename: (-int(re.search(r'summary\.(\d+)', filename)[1]), filename), | |||
| reverse=True) | |||
| return filenames | |||
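| # An illustrative example of the ordering above: timestamps ascend, and | |||
| # filenames with equal timestamps descend. | |||
| # | |||
| #     MSDataLoader._sorted_summary_files( | |||
| #         ['events.summary.2', 'events.summary.1.b', 'events.summary.1.a']) | |||
| #     # -> ['events.summary.1.b', 'events.summary.1.a', 'events.summary.2'] | |||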
| @staticmethod | |||
| def _filter_pb_files(filenames): | |||
| """ | |||
| Get a list of pb files. | |||
| Args: | |||
| filenames (list[str]): File name list, like [filename1, filename2]. | |||
| Returns: | |||
| list[str], filename list. | |||
| """ | |||
| return list(filter(lambda filename: re.search(r'\.pb$', filename), filenames)) | |||
| def _load_pb_files(self, filenames): | |||
| """ | |||
| Load and parse the pb files. | |||
| Args: | |||
| filenames (list[str]): File name list, like [filename1, filename2]. | |||
| Returns: | |||
| list[str], filename list. | |||
| """ | |||
| pb_filenames = self._filter_pb_files(filenames) | |||
| pb_filenames = sorted(pb_filenames, key=lambda file: FileHandler.file_stat( | |||
| FileHandler.join(self._summary_dir, file)).mtime) | |||
| for filename in pb_filenames: | |||
| mtime = FileHandler.file_stat(FileHandler.join(self._summary_dir, filename)).mtime | |||
| if mtime <= self._latest_pb_file_mtime: | |||
| continue | |||
| self._latest_pb_file_mtime = mtime | |||
| self._parse_pb_file(filename) | |||
| def _parse_pb_file(self, filename): | |||
| """ | |||
| Parse pb file and write content to `EventsData`. | |||
| Args: | |||
| filename (str): The file path of pb file. | |||
| """ | |||
| file_path = FileHandler.join(self._summary_dir, filename) | |||
| logger.info("Start to load graph from pb file, file path: %s.", file_path) | |||
| filehandler = FileHandler(file_path) | |||
| model_proto = anf_ir_pb2.ModelProto() | |||
| try: | |||
| model_proto.ParseFromString(filehandler.read()) | |||
| except ParseError: | |||
| logger.warning("The given file is not a valid pb file, file path: %s.", file_path) | |||
| return | |||
| graph = MSGraph() | |||
| graph.build_graph(model_proto.graph) | |||
| tensor_event = TensorEvent(wall_time=FileHandler.file_stat(file_path).mtime, | |||
| step=0, | |||
| tag=filename, | |||
| plugin_name=PluginNameEnum.GRAPH.value, | |||
| value=graph) | |||
| self._events_data.add_tensor_event(tensor_event) | |||
| @@ -0,0 +1,108 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """A reservoir sampling on the values.""" | |||
| import random | |||
| import threading | |||
| from mindinsight.utils.exceptions import ParamValueError | |||
| class Reservoir: | |||
| """ | |||
| A container based on Reservoir Sampling algorithm. | |||
| The newly added sample is always preserved. If the container is full, | |||
| an old sample is replaced at random; each stored sample has the same | |||
| probability of being replaced. | |||
| """ | |||
| def __init__(self, size): | |||
| """ | |||
| A container constructor which creates a new Reservoir. | |||
| Args: | |||
| size (int): Container size. If the size is 0, the container is unlimited. | |||
| Raises: | |||
| ParamValueError: If size is not a nonnegative integer. | |||
| """ | |||
| if not isinstance(size, (int,)) or size < 0: | |||
| raise ParamValueError('size must be a nonnegative integer, was %s' % size) | |||
| self._samples_max_size = size | |||
| self._samples = [] | |||
| self._sample_counter = 0 | |||
| self._sample_selector = random.Random(0) | |||
| self._mutex = threading.Lock() | |||
| def samples(self): | |||
| """Return all stored samples.""" | |||
| with self._mutex: | |||
| return list(self._samples) | |||
| def add_sample(self, sample): | |||
| """ | |||
| Add a sample to Reservoir. | |||
| Replaces an old sample when the capacity is full. | |||
| Newly added samples are guaranteed to enter the reservoir. | |||
| Args: | |||
| sample (Any): The sample to add to the Reservoir. | |||
| """ | |||
| with self._mutex: | |||
| if len(self._samples) < self._samples_max_size or self._samples_max_size == 0: | |||
| self._samples.append(sample) | |||
| else: | |||
| # Use the Reservoir Sampling algorithm to replace the old sample. | |||
| rand_int = self._sample_selector.randint( | |||
| 0, self._sample_counter) | |||
| if rand_int < self._samples_max_size: | |||
| self._samples.pop(rand_int) | |||
| self._samples.append(sample) | |||
| else: | |||
| self._samples[-1] = sample | |||
| self._sample_counter += 1 | |||
| def remove_sample(self, filter_fun): | |||
| """ | |||
| Remove the samples from Reservoir that do not meet the filter criteria. | |||
| Args: | |||
| filter_fun (Callable[..., Any]): Determines whether a sample meets | |||
| the deletion condition. | |||
| Returns: | |||
| int, the number of samples removed. | |||
| """ | |||
| remove_size = 0 | |||
| with self._mutex: | |||
| before_remove_size = len(self._samples) | |||
| if before_remove_size > 0: | |||
| # keep only the samples that meet the filter criteria. | |||
| self._samples = list(filter(filter_fun, self._samples)) | |||
| after_remove_size = len(self._samples) | |||
| remove_size = before_remove_size - after_remove_size | |||
| if remove_size > 0: | |||
| # Update _sample_counter when samples have been removed. | |||
| sample_remaining_rate = float( | |||
| after_remove_size) / before_remove_size | |||
| self._sample_counter = int( | |||
| round(self._sample_counter * sample_remaining_rate)) | |||
| return remove_size | |||
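| # A hedged usage sketch (illustrative only): | |||
| # | |||
| #     reservoir = Reservoir(size=2) | |||
| #     for step in range(100): | |||
| #         reservoir.add_sample(step) | |||
| #     len(reservoir.samples())   # -> 2; the newest sample (99) is kept | |||
| #     reservoir.remove_sample(lambda s: s % 2 != 0)  # keep odd samples only | |||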
| @@ -0,0 +1,344 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Summary watcher module.""" | |||
| import os | |||
| import re | |||
| import datetime | |||
| from pathlib import Path | |||
| from mindinsight.datavisual.common.log import logger | |||
| from mindinsight.datavisual.common.validation import Validation | |||
| from mindinsight.utils.exceptions import FileSystemPermissionError | |||
| class SummaryWatcher: | |||
| """SummaryWatcher class.""" | |||
| SUMMARY_FILENAME_REGEX = r'summary\.(?P<timestamp>\d+)' | |||
| PB_FILENAME_REGEX = r'\.pb$' | |||
| MAX_SUMMARY_DIR_COUNT = 999 | |||
| # scan at most 20000 files/directories (approximately 1 second) | |||
| # if overall=False in SummaryWatcher.list_summary_directories | |||
| # to avoid long-time blocking | |||
| MAX_SCAN_COUNT = 20000 | |||
| def list_summary_directories(self, summary_base_dir, overall=True): | |||
| """ | |||
| List summary directories within base directory. | |||
| Args: | |||
| summary_base_dir (str): Path of summary base directory. | |||
| overall (bool): If False, scanning stops after about MAX_SCAN_COUNT entries. Default: True. | |||
| Returns: | |||
| list, list of summary directory info, each of which includes the following attributes. | |||
| - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR, | |||
| starting with "./". | |||
| - create_time (datetime): Creation time of summary file. | |||
| - update_time (datetime): Modification time of summary file. | |||
| Examples: | |||
| >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher | |||
| >>> summary_watcher = SummaryWatcher() | |||
| >>> directories = summary_watcher.list_summary_directories('/summary/base/dir') | |||
| """ | |||
| if self._contains_null_byte(summary_base_dir=summary_base_dir): | |||
| return [] | |||
| if not os.path.exists(summary_base_dir): | |||
| logger.warning('Path of summary base directory does not exist.') | |||
| return [] | |||
| if not os.path.isdir(summary_base_dir): | |||
| logger.warning('Path of summary base directory is not a valid directory.') | |||
| return [] | |||
| summary_dict = {} | |||
| scan_count = 0 | |||
| try: | |||
| entries = os.scandir(summary_base_dir) | |||
| except PermissionError: | |||
| logger.error('Path of summary base directory is not accessible.') | |||
| raise FileSystemPermissionError('Path of summary base directory is not accessible.') | |||
| for entry in entries: | |||
| if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT: | |||
| break | |||
| relative_path = os.path.join('.', '') | |||
| if entry.is_symlink(): | |||
| pass | |||
| elif entry.is_file(): | |||
| self._update_summary_dict(summary_dict, relative_path, entry) | |||
| elif entry.is_dir(): | |||
| full_path = os.path.realpath(os.path.join(summary_base_dir, entry.name)) | |||
| try: | |||
| subdir_entries = os.scandir(full_path) | |||
| except PermissionError: | |||
| logger.warning('Path of %s under summary base directory is not accessible.', entry.name) | |||
| else: | |||
| for subdir_entry in subdir_entries: | |||
| if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT: | |||
| break | |||
| subdir_relative_path = os.path.join('.', entry.name) | |||
| if subdir_entry.is_symlink(): | |||
| pass | |||
| elif subdir_entry.is_file(): | |||
| self._update_summary_dict(summary_dict, subdir_relative_path, subdir_entry) | |||
| scan_count += 1 | |||
| if not overall and scan_count >= self.MAX_SCAN_COUNT: | |||
| break | |||
| scan_count += 1 | |||
| if not overall and scan_count >= self.MAX_SCAN_COUNT: | |||
| logger.info('Stopping scan early because overall is False and the ' | |||
| 'number of scanned files exceeds the upper limit.') | |||
| break | |||
| directories = [{ | |||
| 'relative_path': key, | |||
| 'create_time': value['ctime'], | |||
| 'update_time': value['mtime'], | |||
| } for key, value in summary_dict.items()] | |||
| # sort by update time in descending order and relative path in ascending order | |||
| directories.sort(key=lambda x: (-int(x['update_time'].timestamp()), x['relative_path'])) | |||
| return directories | |||
| def _contains_null_byte(self, **kwargs): | |||
| """ | |||
| Check if arg contains null byte. | |||
| Args: | |||
| kwargs (Any): Keyword arguments whose string values are checked for null bytes. | |||
| Returns: | |||
| bool, indicates if any arg contains null byte. | |||
| """ | |||
| for key, value in kwargs.items(): | |||
| if not isinstance(value, str): | |||
| continue | |||
| if '\x00' in value: | |||
| logger.warning('%s contains null byte \\x00.', key) | |||
| return True | |||
| return False | |||
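| # Illustrative examples of the check above (assuming a watcher instance | |||
| # named `watcher`): | |||
| # | |||
| #     watcher._contains_null_byte(path='/tmp/ok')          # -> False | |||
| #     watcher._contains_null_byte(path='/tmp/bad\x00dir')  # -> True, logs a warning | |||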
| def _is_valid_summary_directory(self, summary_base_dir, relative_path): | |||
| """ | |||
| Check if the given summary directory is valid. | |||
| Args: | |||
| summary_base_dir (str): Path of summary base directory. | |||
| relative_path (str): Relative path of summary directory, referring to summary base directory, | |||
| starting with "./" . | |||
| Returns: | |||
| bool, indicates if summary directory is valid. | |||
| """ | |||
| summary_base_dir = os.path.realpath(summary_base_dir) | |||
| summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path)) | |||
| if summary_base_dir == summary_directory: | |||
| return True | |||
| if not os.path.exists(summary_directory): | |||
| logger.warning('Path of summary directory does not exist.') | |||
| return False | |||
| if not os.path.isdir(summary_directory): | |||
| logger.warning('Path of summary directory is not a valid directory.') | |||
| return False | |||
| try: | |||
| Path(summary_directory).relative_to(Path(summary_base_dir)) | |||
| except ValueError: | |||
| logger.warning('Relative path %s is not a subdirectory of summary_base_dir.', relative_path) | |||
| return False | |||
| return True | |||
| def _update_summary_dict(self, summary_dict, relative_path, entry): | |||
| """ | |||
| Update summary_dict with ctime and mtime. | |||
| Args: | |||
| summary_dict (dict): Temporary data structure to hold summary directory info. | |||
| relative_path (str): Relative path of summary directory, referring to summary base directory, | |||
| starting with "./" . | |||
| entry (DirEntry): Directory entry instance needed to check with regular expression. | |||
| """ | |||
| summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name) | |||
| pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name) | |||
| if summary_pattern is None and pb_pattern is None: | |||
| return | |||
| if summary_pattern is not None: | |||
| timestamp = int(summary_pattern.groupdict().get('timestamp')) | |||
| try: | |||
| # extract created time from filename | |||
| ctime = datetime.datetime.fromtimestamp(timestamp).astimezone() | |||
| except OverflowError: | |||
| return | |||
| else: | |||
| ctime = datetime.datetime.fromtimestamp(entry.stat().st_ctime).astimezone() | |||
| # extract modified time from filesystem | |||
| mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone() | |||
| if relative_path not in summary_dict or summary_dict[relative_path]['ctime'] < ctime: | |||
| summary_dict[relative_path] = { | |||
| 'ctime': ctime, | |||
| 'mtime': mtime, | |||
| } | |||
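| # For example (illustrative): a file named 'events.summary.1587000000' | |||
| # yields its ctime from the embedded timestamp, | |||
| # | |||
| #     ctime = datetime.datetime.fromtimestamp(1587000000).astimezone() | |||
| # | |||
| # while mtime always comes from the filesystem stat of the entry. | |||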
| def is_summary_directory(self, summary_base_dir, relative_path): | |||
| """ | |||
| Check if the given summary directory is valid. | |||
| Args: | |||
| summary_base_dir (str): Path of summary base directory. | |||
| relative_path (str): Relative path of summary directory, referring to summary base directory, | |||
| starting with "./" . | |||
| Returns: | |||
| bool, indicates if the given summary directory is valid. | |||
| Examples: | |||
| >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher | |||
| >>> summary_watcher = SummaryWatcher() | |||
| >>> summaries = summary_watcher.is_summary_directory('/summary/base/dir', './job-01') | |||
| """ | |||
| if self._contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path): | |||
| return False | |||
| if not self._is_valid_summary_directory(summary_base_dir, relative_path): | |||
| return False | |||
| summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path)) | |||
| try: | |||
| entries = os.scandir(summary_directory) | |||
| except PermissionError: | |||
| logger.error('Path of summary base directory is not accessible.') | |||
| raise FileSystemPermissionError('Path of summary base directory is not accessible.') | |||
| for entry in entries: | |||
| if entry.is_symlink() or not entry.is_file(): | |||
| continue | |||
| summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name) | |||
| pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name) | |||
| if summary_pattern or pb_pattern: | |||
| return True | |||
| return False | |||
| def list_summary_directories_by_pagination(self, summary_base_dir, offset=0, limit=10): | |||
| """ | |||
| List summary directories within base directory. | |||
| Args: | |||
| summary_base_dir (str): Path of summary base directory. | |||
| offset (int): An offset for paging; for example, offset 0 means the first page. Default value is 0. | |||
| limit (int): The max data items for per page. Default value is 10. | |||
| Returns: | |||
| tuple[total, directories], total indicates the overall number of summary directories and directories | |||
| indicates a list of summary directory info including the following attributes. | |||
| - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR, | |||
| starting with "./". | |||
| - create_time (datetime): Creation time of summary file. | |||
| - update_time (datetime): Modification time of summary file. | |||
| Raises: | |||
| ParamValueError, if offset < 0 or limit is out of the valid value range. | |||
| ParamTypeError, if offset or limit is not a valid integer. | |||
| Examples: | |||
| >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher | |||
| >>> summary_watcher = SummaryWatcher() | |||
| >>> total, directories = summary_watcher.list_summary_directories_by_pagination( | |||
| '/summary/base/dir', offset=0, limit=10) | |||
| """ | |||
| offset = Validation.check_offset(offset=offset) | |||
| limit = Validation.check_limit(limit, min_value=1, max_value=999) | |||
| directories = self.list_summary_directories(summary_base_dir, overall=False) | |||
| return len(directories), directories[offset * limit:(offset + 1) * limit] | |||
| def list_summaries(self, summary_base_dir, relative_path='./'): | |||
| """ | |||
| Get info of the summary files within the given summary directory. | |||
| Args: | |||
| summary_base_dir (str): Path of summary base directory. | |||
| relative_path (str): Relative path of summary directory, referring to summary base directory, | |||
| starting with "./" . | |||
| Returns: | |||
| list, list of summary files, each including the following attributes. | |||
| - file_name (str): Summary file name. | |||
| - create_time (datetime): Creation time of summary file. | |||
| - update_time (datetime): Modification time of summary file. | |||
| Examples: | |||
| >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher | |||
| >>> summary_watcher = SummaryWatcher() | |||
| >>> summaries = summary_watcher.list_summaries('/summary/base/dir', './job-01') | |||
| """ | |||
| if self._contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path): | |||
| return [] | |||
| if not self._is_valid_summary_directory(summary_base_dir, relative_path): | |||
| return [] | |||
| summaries = [] | |||
| summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path)) | |||
| try: | |||
| entries = os.scandir(summary_directory) | |||
| except PermissionError: | |||
| logger.error('Path of summary directory is not accessible.') | |||
| raise FileSystemPermissionError('Path of summary directory is not accessible.') | |||
| for entry in entries: | |||
| if entry.is_symlink() or not entry.is_file(): | |||
| continue | |||
| pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name) | |||
| if pattern is None: | |||
| continue | |||
| timestamp = int(pattern.groupdict().get('timestamp')) | |||
| try: | |||
| # extract created time from filename | |||
| ctime = datetime.datetime.fromtimestamp(timestamp).astimezone() | |||
| except OverflowError: | |||
| continue | |||
| # extract modified time from filesystem | |||
| mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone() | |||
| summaries.append({ | |||
| 'file_name': entry.name, | |||
| 'create_time': ctime, | |||
| 'update_time': mtime, | |||
| }) | |||
| # sort by update time in descending order and filename in ascending order | |||
| summaries.sort(key=lambda x: (-int(x['update_time'].timestamp()), x['file_name'])) | |||
| return summaries | |||
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,28 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Base processor, and init data manager parameter.""" | |||
| class BaseProcessor: | |||
| """Base processors processor. All processors should inherit this class.""" | |||
| def __init__(self, data_manager): | |||
| """ | |||
| Init base processor. | |||
| Args: | |||
| data_manager (DataManager): A DataManager instance. | |||
| """ | |||
| self._data_manager = data_manager | |||
| @@ -0,0 +1,145 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| This file processes `data_transform.data_manager` to handle graphs, | |||
| and the graph status is checked before the `Graph` object is called. | |||
| """ | |||
| from mindinsight.datavisual.common import exceptions | |||
| from mindinsight.datavisual.common.enums import PluginNameEnum | |||
| from mindinsight.datavisual.common.validation import Validation | |||
| from mindinsight.datavisual.data_transform.graph import NodeTypeEnum | |||
| from mindinsight.datavisual.processors.base_processor import BaseProcessor | |||
| from mindinsight.utils.exceptions import ParamValueError | |||
| class GraphProcessor(BaseProcessor): | |||
| """ | |||
| This object is to handle `DataManager` object, and process graph object. | |||
| Args: | |||
| train_id (str): The ID used to get train job data. | |||
| data_manager (DataManager): A `DataManager` object. | |||
| tag (str): The tag of the graph; if tag is None, the first graph is loaded. | |||
| """ | |||
| def __init__(self, train_id, data_manager, tag=None): | |||
| Validation.check_param_empty(train_id=train_id) | |||
| super(GraphProcessor, self).__init__(data_manager) | |||
| train_job = self._data_manager.get_train_job_by_plugin(train_id, PluginNameEnum.GRAPH.value) | |||
| if train_job is None: | |||
| raise exceptions.SummaryLogPathInvalid() | |||
| if not train_job['tags']: | |||
| raise ParamValueError("Can not find any graph data in the train job.") | |||
| if tag is None: | |||
| tag = train_job['tags'][0] | |||
| tensors = self._data_manager.list_tensors(train_id, tag=tag) | |||
| self._graph = tensors[0].value | |||
| def get_nodes(self, name, node_type): | |||
| """ | |||
| Get the nodes of every layer in graph. | |||
| Args: | |||
| name (str): The name of a node. | |||
| node_type (Any): The type of node, either 'name_scope' or 'polymeric_scope'. | |||
| Returns: | |||
| TypedDict('Nodes', {'nodes': list[Node]}), format is {'nodes': [<Node object>]}. | |||
| Example: | |||
| { | |||
| "nodes" : [ | |||
| { | |||
| "attr" : | |||
| { | |||
| "index" : "i: 0\n" | |||
| }, | |||
| "input" : {}, | |||
| "name" : "input_tensor", | |||
| "output" : | |||
| { | |||
| "Default/TensorAdd-op17" : | |||
| { | |||
| "edge_type" : "data", | |||
| "scope" : "name_scope", | |||
| "shape" : [1, 16, 128, 128] | |||
| } | |||
| }, | |||
| "output_i" : -1, | |||
| "polymeric_input" : {}, | |||
| "polymeric_output" : {}, | |||
| "polymeric_scope_name" : "", | |||
| "subnode_count" : 0, | |||
| "type" : "Data" | |||
| } | |||
| ] | |||
| } | |||
| """ | |||
| if node_type not in [NodeTypeEnum.NAME_SCOPE.value, NodeTypeEnum.POLYMERIC_SCOPE.value]: | |||
| raise ParamValueError( | |||
| 'The node type is not supported; it must be either %s or %s.' | |||
| % (NodeTypeEnum.NAME_SCOPE.value, NodeTypeEnum.POLYMERIC_SCOPE.value)) | |||
| if name and not self._graph.exist_node(name): | |||
| raise ParamValueError("The node name is not in graph.") | |||
| nodes = [] | |||
| if node_type == NodeTypeEnum.NAME_SCOPE.value: | |||
| nodes = self._graph.get_normal_nodes(name) | |||
| if node_type == NodeTypeEnum.POLYMERIC_SCOPE.value: | |||
| if not name: | |||
| raise ParamValueError('The node name "%s" not in graph, node type is %s.' % | |||
| (name, node_type)) | |||
| polymeric_scope_name = name | |||
| nodes = self._graph.get_polymeric_nodes(polymeric_scope_name) | |||
| return {'nodes': nodes} | |||
| def search_node_names(self, search_content, offset, limit): | |||
| """ | |||
| Search node names by search content. | |||
| Args: | |||
| search_content (Any): The key content of the node names to search for. | |||
| offset (int): An offset for paging; for example, offset 0 means the first page. | |||
| limit (int): The max data items for per page. | |||
| Returns: | |||
| TypedDict('Names', {'names': list[str]}), {"names": ["node_names"]}. | |||
| """ | |||
| offset = Validation.check_offset(offset=offset) | |||
| limit = Validation.check_limit(limit, min_value=1, max_value=1000) | |||
| names = self._graph.search_node_names(search_content, offset, limit) | |||
| return {"names": names} | |||
| def search_single_node(self, name): | |||
| """ | |||
| Search node by node name. | |||
| Args: | |||
| name (str): The name of node. | |||
| Returns: | |||
| dict, format is: | |||
| item_object = {'nodes': [<Node object>], | |||
| 'scope_name': '', | |||
| 'children': {<item_object>}} | |||
| """ | |||
| Validation.check_param_empty(name=name) | |||
| nodes = self._graph.search_single_node(name) | |||
| return nodes | |||
| @@ -0,0 +1,92 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Image Processor APIs.""" | |||
| from mindinsight.datavisual.utils.tools import to_int | |||
| from mindinsight.utils.exceptions import ParamValueError | |||
| from mindinsight.datavisual.common.validation import Validation | |||
| from mindinsight.datavisual.processors.base_processor import BaseProcessor | |||
| class ImageProcessor(BaseProcessor): | |||
| """Image Processor.""" | |||
| def get_metadata_list(self, train_id, tag): | |||
| """ | |||
| Builds a JSON-serializable object with information about images. | |||
| Args: | |||
| train_id (str): The ID of the events data. | |||
| tag (str): The name of the tag the images all belong to. | |||
| Returns: | |||
| dict, a dict with key 'metadatas' whose value is a list of dictionaries | |||
| containing the `wall_time`, `step`, `width`, and `height` for each image: | |||
| [ | |||
| { | |||
| "wall_time": ****, | |||
| "step": ****, | |||
| "width": ****, | |||
| "height": ****, | |||
| }, | |||
| {...} | |||
| ] | |||
| """ | |||
| Validation.check_param_empty(train_id=train_id, tag=tag) | |||
| result = [] | |||
| tensors = self._data_manager.list_tensors(train_id, tag) | |||
| for tensor in tensors: | |||
| # no tensor_proto in TensorEvent | |||
| (width, height) = (tensor.value.width, tensor.value.height) | |||
| result.append({ | |||
| 'wall_time': tensor.wall_time, | |||
| 'step': tensor.step, | |||
| 'width': int(width), | |||
| 'height': int(height), | |||
| }) | |||
| return dict(metadatas=result) | |||
| def get_single_image(self, train_id, tag, step): | |||
| """ | |||
| Returns the actual image bytes for a given image. | |||
| Args: | |||
| train_id (str): The ID of the events data the image belongs to. | |||
| tag (str): The name of the tag the image belongs to. | |||
| step (int): The step of the image in the current reservoir. | |||
| Returns: | |||
| bytes, a byte string of the raw image bytes. | |||
| """ | |||
| Validation.check_param_empty(train_id=train_id, tag=tag, step=step) | |||
| step = to_int(step, "step") | |||
| tensors = self._data_manager.list_tensors(train_id, tag) | |||
| image = None | |||
| for tensor in tensors: | |||
| if tensor.step == step: | |||
| # Default value for bytes field is empty byte string normally, | |||
| # see also "Optional Fields And Default Values" in protobuf | |||
| # documentation. | |||
| image = tensor.value.encoded_image | |||
| break | |||
| if image is None: | |||
| raise ParamValueError("Can not find the step with given train job id and tag.") | |||
| return image | |||
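| # A hedged usage sketch (illustrative; the train id and tag are | |||
| # hypothetical): | |||
| # | |||
| #     processor = ImageProcessor(data_manager) | |||
| #     processor.get_metadata_list('./job-01', 'input_data/image') | |||
| #     image_bytes = processor.get_single_image('./job-01', 'input_data/image', step=1) | |||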
| @@ -0,0 +1,43 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Scalar Processor APIs.""" | |||
| from mindinsight.datavisual.common.validation import Validation | |||
| from mindinsight.datavisual.processors.base_processor import BaseProcessor | |||
| class ScalarsProcessor(BaseProcessor): | |||
| """Scalar Processor.""" | |||
| def get_metadata_list(self, train_id, tag): | |||
| """ | |||
| Builds a JSON-serializable object with information about scalars. | |||
| Args: | |||
| train_id (str): The ID of the events data. | |||
tag (str): The name of the tag the scalars all belong to.
| Returns: | |||
dict, a dict with a `metadatas` key whose value is a list of dictionaries containing the `wall_time`, `step`, and `value` of each scalar.
| """ | |||
| Validation.check_param_empty(train_id=train_id, tag=tag) | |||
| job_response = [] | |||
| tensors = self._data_manager.list_tensors(train_id, tag) | |||
| for tensor in tensors: | |||
| job_response.append({ | |||
| 'wall_time': tensor.wall_time, | |||
| 'step': tensor.step, | |||
| 'value': tensor.value}) | |||
| return dict(metadatas=job_response) | |||
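# Usage sketch (illustrative only; `data_manager` is a hypothetical,
# already-loaded DataManager instance):
#
#     processor = ScalarsProcessor(data_manager)
#     response = processor.get_metadata_list('./run1', 'loss')
#     # response == {'metadatas': [{'wall_time': ..., 'step': ..., 'value': ...}, ...]}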
| @@ -0,0 +1,65 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Train task manager.""" | |||
| from mindinsight.datavisual.common import exceptions | |||
| from mindinsight.datavisual.common.enums import PluginNameEnum | |||
| from mindinsight.datavisual.common.validation import Validation | |||
| from mindinsight.datavisual.processors.base_processor import BaseProcessor | |||
| class TrainTaskManager(BaseProcessor): | |||
| """Train task manager.""" | |||
| def get_single_train_task(self, plugin_name, train_id): | |||
| """ | |||
Get a single train task.
| Args: | |||
| plugin_name (str): Plugin name, refer `PluginNameEnum`. | |||
| train_id (str): Specify a training job to query. | |||
| Returns: | |||
dict, in the form {'train_jobs': list[TrainJob]}; refer to the RESTful API.
| """ | |||
| Validation.check_param_empty(plugin_name=plugin_name, train_id=train_id) | |||
| Validation.check_plugin_name(plugin_name=plugin_name) | |||
| train_job = self._data_manager.get_train_job_by_plugin(train_id=train_id, plugin_name=plugin_name) | |||
| if train_job is None: | |||
| raise exceptions.SummaryLogPathInvalid() | |||
| return dict(train_jobs=[train_job]) | |||
| def get_plugins(self, train_id, manual_update=True): | |||
| """ | |||
Queries the plug-in data for the specified training job.
| Args: | |||
| train_id (str): Specify a training job to query. | |||
manual_update (bool): Specifies whether to update the train job cache manually before querying.
Returns:
dict, refer to the RESTful API.
| """ | |||
| Validation.check_param_empty(train_id=train_id) | |||
| train_job = self._data_manager.get_single_train_job(train_id, manual_update=manual_update) | |||
| if not train_job: | |||
| default_result = dict() | |||
| for plugin_name in PluginNameEnum.list_members(): | |||
| default_result.update({plugin_name: list()}) | |||
| return dict(plugins=default_result) | |||
| return dict( | |||
| plugins=train_job['tag_mapping'] | |||
| ) | |||
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,328 @@ | |||
| // Copyright 2019 Huawei Technologies Co., Ltd. | |||
| // | |||
| // Licensed under the Apache License, Version 2.0 (the "License"); | |||
| // you may not use this file except in compliance with the License. | |||
| // You may obtain a copy of the License at | |||
| // | |||
| // http://www.apache.org/licenses/LICENSE-2.0 | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software | |||
| // distributed under the License is distributed on an "AS IS" BASIS, | |||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| syntax = "proto2"; | |||
| package mindinsight; | |||
| // Versioning | |||
| enum Version { | |||
| // unknown version | |||
UNKNOWN_VERSION = 0;
| // Initial version (IR VERSION 1), published on Sep 23, 2019 | |||
| IR_VERSION = 0x0000000000000001; | |||
| } | |||
| // Data type definition | |||
| enum DataType { | |||
| DT_UNDEFINED = 0; | |||
| // Basic types. | |||
| DT_BOOL = 1; // bool | |||
| DT_INT8 = 2; // int8_t | |||
| DT_INT16 = 3; // int16_t | |||
| DT_INT32 = 4; // int32_t | |||
| DT_INT64 = 5; // int64_t | |||
| DT_UINT8 = 6; // uint8_t | |||
| DT_UINT16 = 7; // uint16_t | |||
| DT_UINT32 = 8; // uint32_t | |||
| DT_UINT64 = 9; // uint64_t | |||
| DT_FLOAT16 = 10; // float 16 | |||
| DT_FLOAT32 = 11; // float 32 | |||
| DT_FLOAT64 = 12; // float 64 | |||
| DT_STRING = 13; // string | |||
| DT_TENSOR = 14; // tensor | |||
| DT_GRAPH = 15; // graph | |||
| // list type | |||
| DT_BOOLS = 16; // list of bool | |||
| DT_INTS8 = 17; // list of int8_t | |||
| DT_INTS16 = 18; // list of int16_t | |||
| DT_INTS32 = 19; // list of int32_t | |||
| DT_INTS64 = 20; // list of int64_t | |||
| DT_UINTS8 = 21; // list of uint8_t | |||
| DT_UINTS16 = 22; // list of uint16_t | |||
| DT_UINTS32 = 23; // list of uint32_t | |||
| DT_UINTS64 = 24; // list of uint64_t | |||
| DT_FLOATS16 = 25; // list of float16 | |||
| DT_FLOATS32 = 26; // list of float32 | |||
| DT_FLOATS64 = 27; // list of float64 | |||
| DT_STRINGS = 28; // list of string | |||
| DT_TENSORS = 29; // list of tensor | |||
| DT_GRAPHS = 30; // list of graph | |||
| DT_TUPLE = 31; // tuple | |||
| DT_LIST = 32; // list | |||
| DT_DICT = 33; // dictionary | |||
| // other types | |||
| DT_NONE = 34; // None | |||
| DT_SYM_INST = 35; // Symbolic Key Instance | |||
| // type related type | |||
| DT_BASE_INT = 36; // type generic int | |||
DT_BASE_UINT = 37; // type generic unsigned int
DT_BASE_FLOAT = 38; // type generic float
| DT_TYPE = 39; // type type | |||
| DT_ANYTHING = 40; // type anything | |||
| } | |||
// Value definition for attribute value or parameter default value
| message ValueProto { | |||
| // data type of value | |||
| optional DataType dtype = 1; // discriminator that indicates which field below is in use | |||
| // Exactly ONE of the following fields must be present for this version of the IR | |||
| optional bool bool_val = 2; // bool | |||
| optional int64 int_val = 3; // int | |||
| optional uint64 uint_val = 4; // uint | |||
| optional float float_val = 5; // float | |||
| optional double double_val = 6; // double | |||
| optional string str_val = 7; // string | |||
| optional TensorProto tensor_val = 8; // tensor value | |||
| optional GraphProto graph = 9; // graph | |||
| repeated bool bool_vals = 10; // list of bool | |||
| repeated int64 int_vals = 11; // list of int | |||
| repeated uint64 uint_vals = 12; // list of uint | |||
| repeated float float_vals = 13; // list of float | |||
| repeated double double_vals = 14; // list of double | |||
| repeated string str_vals = 15; // list of string | |||
| repeated TensorProto tensor_vals = 16; // list of tensor value | |||
| repeated GraphProto graphs = 17; // list of graph | |||
| // tuple or list | |||
| repeated ValueProto values = 18; // tuple, list of value | |||
| // dictionary | |||
| repeated NamedValueProto dict_val = 19; // dictionary info | |||
// field for type type
| optional TypeProto type_val = 20; // type type info | |||
| } | |||
| message AttributeProto { | |||
| optional string name = 1; // attribute name | |||
| optional ValueProto value = 2; // attribute value | |||
| } | |||
| message NamedValueProto { | |||
| optional string key = 1; // attribute name | |||
| optional ValueProto value = 2; // attribute value | |||
| } | |||
| // Defines a tensor shape. | |||
| message TensorShapeProto { | |||
| // One dimension of the tensor. | |||
| message Dimension { | |||
| // Size of the tensor in that dimension. | |||
// This value must be >= -1; a value of -1 means an "unknown" dimension.
| optional int64 size = 1; | |||
| // Optional name of the tensor dimension. | |||
| optional string name = 2; | |||
| }; | |||
| repeated Dimension dim = 1; | |||
| } | |||
| // Types for graph input(parameter) and output | |||
| message TypeProto { | |||
| message Tensor { | |||
| // This field MUST have a valid DataType value except DT_TENSOR | |||
| optional DataType elem_type = 1; | |||
| optional TensorShapeProto shape = 2; // for scalar, this field is not set | |||
| } | |||
| // tuple type | |||
| message Sequence { | |||
| // The type and optional shape of elements of the tuple. | |||
| repeated TypeProto elem_types = 1; | |||
| }; | |||
| // data type | |||
| optional DataType data_type = 1; | |||
| oneof value { | |||
| // The type of a tensor. | |||
| Tensor tensor_type = 2; | |||
| // The type of a tuple. | |||
| Sequence sequence_type = 3; | |||
| } | |||
| } | |||
| // Defines information on graph parameters, including the name, the type, and | |||
| // the default value of parameter if exists. | |||
| message ParameterProto { | |||
| optional string name = 1; // parameter name | |||
| optional TypeProto type = 2; // parameter type | |||
| optional ValueProto default_val = 3; // default value of parameter if exists | |||
| } | |||
| // Defines graph output information | |||
| message OutputProto { | |||
| optional string name = 1; // output node name | |||
| optional TypeProto type = 2; // output node type | |||
| } | |||
// Defines node input information
| message InputProto { | |||
| enum EdgeType { | |||
| DATA_EDGE = 0; // data edge | |||
| CONTROL_EDGE = 1; // control edge | |||
| } | |||
| optional string name = 1; | |||
| optional EdgeType type = 2; | |||
| } | |||
| // Nodes | |||
| // | |||
| // Computation graphs are made up of a DAG of nodes, which represent what is | |||
| // commonly called a "layer" or "pipeline stage" in machine learning frameworks. | |||
| // | |||
| // For example, it can be a node of type "Conv" that takes in an image, a filter | |||
| // tensor and a bias tensor, and produces the convolved output. | |||
| message NodeProto { | |||
| repeated InputProto input = 1; // namespace Value | |||
| optional string name = 2; // namespace Value | |||
| // The symbolic identifier of the Operator to execute. | |||
| optional string op_type = 3; // namespace Operator | |||
| // The domain of the OperatorSet that specifies the operator named by op_type. | |||
| optional string scope = 4; // namespace Domain | |||
| // Additional named attributes. | |||
| repeated AttributeProto attribute = 5; | |||
| // Optional type info of this node | |||
| optional TypeProto output_type = 6; | |||
| // other fields for debug | |||
| optional uint64 output_i = 7; | |||
| } | |||
| // Models | |||
| // | |||
| // ModelProto is a top-level file/container format for bundling a ML model and | |||
| // associating its computation graph with metadata. | |||
| // | |||
| // The semantics of the model are described by the associated GraphProto. | |||
| message ModelProto { | |||
| // ir version | |||
| optional int64 ir_version = 1; | |||
| // Domain name of the model. | |||
| // We use reverse domain names as name space indicators. For example: | |||
| // `com.facebook.fair` or `com.microsoft.cognitiveservices` | |||
| // | |||
| // Together with `model_version` and GraphProto.name, this forms the unique identity of | |||
| // the graph. | |||
| optional string domain = 2; | |||
// The version of the graph encoded. See the Version enum above.
| optional int64 model_version = 3; | |||
| // The parameterized graph that is evaluated to execute the model. | |||
| optional GraphProto graph = 4; | |||
// metadata info of operators
| optional OperatorSetProto metadata_operators = 5; | |||
| }; | |||
| message OperatorProto { | |||
| optional string name = 1; // used as key, must be distinct | |||
| optional bytes config = 2; // operator config info | |||
| optional bytes obj_info = 3; // operator related object info, e.g. content of operator binary or name | |||
| }; | |||
| message OperatorSetProto { | |||
| repeated OperatorProto operators = 1; | |||
| optional string summary = 2; // summary info of operators, e.g. file position of operators file | |||
| } | |||
| // Graphs | |||
| // | |||
| // A graph defines the computational logic of a model and is comprised of a parameterized | |||
| // list of nodes that form a directed acyclic graph based on their inputs and outputs. | |||
| // This is the equivalent of the "network" or "graph" in many deep learning | |||
| // frameworks. | |||
| message GraphProto { | |||
| // The nodes in the graph, sorted topologically. | |||
| repeated NodeProto node = 1; | |||
| // The name of the graph. | |||
| optional string name = 2; // namespace Graph | |||
| // The parameters(inputs) and outputs of the graph. | |||
| repeated ParameterProto parameters = 3; | |||
| repeated OutputProto outputs = 4; | |||
| // Constants used in this graph | |||
| repeated NamedValueProto const_vals = 5; | |||
| } | |||
| // Tensors | |||
| // | |||
| // A serialized tensor value. | |||
| message TensorProto { | |||
| // The shape of the tensor. | |||
| repeated int64 dims = 1; | |||
| // The data type of the tensor. | |||
| // This field MUST have a valid DataType value except DT_TENSOR | |||
| optional DataType data_type = 2; | |||
| // Tensor content must be organized in row-major order. | |||
| // | |||
| // Depending on the data_type field, exactly one of the fields below with | |||
| // name ending in _data is used to store the elements of the tensor. | |||
| // For float values | |||
| repeated float float_data = 3 [packed = true]; | |||
| // For int32, uint8, int8, uint16, int16, and bool values | |||
| // When this field is present, the data_type field MUST be | |||
| // INT32, INT16, INT8, UINT16, UINT8, or BOOL | |||
| repeated int32 int32_data = 4 [packed = true]; | |||
| // For int64. | |||
| // When this field is present, the data_type field MUST be INT64 | |||
| repeated int64 int64_data = 5 [packed = true]; | |||
| // For double | |||
| // When this field is present, the data_type field MUST be DOUBLE | |||
| repeated double double_data = 6 [packed = true]; | |||
| // For uint64 and uint32 values | |||
| // When this field is present, the data_type field MUST be | |||
| // UINT32 or UINT64 | |||
| repeated uint64 uint64_data = 7 [packed = true]; | |||
| // Store raw tensor content. When this raw_data field is used to store tensor value, | |||
| // elements MUST be stored in as fixed-width, little-endian order. | |||
| optional bytes raw_data = 8; | |||
| } | |||
| @@ -0,0 +1,155 @@ | |||
| // Copyright 2019 Huawei Technologies Co., Ltd. | |||
| // | |||
| // Licensed under the Apache License, Version 2.0 (the "License"); | |||
| // you may not use this file except in compliance with the License. | |||
| // You may obtain a copy of the License at | |||
| // | |||
| // http://www.apache.org/licenses/LICENSE-2.0 | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software | |||
| // distributed under the License is distributed on an "AS IS" BASIS, | |||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| syntax = "proto2"; | |||
| package mindinsight; | |||
| option cc_enable_arenas = true; | |||
// The ANF IR definition, including the tensor and graph definitions
| import "mindinsight_anf_ir.proto"; | |||
// Event protocol buffer, top-level definition
| message Event { | |||
| // Timestamp | |||
| required double wall_time = 1; | |||
// The training step.
| optional int64 step = 2; | |||
| oneof what { | |||
| // An event file was started, with the specified version. | |||
// The current version is "Mindspore.Event:1"
| string version = 3; | |||
| // GraphDef. | |||
| GraphProto graph_def = 4; | |||
| // Summary data | |||
| Summary summary = 5; | |||
| // Train lineage | |||
| TrainLineage train_lineage = 6; | |||
| // Evaluation lineage | |||
| EvaluationLineage evaluation_lineage = 7; | |||
| // dataset graph | |||
| DatasetGraph dataset_graph = 9; | |||
| } | |||
| } | |||
// TrainLineage records information about a training run.
| message TrainLineage{ | |||
| message HyperParameters{ | |||
| optional string optimizer = 1; | |||
| optional float learning_rate = 2; | |||
| optional string loss_function = 3; | |||
| optional int32 epoch = 4; | |||
| optional string parallel_mode = 5; | |||
| optional int32 device_num = 6; | |||
| optional int32 batch_size = 8; | |||
| } | |||
| message TrainDataset{ | |||
| optional string train_dataset_path = 1; | |||
| optional int32 train_dataset_size = 2; | |||
| } | |||
| message Algorithm{ | |||
| optional string network = 1; | |||
| optional float loss = 2; | |||
| } | |||
| message Model{ | |||
| optional string path = 3; | |||
| optional int64 size = 4; | |||
| } | |||
| optional HyperParameters hyper_parameters = 1; | |||
| optional TrainDataset train_dataset = 2; | |||
| optional Algorithm algorithm = 3; | |||
| optional Model model = 4; | |||
| } | |||
// EvaluationLineage records information about an evaluation.
| message EvaluationLineage{ | |||
| message ValidDataset{ | |||
| optional string valid_dataset_path = 1; | |||
| optional int32 valid_dataset_size = 2; | |||
| } | |||
| optional string metric = 2; | |||
| optional ValidDataset valid_dataset = 3; | |||
| } | |||
// A Summary is a set of named values that are produced regularly during training
| message Summary { | |||
| message Image { | |||
| // Dimensions of the image. | |||
| required int32 height = 1; | |||
| required int32 width = 2; | |||
| // Valid colorspace values are | |||
| // 1 - grayscale | |||
| // 2 - grayscale + alpha | |||
| // 3 - RGB | |||
| // 4 - RGBA | |||
| // 5 - DIGITAL_YUV | |||
| // 6 - BGRA | |||
| required int32 colorspace = 3; | |||
// Image data in encoded format. Currently only RGB is supported.
| required bytes encoded_image = 4; | |||
| } | |||
| message Value { | |||
| // Tag name for the data. | |||
| required string tag = 1; | |||
| // Value associated with the tag. | |||
| oneof value { | |||
| float scalar_value = 3; | |||
| Image image = 4; | |||
| TensorProto tensor = 8; | |||
| } | |||
| } | |||
| // Set of values for the summary. | |||
| repeated Value value = 1; | |||
| } | |||
| // DatasetGraph | |||
| message DatasetGraph { | |||
| repeated DatasetGraph children = 1; | |||
| optional OperationParameter parameter = 2; | |||
| repeated Operation operations = 3; | |||
| optional Operation sampler = 4; | |||
| } | |||
| message Operation { | |||
| optional OperationParameter operationParam = 1; | |||
| repeated int32 size = 2; | |||
| repeated float weights = 3; | |||
| } | |||
| message OperationParameter{ | |||
| map<string, string> mapStr = 1; | |||
| map<string, StrList> mapStrList = 2; | |||
| map<string, bool> mapBool = 3; | |||
| map<string, int32> mapInt = 4; | |||
| map<string, double> mapDouble = 5; | |||
| } | |||
| message StrList { | |||
| repeated string strValue = 1; | |||
| } | |||
| @@ -0,0 +1,16 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Utils.""" | |||
| from .tools import find_app_package | |||
| @@ -0,0 +1,57 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DATAVISUAL_UTILS_CRC32_BASE_H_ | |||
| #define DATAVISUAL_UTILS_CRC32_BASE_H_ | |||
| #include <memory> | |||
| #include <string> | |||
| #include "securec/include/securec.h" | |||
| using string = std::string; | |||
| using int8 = int8_t; | |||
| using int16 = int16_t; | |||
| using int32 = int32_t; | |||
| using int64 = int64_t; | |||
| using uint8 = uint8_t; | |||
| using uint16 = uint16_t; | |||
| using uint32 = uint32_t; | |||
| using uint64 = uint64_t; | |||
// Null-pointer check: break out of the do-while block when the value is null
| #define EXCEPT_CHECK_NULL(value) \ | |||
| do { \ | |||
| if (value == nullptr) { \ | |||
| break; \ | |||
| } \ | |||
| } while (0) | |||
// Common helper functions
// Decode a 32-bit little-endian value from the given byte pointer
inline uint32 DecodeFixed32(const char* ptr) {
uint32 result = 0;
// On memcpy_s failure, fall through and return the zero-initialized result.
(void)memcpy_s(&result, sizeof(result), ptr, sizeof(result));
return result;
}
| } | |||
| // Used to fetch a naturally-aligned 32-bit word in little endian byte-order | |||
| inline uint32 LE_LOAD32(const uint8_t* p) { return DecodeFixed32(reinterpret_cast<const char*>(p)); } | |||
| #endif // DATAVISUAL_UTILS_CRC32_BASE_H_ | |||
| @@ -0,0 +1,316 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "crc32/crc32.h" | |||
| #include <stdint.h> | |||
| const unsigned int CRC_TABLE_SIZE = 256; | |||
| static const uint32 crc_table_o32[CRC_TABLE_SIZE] = { | |||
| 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, 0x8AD958CF, | |||
| 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, 0x105EC76F, 0xE235446C, | |||
| 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, | |||
| 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B, 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, | |||
| 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, | |||
| 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, | |||
| 0x1642AE59, 0xE4292D5A, 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, | |||
| 0x6EF07595, 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957, | |||
| 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, 0x5125DAD3, | |||
| 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, 0xDBFC821C, 0x2997011F, | |||
| 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, 0x61C69362, 0x93AD1061, 0x80FDE395, | |||
| 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, | |||
| 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, | |||
| 0x44694011, 0x5739B3E5, 0xA55230E6, 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, | |||
| 0xDDE0EB2A, 0x2F8B6829, 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, | |||
| 0x563C5F93, 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, | |||
| 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC, 0x1871A4D8, | |||
| 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, 0xA24BB5A6, 0x502036A5, | |||
| 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, | |||
| 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982, 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, | |||
| 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, | |||
| 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, | |||
| 0xE52CC12C, 0x1747422F, 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, | |||
| 0x9D9E1AE0, 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540, | |||
| 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, 0xE330A81A, | |||
| 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, 0x69E9F0D5, 0x9B8273D6, | |||
| 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, 0xF36E6F75, 0x0105EC76, 0x12551F82, | |||
| 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, | |||
| 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351}; | |||
| static const uint32 crc_table_o40[CRC_TABLE_SIZE] = { | |||
| 0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945, 0x9D14C3B8, | |||
| 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, 0x3FC5F181, 0x2C6769F6, | |||
| 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4, 0xA2D13239, 0xB173AA4E, 0x859402D7, | |||
| 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C, 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, | |||
| 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47, 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, | |||
| 0xBFB7D911, 0x8B507188, 0x98F2E9FF, 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, | |||
| 0x298143B1, 0x3A23DBC6, 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, | |||
| 0xA737187E, 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41, | |||
| 0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9, 0xC0D23785, | |||
| 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0, 0x5DC6F43D, 0x4E646C4A, | |||
| 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78, 0x809C2506, 0x933EBD71, 0xA7D915E8, | |||
| 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43, 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, | |||
| 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB, 0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, | |||
| 0xE2712D2C, 0xD69685B5, 0xC5341DC2, 0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, | |||
| 0x4B82460D, 0x5820DE7A, 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, | |||
| 0x81AE33BC, 0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004, | |||
| 0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D, 0x5912C8C0, | |||
| 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185, 0x844819FB, 0x97EA818C, | |||
| 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE, 0x195CDA43, 0x0AFE4234, 0x3E19EAAD, | |||
| 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306, 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, | |||
| 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F, 0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, | |||
| 0x7BB1D269, 0x4F567AF0, 0x5CF4E287, 0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, | |||
| 0x6D1B6DCF, 0x7EB9F5B8, 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, | |||
| 0xE3AD3600, 0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439, | |||
| 0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781, 0x7B5FDFFF, | |||
| 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA, 0xE64B1C47, 0xF5E98430, | |||
| 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502, 0x449A2E7E, 0x5738B609, 0x63DF1E90, | |||
| 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B, 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, | |||
| 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483}; | |||
| static const uint32 crc_table_o48[CRC_TABLE_SIZE] = { | |||
| 0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469, 0x38513EC5, | |||
| 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC, 0x70A27D8A, 0xD5E3EFF4, | |||
| 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3, 0x48F3434F, 0xEDB2D131, 0x079C1142, | |||
| 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726, 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, | |||
| 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D, 0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, | |||
| 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8, 0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, | |||
| 0x40577089, 0xE516E2F7, 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, | |||
| 0xDD47DC32, 0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0, | |||
| 0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75, 0xB7C7FD53, | |||
| 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A, 0x8F96C396, 0x2AD751E8, | |||
| 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF, 0x26217BCD, 0x8360E9B3, 0x694E29C0, | |||
| 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4, 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, | |||
| 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161, 0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, | |||
| 0x6D1C3023, 0x8732F050, 0x2273622E, 0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, | |||
| 0xBF63CE95, 0x1A225CEB, 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, | |||
| 0xFFD7132A, 0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF, | |||
| 0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0, 0xC3D4340C, | |||
| 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065, 0x6A638C57, 0xCF221E29, | |||
| 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E, 0x5232B292, 0xF77320EC, 0x1D5DE09F, | |||
| 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB, 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, | |||
| 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4, 0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, | |||
| 0x190FF97C, 0xF321390F, 0x5660AB71, 0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, | |||
| 0x9DF3018D, 0x38B293F3, 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, | |||
| 0x00E3AD36, 0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79, | |||
| 0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC, 0xAD060C8E, | |||
| 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7, 0x9557324B, 0x3016A035, | |||
| 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622, 0xDDA47104, 0x78E5E37A, 0x92CB2309, | |||
| 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D, 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, | |||
| 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8}; | |||
| static const uint32 crc_table_o56[CRC_TABLE_SIZE] = { | |||
| 0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA, 0xF64463E6, | |||
| 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C, 0xE964B13D, 0x34211B85, | |||
| 0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7, 0x1F20D2DB, 0xC2657863, 0xA047F15A, | |||
| 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11, 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, | |||
| 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41, 0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, | |||
| 0x8706EC26, 0xE524651F, 0x3861CFA7, 0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, | |||
| 0xFA04B7C4, 0x27411D7C, 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, | |||
| 0xD1057E9A, 0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D, | |||
| 0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB, 0x42C2EEDA, | |||
| 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610, 0xB4868D3C, 0x69C32784, | |||
| 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6, 0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, | |||
| 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6, 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, | |||
| 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040, 0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, | |||
| 0x3380611A, 0x51A2E823, 0x8CE7429B, 0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, | |||
| 0xA7E68BC5, 0x7AA3217D, 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, | |||
| 0x4BA071F5, 0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213, | |||
| 0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8, 0x4D801BE4, | |||
| 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E, 0x8585DDB4, 0x58C0770C, | |||
| 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E, 0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, | |||
| 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698, 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, | |||
| 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443, 0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, | |||
| 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5, 0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, | |||
| 0x3D4384AA, 0xE0062E12, 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, | |||
| 0x16424DF4, 0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F, | |||
| 0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9, 0x2E238253, | |||
| 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99, 0xD867E1B5, 0x05224B0D, | |||
| 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F, 0xC747336E, 0x1A0299D6, 0x782010EF, | |||
| 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4, 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, | |||
| 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842}; | |||
| static const uint32 crc_table_o64[CRC_TABLE_SIZE] = { | |||
| 0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44, 0xC5670B91, | |||
| 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5, 0x8F2261D3, 0xB7330E7F, | |||
| 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97, 0x4A456A42, 0x725405EE, 0x3A67B51A, | |||
| 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406, 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, | |||
| 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13, 0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, | |||
| 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082, 0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, | |||
| 0x04EDB56C, 0x3CFCDAC0, 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, | |||
| 0xF99BD151, 0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA, | |||
| 0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B, 0xB8730B7D, | |||
| 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539, 0x7D1400EC, 0x45056F40, | |||
| 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8, 0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, | |||
| 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD, 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, | |||
| 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C, 0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, | |||
| 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E, 0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, | |||
| 0xF6DBD453, 0xCECABBFF, 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, | |||
| 0xC6D4DB18, 0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089, | |||
| 0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB, 0x24E7BF1E, | |||
| 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A, 0x750A600B, 0x4D1B0FA7, | |||
| 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F, 0xB06D6B9A, 0x887C0436, 0xC04FB4C2, | |||
| 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE, 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, | |||
| 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C, 0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, | |||
| 0xE71BDB55, 0xAF286BA1, 0x9739040D, 0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, | |||
| 0xC994DE1A, 0xF185B1B6, 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, | |||
| 0x34E2BA27, 0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065, | |||
| 0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4, 0x425B0AA5, | |||
| 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1, 0x873C0134, 0xBF2D6E98, | |||
| 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70, 0xCD796B76, 0xF56804DA, 0xBD5BB42E, | |||
| 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532, 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, | |||
| 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3}; | |||
| static const uint32 crc_table_o72[CRC_TABLE_SIZE] = { | |||
| 0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD, 0x6006181F, | |||
| 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2, 0xC00C303E, 0x2F3C5B27, | |||
| 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93, 0xA00A2821, 0x4F3A4338, 0x7B8688E2, | |||
| 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C, 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, | |||
| 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20, 0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, | |||
| 0xB83752FC, 0x8C8B9926, 0x63BBF23F, 0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, | |||
| 0x2C81B107, 0xC3B1DA1E, 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, | |||
| 0xA3B7C201, 0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746, | |||
| 0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59, 0xCE086BD5, | |||
| 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778, 0xAE0E73CA, 0x413E18D3, | |||
| 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67, 0x8BF04D66, 0x64C0267F, 0x507CEDA5, | |||
| 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB, 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, | |||
| 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4, 0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, | |||
| 0x16392136, 0x2285EAEC, 0xCDB581F5, 0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, | |||
| 0x4283F2F3, 0xADB399EA, 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, | |||
| 0x9A414B7B, 0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364, | |||
| 0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45, 0xBC029FF7, | |||
| 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A, 0x99FCA15B, 0x76CCCA42, | |||
| 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6, 0xF9FAB944, 0x16CAD25D, 0x22761987, | |||
| 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9, 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, | |||
| 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8, 0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, | |||
| 0x6433D514, 0x508F1ECE, 0xBFBF75D7, 0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, | |||
| 0x7B757B89, 0x94451090, 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, | |||
| 0xF443088F, 0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE, | |||
| 0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1, 0x97F8FAB0, | |||
| 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D, 0xF7FEE2AF, 0x18CE89B6, | |||
| 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02, 0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, | |||
| 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623, 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, | |||
| 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C}; | |||
| static const uint32 crc_table_o80[CRC_TABLE_SIZE] = { | |||
| 0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089, 0x4E2DFD53, | |||
| 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA, 0x9C5BFAA6, 0xF458D66E, | |||
| 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F, 0xD27607F5, 0xBA752B3D, 0x02705E65, | |||
| 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C, 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, | |||
| 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334, 0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, | |||
| 0xBE9597F7, 0x0690E2AF, 0x6E93CE67, 0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, | |||
| 0xD4E6E55A, 0xBCE5C992, 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, | |||
| 0xF2C834C1, 0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3, | |||
| 0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0, 0xE6ECFDDC, | |||
| 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55, 0xA8C1008F, 0xC0C22C47, | |||
| 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006, 0x47EC84C7, 0x2FEFA80F, 0x97EADD57, | |||
| 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E, 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, | |||
| 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D, 0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, | |||
| 0x16549778, 0xAE51E220, 0xC652CEE8, 0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, | |||
| 0xE07C1F73, 0x887F33BB, 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, | |||
| 0xE88BBE7D, 0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E, | |||
| 0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB, 0x27180901, | |||
| 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988, 0xC8358D49, 0xA036A181, | |||
| 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0, 0x8618701A, 0xEE1B5CD2, 0x561E298A, | |||
| 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093, 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, | |||
| 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766, 0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, | |||
| 0xD7A063A5, 0x6FA516FD, 0x07A63A35, 0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, | |||
| 0xFA3F95CF, 0x923CB907, 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, | |||
| 0xDC114454, 0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1, | |||
| 0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2, 0xB2828A33, | |||
| 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA, 0xFCAF7760, 0x94AC5BA8, | |||
| 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9, 0x2ED97095, 0x46DA5C5D, 0xFEDF2905, | |||
| 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C, 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, | |||
| 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F}; | |||
| static const uint32 crc_table_o88[CRC_TABLE_SIZE] = { | |||
| 0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504, 0x423B04DA, | |||
| 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE, 0x847609B4, 0xCD4A7493, | |||
| 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0, 0xC64D0D6E, 0x8F717049, 0x5435F720, | |||
| 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A, 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, | |||
| 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D, 0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, | |||
| 0x271A9E09, 0xFC5E1960, 0xB5626447, 0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, | |||
| 0x3A13140E, 0x732F6929, 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, | |||
| 0x31146DF3, 0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36, | |||
| 0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC, 0x9E76C286, | |||
| 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782, 0xDC4DC65C, 0x9571BB7B, | |||
| 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358, 0x1700AEAB, 0x5E3CD38C, 0x857854E5, | |||
| 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF, 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, | |||
| 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75, 0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, | |||
| 0xFB575855, 0x2013DF3C, 0x692FA21B, 0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, | |||
| 0x6228DBE6, 0x2B14A6C1, 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47, | |||
| 0xCE589360, 0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA, | |||
| 0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4, 0xF24C9B0A, | |||
| 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E, 0x3901F3FD, 0x703D8EDA, | |||
| 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9, 0x7B3AF727, 0x32068A00, 0xE9420D69, | |||
| 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223, 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, | |||
| 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D, 0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, | |||
| 0x976D01D9, 0x4C2986B0, 0x0515FB97, 0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, | |||
| 0x9D642575, 0xD4585852, 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, | |||
| 0x96635C88, 0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6, | |||
| 0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C, 0x230138CF, | |||
| 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB, 0x613A3C15, 0x28064132, | |||
| 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911, 0xA777317B, 0xEE4B4C5C, 0x350FCB35, | |||
| 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F, 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, | |||
| 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5}; | |||
// Use the eight lookup tables to fold 8 bytes at a time into the CRC32C value
| inline void CRC32T8(uint32 *crc, const uint8_t **p) { | |||
| auto c = static_cast<uint32>(*crc ^ LE_LOAD32(*p)); | |||
| *p += 4; | |||
| *crc = crc_table_o88[c & 0xff] ^ crc_table_o80[(c >> 8) & 0xff] ^ crc_table_o72[(c >> 16) & 0xff] ^ | |||
| crc_table_o64[(c >> 24) & 0xff]; | |||
| c = static_cast<uint32>(LE_LOAD32(*p)); | |||
| *crc = (*crc) ^ crc_table_o56[c & 0xff] ^ crc_table_o48[(c >> 8) & 0xff] ^ crc_table_o40[(c >> 16) & 0xff] ^ | |||
| crc_table_o32[(c >> 24) & 0xff]; | |||
| *p += 4; | |||
| } | |||
// Compute the CRC32C value
| uint32 MakeCrc32c(uint32 init_crc, const char *data, size_t size) { | |||
| EXCEPT_CHECK_NULL(data); | |||
| uint32 crc = init_crc ^ 0xffffffffu; | |||
| const unsigned int OFFSET = 8; | |||
// Get the original begin and end addresses (possibly unaligned)
| auto *bp = reinterpret_cast<const uint8_t *>(data); | |||
| const uint8_t *ep = bp + size; | |||
// Get the aligned address:
// point bp_align at the first 4-byte-aligned byte in the buffer.
// This might be just past the end of the buffer.
| auto pval = reinterpret_cast<uintptr_t>(bp); | |||
| auto *bp_align = reinterpret_cast<const uint8_t *>(MEM_ALIGN(pval, 2)); | |||
// Process the unaligned leading bytes, if any
if (bp_align <= ep) {
// Process bytes until finished or bp is 4-byte aligned
| while (bp != bp_align) { | |||
| crc = crc_table_o32[(crc & 0xff) ^ (*bp++)] ^ (crc >> 8); | |||
| } | |||
| } | |||
// Process 8 bytes at a time using the eight tables
| while ((ep - bp) >= OFFSET) { | |||
| CRC32T8(&crc, &bp); | |||
| } | |||
// Process the remaining trailing bytes
| while (bp < ep) { | |||
| crc = crc_table_o32[(crc & 0xff) ^ (*bp++)] ^ (crc >> 8); | |||
| } | |||
| return crc ^ 0xffffffffu; | |||
| } | |||
| @@ -0,0 +1,54 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DATAVISUAL_UTILS_CRC32_CRC32_H_ | |||
| #define DATAVISUAL_UTILS_CRC32_CRC32_H_ | |||
| #include <pybind11/pybind11.h> | |||
| #include <stddef.h> | |||
| #include <cstdint> | |||
| #include "crc32/base.h" | |||
| // Align n to (1 << m) byte boundary | |||
| #define MEM_ALIGN(n, m) ((n + ((1 << m) - 1)) & ~((1 << m) - 1)) | |||
// Mask delta used by the masked-CRC scheme.
| static constexpr uint32 kMaskDelta = 0xa282ead8ul; | |||
// Provides the CRC32C functions
// Calculate the CRC32C value using the 8-table method
| uint32 MakeCrc32c(uint32 init_crc, const char* data, size_t size); | |||
| uint32 GetMaskCrc32cValue(const char* data, size_t n) { | |||
| auto crc = MakeCrc32c(0, data, n); | |||
| return crc; | |||
| } | |||
| uint32 GetValueFromStr(const char* crc_str) { | |||
| uint32 crc = DecodeFixed32(crc_str); | |||
| uint32 rot = crc - kMaskDelta; | |||
| return ((rot >> 17) | (rot << 15)); | |||
| } | |||
| PYBIND11_MODULE(crc32, m) { | |||
| m.doc() = "crc util"; | |||
| m.def("MakeCrc32c", &MakeCrc32c, "A function calculating the crc32c value, use the 8 table method"); | |||
| m.def("GetMaskCrc32cValue", &GetMaskCrc32cValue, "A function return the crc32c value"); | |||
| m.def("GetValueFromStr", &GetValueFromStr, "A function return the crc32c value from string"); | |||
| } | |||
| #endif // DATAVISUAL_UTILS_CRC32_CRC32_H_ | |||
| @@ -0,0 +1,155 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Common Tools.""" | |||
| import imghdr | |||
| import math | |||
| import os | |||
| from numbers import Number | |||
| from urllib.parse import unquote | |||
| from mindinsight.utils import exceptions | |||
| _IMG_EXT_TO_MIMETYPE = { | |||
| 'bmp': 'image/bmp', | |||
| 'gif': 'image/gif', | |||
| 'jpeg': 'image/jpeg', | |||
| 'png': 'image/png', | |||
| } | |||
| _DEFAULT_IMAGE_MIMETYPE = 'application/octet-stream' | |||
| def find_app_package(): | |||
| """Find sub-packages under the backend directory.""" | |||
| backend_dir = os.path.realpath(os.path.join(__file__, os.pardir, os.pardir, os.pardir, "backend")) | |||
| packages = [] | |||
| for file in os.listdir(backend_dir): | |||
| file_path = os.path.join(backend_dir, file) | |||
| if os.path.isfile(file_path): | |||
| continue | |||
| if not os.path.isfile(os.path.join(file_path, '__init__.py')): | |||
| continue | |||
| rel_path = os.path.relpath(file_path, backend_dir) | |||
| package = rel_path.replace(os.path.sep, '.') | |||
| package = f"mindinsight.backend.{package}" | |||
| packages.append(package) | |||
| return packages | |||
| def to_str(bytes_or_text, encode="utf-8"): | |||
| """Convert bytes to str, passing str through unchanged.""" | |||
| if isinstance(bytes_or_text, bytes): | |||
| return bytes_or_text.decode(encode) | |||
| if isinstance(bytes_or_text, str): | |||
| return bytes_or_text | |||
| raise TypeError("Parameter is neither str nor bytes, param={}".format(bytes_or_text)) | |||
| def to_int(param, param_name): | |||
| """ | |||
| Convert param to int. | |||
| Args: | |||
| param (Any): The value to convert. | |||
| param_name (str): Param name. | |||
| Returns: | |||
| int, the converted value. | |||
| Raises: | |||
| ParamTypeError: If param cannot be converted to int. | |||
| """ | |||
| try: | |||
| param = int(param) | |||
| except ValueError: | |||
| raise exceptions.ParamTypeError(param_name, 'Integer') | |||
| return param | |||
| def str_to_bool(param, param_name): | |||
| """ | |||
| Check param and transform it to bool. | |||
| Args: | |||
| param (str): 'true' or 'false' is valid. | |||
| param_name (str): Param name. | |||
| Returns: | |||
| bool, True if param is 'true' (case-insensitive), otherwise False. | |||
| Raises: | |||
| ParamValueError: If the value of param is neither 'false' nor 'true'. | |||
| """ | |||
| if not isinstance(param, str): | |||
| raise exceptions.ParamTypeError(param_name, 'str') | |||
| if param.lower() not in ['false', 'true']: | |||
| raise exceptions.ParamValueError("The value of %s must be 'false' or 'true'." % param_name) | |||
| param = (param.lower() == 'true') | |||
| return param | |||
| def get_img_mimetype(img_data): | |||
| """ | |||
| Recognize the image header and return the image MIME type. | |||
| Args: | |||
| img_data (bytes): Binary image data. | |||
| Returns: | |||
| str, the MIME type of the given image. | |||
| """ | |||
| image_type = imghdr.what(None, img_data) | |||
| mimetype = _IMG_EXT_TO_MIMETYPE.get(image_type, _DEFAULT_IMAGE_MIMETYPE) | |||
| return mimetype | |||
| def get_train_id(request): | |||
| """ | |||
| Get train ID from the request query string and unquote it. | |||
| Args: | |||
| request (FlaskRequest): Http request instance. | |||
| Returns: | |||
| str, unquoted train ID. | |||
| """ | |||
| train_id = request.args.get('train_id') | |||
| if train_id is not None: | |||
| try: | |||
| train_id = unquote(train_id, errors='strict') | |||
| except UnicodeDecodeError: | |||
| raise exceptions.ParamValueError('Unquote error with strict mode') | |||
| return train_id | |||
| def if_nan_inf_to_none(name, value): | |||
| """ | |||
| Transform value to None if it is NaN or Inf. | |||
| Args: | |||
| name (str): Name of value. | |||
| value (float): The number to check. | |||
| Returns: | |||
| float or None, the original value, or None if it is NaN or Inf. | |||
| """ | |||
| if not isinstance(value, Number): | |||
| raise exceptions.ParamTypeError(name, 'number') | |||
| if math.isnan(value) or math.isinf(value): | |||
| value = None | |||
| return value | |||
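| # Illustrative only: a minimal sketch of how the helpers above compose when | |||
| # handling a request; the parameter values are hypothetical. | |||
| # limit = to_int('100', 'limit')  # -> 100 | |||
| # detail = str_to_bool('True', 'detail')  # -> True (case-insensitive) | |||
| # loss = if_nan_inf_to_none('loss', float('nan'))  # -> None, safe for JSON | |||
| # mimetype = get_img_mimetype(img_bytes)  # e.g. 'image/png' | |||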
| @@ -0,0 +1,33 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Lineagemgr Module Introduction. | |||
| This module provides Python APIs to collect and query the lineage of models. | |||
| Users can add the TrainLineage/EvalLineage callback to the MindSpore train/eval callback list to | |||
| collect the key parameters and results, such as the names of the network and optimizer and the | |||
| evaluation metrics and results. | |||
| The query APIs can then be used to retrieve the lineage information of the models, for example, | |||
| which hyperparameters were used in training, or which model version achieved the highest | |||
| accuracy. | |||
| """ | |||
| from mindinsight.lineagemgr.api.model import get_summary_lineage, filter_summary_lineage | |||
| from mindinsight.lineagemgr.common.log import logger | |||
| try: | |||
| from mindinsight.lineagemgr.collection.model.model_lineage import TrainLineage, EvalLineage | |||
| except (ModuleNotFoundError, NameError, ImportError): | |||
| logger.warning('MindSpore not found!') | |||
| __all__ = ["TrainLineage", "EvalLineage", "get_summary_lineage", "filter_summary_lineage"] | |||
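| # Illustrative only: the intended end-to-end flow, adapted from the docstring | |||
| # examples in this package; the Model object and paths are hypothetical. | |||
| # train_lineage = TrainLineage(summary_record=summary_writer) | |||
| # model.train(epoch_num, dataset, callbacks=[train_lineage]) | |||
| # info = get_summary_lineage('/path/to/summary')  # query one training | |||
| # all_info = filter_summary_lineage('/path/to/summary_base')  # query all trainings | |||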
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,292 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """This file is used to define the model lineage python api.""" | |||
| import os | |||
| from mindinsight.lineagemgr.common.exceptions.exceptions import LineageParamValueError, \ | |||
| LineageFileNotFoundError, LineageQuerySummaryDataError, LineageParamSummaryPathError, \ | |||
| LineageQuerierParamException, LineageDirNotExistError, LineageSearchConditionParamError, \ | |||
| LineageParamTypeError, LineageSummaryParseException | |||
| from mindinsight.lineagemgr.common.log import logger as log | |||
| from mindinsight.lineagemgr.common.path_parser import SummaryPathParser | |||
| from mindinsight.lineagemgr.common.validator.model_parameter import SearchModelConditionParameter | |||
| from mindinsight.lineagemgr.common.validator.validate import validate_filter_key | |||
| from mindinsight.lineagemgr.common.validator.validate import validate_search_model_condition, \ | |||
| validate_condition, validate_path | |||
| from mindinsight.lineagemgr.querier.querier import Querier | |||
| from mindinsight.utils.exceptions import MindInsightException | |||
| def get_summary_lineage(summary_dir, keys=None): | |||
| """ | |||
| Get the lineage information according to summary directory and keys. | |||
| The function queries lineage information of single train process | |||
| corresponding to the given summary directory. Users can query the | |||
| information according to `keys`. | |||
| Args: | |||
| summary_dir (str): The summary directory. It contains summary logs for | |||
| one training. | |||
| keys (list[str]): The filter keys of lineage information. The acceptable | |||
| keys are `metric`, `hyper_parameters`, `algorithm`, `train_dataset`, | |||
| `model`, `valid_dataset` and `dataset_graph`. If it is `None`, all | |||
| information will be returned. Default: None. | |||
| Returns: | |||
| dict, the lineage information for one training. | |||
| Raises: | |||
| LineageParamSummaryPathError: If summary path is invalid. | |||
| LineageQuerySummaryDataError: If querying summary data fails. | |||
| LineageFileNotFoundError: If the summary log file is not found. | |||
| Examples: | |||
| >>> summary_dir = "/path/to/summary" | |||
| >>> summary_lineage_info = get_summary_lineage(summary_dir) | |||
| >>> hyper_parameters = get_summary_lineage(summary_dir, keys=["hyper_parameters"]) | |||
| """ | |||
| try: | |||
| summary_dir = validate_path(summary_dir) | |||
| except MindInsightException as error: | |||
| log.error(str(error)) | |||
| log.exception(error) | |||
| raise LineageParamSummaryPathError(str(error.message)) | |||
| if keys is not None: | |||
| validate_filter_key(keys) | |||
| summary_path = SummaryPathParser.get_latest_lineage_summary(summary_dir) | |||
| if summary_path is None: | |||
| log.error('There is no summary log file under summary_dir.') | |||
| raise LineageFileNotFoundError( | |||
| 'There is no summary log file under summary_dir.' | |||
| ) | |||
| try: | |||
| result = Querier(summary_path).get_summary_lineage( | |||
| summary_dir, filter_keys=keys) | |||
| except LineageSummaryParseException: | |||
| return {} | |||
| except (LineageQuerierParamException, LineageParamTypeError) as error: | |||
| log.error(str(error)) | |||
| log.exception(error) | |||
| raise LineageQuerySummaryDataError("Get summary lineage failed.") | |||
| return result[0] | |||
| def filter_summary_lineage(summary_base_dir, search_condition=None): | |||
| """ | |||
| Filter the lineage information under summary base directory according to search condition. | |||
| Users can filter and sort all lineage information according to the search | |||
| condition. The supported filter fields include `summary_dir`, `network`, | |||
| etc. The filter conditions include `eq`, `lt`, `gt`, `le`, `ge` and `in`. | |||
| At the same time, the combined use of these fields and conditions is | |||
| supported. To sort the result by a field, specify `sorted_name` and | |||
| `sorted_type`. | |||
| Users can use `lineage_type` to decide what kind of lineage information to | |||
| query. If the `lineage_type` is `dataset`, the query result is only the | |||
| lineage information related to data augmentation. If the `lineage_type` is | |||
| `model` or `None`, the query result is all lineage information. | |||
| Users can paginate the query result with `offset` and `limit`: `offset` is the | |||
| page number and `limit` is the number of records per page. | |||
| Args: | |||
| summary_base_dir (str): The summary base directory. It contains summary | |||
| directories generated by training. | |||
| search_condition (dict): The search condition. When filtering and | |||
| sorting, in addition to the following supported fields, fields | |||
| prefixed with `metric_` are also supported. The fields prefixed with | |||
| `metric_` are related to the `metrics` parameter in the training | |||
| script. For example, if the key of `metrics` parameter is | |||
| `accuracy`, the field should be `metric_accuracy`. Default: None. | |||
| - summary_dir (dict): The filter condition of summary directory. | |||
| - loss_function (dict): The filter condition of loss function. | |||
| - train_dataset_path (dict): The filter condition of train dataset path. | |||
| - train_dataset_count (dict): The filter condition of train dataset count. | |||
| - test_dataset_path (dict): The filter condition of test dataset path. | |||
| - test_dataset_count (dict): The filter condition of test dataset count. | |||
| - network (dict): The filter condition of network. | |||
| - optimizer (dict): The filter condition of optimizer. | |||
| - learning_rate (dict): The filter condition of learning rate. | |||
| - epoch (dict): The filter condition of epoch. | |||
| - batch_size (dict): The filter condition of batch size. | |||
| - loss (dict): The filter condition of loss. | |||
| - model_size (dict): The filter condition of model size. | |||
| - dataset_mark (dict): The filter condition of dataset mark. | |||
| - offset (int): Page number, the value range is [0, 100000]. | |||
| - limit (int): The number in one page, the value range is [1, 100]. | |||
| - sorted_name (str): Specify which field to sort by. | |||
| - sorted_type (str): Specify sort order. It can be `ascending` or | |||
| `descending`. | |||
| - lineage_type (str): It decides what kind of lineage information to | |||
| query. It can be `dataset` or `model`. If it is `dataset`, | |||
| the query result is only the lineage information related to data | |||
| augmentation. If it is `model` or `None`, the query result is all | |||
| lineage information. | |||
| Returns: | |||
| dict, all lineage information under summary base directory according to | |||
| search condition. | |||
| Raises: | |||
| LineageSearchConditionParamError: If search_condition param is invalid. | |||
| LineageParamSummaryPathError: If summary path is invalid. | |||
| LineageFileNotFoundError: If the summary log file is not found. | |||
| LineageQuerySummaryDataError: If querying summary log file data fails. | |||
| Examples: | |||
| >>> summary_base_dir = "/path/to/summary_base" | |||
| >>> search_condition = { | |||
| >>> 'summary_dir': { | |||
| >>> 'in': [ | |||
| >>> os.path.join(summary_base_dir, 'summary_1'), | |||
| >>> os.path.join(summary_base_dir, 'summary_2'), | |||
| >>> os.path.join(summary_base_dir, 'summary_3') | |||
| >>> ] | |||
| >>> }, | |||
| >>> 'loss': { | |||
| >>> 'gt': 2.0 | |||
| >>> }, | |||
| >>> 'batch_size': { | |||
| >>> 'ge': 128, | |||
| >>> 'le': 256 | |||
| >>> }, | |||
| >>> 'metric_accuracy': { | |||
| >>> 'lt': 0.1 | |||
| >>> }, | |||
| >>> 'sorted_name': 'summary_dir', | |||
| >>> 'sorted_type': 'descending', | |||
| >>> 'limit': 3, | |||
| >>> 'offset': 0, | |||
| >>> 'lineage_type': 'model' | |||
| >>> } | |||
| >>> summary_lineage = filter_summary_lineage(summary_base_dir) | |||
| >>> summary_lineage_filter = filter_summary_lineage(summary_base_dir, search_condition) | |||
| """ | |||
| try: | |||
| summary_base_dir = validate_path(summary_base_dir) | |||
| except (LineageParamValueError, LineageDirNotExistError) as error: | |||
| log.error(str(error)) | |||
| log.exception(error) | |||
| raise LineageParamSummaryPathError(str(error.message)) | |||
| search_condition = {} if search_condition is None else search_condition | |||
| try: | |||
| validate_condition(search_condition) | |||
| validate_search_model_condition(SearchModelConditionParameter, search_condition) | |||
| except MindInsightException as error: | |||
| log.error(str(error)) | |||
| log.exception(error) | |||
| raise LineageSearchConditionParamError(str(error.message)) | |||
| try: | |||
| search_condition = _convert_relative_path_to_abspath(summary_base_dir, search_condition) | |||
| except (LineageParamValueError, LineageDirNotExistError) as error: | |||
| log.error(str(error)) | |||
| log.exception(error) | |||
| raise LineageParamSummaryPathError(str(error.message)) | |||
| summary_path = SummaryPathParser.get_latest_lineage_summaries(summary_base_dir) | |||
| if not summary_path: | |||
| log.error('There is no summary log file under summary_base_dir.') | |||
| raise LineageFileNotFoundError( | |||
| 'There is no summary log file under summary_base_dir.' | |||
| ) | |||
| try: | |||
| result = Querier(summary_path).filter_summary_lineage( | |||
| condition=search_condition | |||
| ) | |||
| except LineageSummaryParseException: | |||
| result = {'object': [], 'count': 0} | |||
| except (LineageQuerierParamException, LineageParamTypeError) as error: | |||
| log.error(str(error)) | |||
| log.exception(error) | |||
| raise LineageQuerySummaryDataError("Filter summary lineage failed.") | |||
| return result | |||
| def _convert_relative_path_to_abspath(summary_base_dir, search_condition): | |||
| """ | |||
| Convert relative path to absolute path. | |||
| Args: | |||
| summary_base_dir (str): The summary base directory. | |||
| search_condition (dict): The search condition. | |||
| Returns: | |||
| dict, the updated search_condition. | |||
| Raises: | |||
| LineageParamValueError: If the value of input_name is invalid. | |||
| """ | |||
| if ("summary_dir" not in search_condition) or (not search_condition.get("summary_dir")): | |||
| return search_condition | |||
| summary_dir_condition = search_condition.get("summary_dir") | |||
| if not set(summary_dir_condition.keys()).issubset(['in', 'eq']): | |||
| raise LineageParamValueError("Invalid operation of summary dir.") | |||
| if 'in' in summary_dir_condition: | |||
| summary_paths = [] | |||
| for summary_dir in summary_dir_condition.get('in'): | |||
| if summary_dir.startswith('./'): | |||
| abs_dir = os.path.join( | |||
| summary_base_dir, summary_dir[2:] | |||
| ) | |||
| abs_dir = validate_path(abs_dir) | |||
| else: | |||
| abs_dir = validate_path(summary_dir) | |||
| summary_paths.append(abs_dir) | |||
| search_condition.get('summary_dir')['in'] = summary_paths | |||
| if 'eq' in summary_dir_condition: | |||
| summary_dir = summary_dir_condition.get('eq') | |||
| if summary_dir.startswith('./'): | |||
| abs_dir = os.path.join( | |||
| summary_base_dir, summary_dir[2:] | |||
| ) | |||
| abs_dir = validate_path(abs_dir) | |||
| else: | |||
| abs_dir = validate_path(summary_dir) | |||
| search_condition.get('summary_dir')['eq'] = abs_dir | |||
| return search_condition | |||
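| # Illustrative only: a rough sketch of what _convert_relative_path_to_abspath | |||
| # does to a search condition; the directory names are hypothetical and | |||
| # validate_path may further normalize the results. | |||
| # condition = {'summary_dir': {'in': ['./summary_1', '/abs/summary_2']}} | |||
| # _convert_relative_path_to_abspath('/base', condition) | |||
| # -> {'summary_dir': {'in': ['/base/summary_1', '/abs/summary_2']}} | |||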
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,37 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Metadata of lineage collection.""" | |||
| class Metadata: | |||
| """Initialize parameters used in model lineage management.""" | |||
| train_dataset_path = 'train_dataset_path' | |||
| valid_dataset_path = 'valid_dataset_path' | |||
| train_network = 'train_network' | |||
| loss_function = 'loss_function' | |||
| loss = 'loss' | |||
| optimizer = 'optimizer' | |||
| learning_rate = 'learning_rate' | |||
| epoch = 'epoch' | |||
| step_num = 'step_num' | |||
| parallel_mode = 'parallel_mode' | |||
| device_num = 'device_num' | |||
| batch_size = 'batch_size' | |||
| model_path = 'model_path' | |||
| model_ckpt = 'model_ckpt' | |||
| model_size = 'model_size' | |||
| metrics = 'metrics' | |||
| train_dataset_size = 'train_dataset_size' | |||
| valid_dataset_size = 'valid_dataset_size' | |||
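| # Illustrative only: the attributes above serve as canonical dict keys when a | |||
| # lineage record is assembled, e.g. (hypothetical values): | |||
| # train_lineage = { | |||
| #     Metadata.epoch: 10, | |||
| #     Metadata.batch_size: 32, | |||
| #     Metadata.optimizer: 'Momentum', | |||
| # } | |||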
| @@ -0,0 +1,621 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """This module is used to collect lineage information of model training.""" | |||
| import json | |||
| import os | |||
| import numpy as np | |||
| from mindinsight.lineagemgr.summary.summary_record import LineageSummary | |||
| from mindinsight.utils.exceptions import \ | |||
| MindInsightException | |||
| from mindinsight.lineagemgr.common.validator.validate import validate_train_run_context, \ | |||
| validate_eval_run_context, validate_file_path, validate_network, \ | |||
| validate_int_params, validate_summary_record, validate_raise_exception | |||
| from mindinsight.lineagemgr.common.exceptions.error_code import LineageErrors, LineageErrorMsg | |||
| from mindinsight.lineagemgr.common.exceptions.exceptions import LineageParamRunContextError, \ | |||
| LineageGetModelFileError, LineageLogError | |||
| from mindinsight.lineagemgr.common.log import logger as log | |||
| from mindinsight.lineagemgr.common.utils import try_except | |||
| from mindinsight.lineagemgr.common.validator.model_parameter import RunContextArgs, \ | |||
| EvalParameter | |||
| from mindinsight.lineagemgr.collection.model.base import Metadata | |||
| try: | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.train.callback import Callback, RunContext, ModelCheckpoint, SummaryStep | |||
| from mindspore.nn import Cell, Optimizer, WithLossCell, TrainOneStepWithLossScaleCell | |||
| from mindspore.nn.loss.loss import _Loss | |||
| from mindspore.dataset.engine import Dataset, MindDataset | |||
| import mindspore.dataset as ds | |||
| except (ImportError, ModuleNotFoundError): | |||
| log.warning('MindSpore not found!') | |||
| class TrainLineage(Callback): | |||
| """ | |||
| Collect lineage of a training job. | |||
| Args: | |||
| summary_record (SummaryRecord): The SummaryRecord instance used to | |||
| record the summary values; see mindspore.train.summary.SummaryRecord. | |||
| raise_exception (bool): Whether to raise exception when error occurs in | |||
| TrainLineage. If True, raise exception. If False, catch exception | |||
| and continue. Default: False. | |||
| Raises: | |||
| MindInsightException: If validating parameter fails. | |||
| LineageLogError: If recording lineage information fails. | |||
| Examples: | |||
| >>> from mindinsight.lineagemgr import TrainLineage | |||
| >>> from mindspore.train.callback import ModelCheckpoint, SummaryStep | |||
| >>> from mindspore.train.summary import SummaryRecord | |||
| >>> model = Model(train_network) | |||
| >>> model_ckpt = ModelCheckpoint(directory='/dir/to/save/model/') | |||
| >>> summary_writer = SummaryRecord(log_dir='./') | |||
| >>> summary_callback = SummaryStep(summary_writer, flush_step=2) | |||
| >>> lineagemgr = TrainLineage(summary_record=summary_writer) | |||
| >>> model.train(epoch_num, dataset, callbacks=[model_ckpt, summary_callback, lineagemgr]) | |||
| """ | |||
| def __init__(self, summary_record, raise_exception=False): | |||
| super(TrainLineage, self).__init__() | |||
| try: | |||
| validate_raise_exception(raise_exception) | |||
| self.raise_exception = raise_exception | |||
| validate_summary_record(summary_record) | |||
| self.summary_record = summary_record | |||
| summary_log_path = summary_record.full_file_name | |||
| validate_file_path(summary_log_path) | |||
| self.lineage_log_path = summary_log_path + '_lineage' | |||
| self.initial_learning_rate = None | |||
| except MindInsightException as err: | |||
| log.error(err) | |||
| if raise_exception: | |||
| raise | |||
| @try_except(log) | |||
| def begin(self, run_context): | |||
| """ | |||
| Initialize the training progress when the training job begins. | |||
| Args: | |||
| run_context (RunContext): It contains all lineage information, | |||
| see mindspore.train.callback.RunContext. | |||
| Raises: | |||
| MindInsightException: If validating parameter fails. | |||
| """ | |||
| log.info('Initialize training lineage collection...') | |||
| if not isinstance(run_context, RunContext): | |||
| error_msg = 'Invalid TrainLineage run_context.' | |||
| log.error(error_msg) | |||
| raise LineageParamRunContextError(error_msg) | |||
| run_context_args = run_context.original_args() | |||
| if not self.initial_learning_rate: | |||
| optimizer = run_context_args.get('optimizer') | |||
| if optimizer and not isinstance(optimizer, Optimizer): | |||
| log.error("The parameter optimizer is invalid. It should be an instance of " | |||
| "mindspore.nn.optim.optimizer.Optimizer.") | |||
| raise MindInsightException(error=LineageErrors.PARAM_OPTIMIZER_ERROR, | |||
| message=LineageErrorMsg.PARAM_OPTIMIZER_ERROR.value) | |||
| if optimizer: | |||
| log.info('Obtaining initial learning rate...') | |||
| self.initial_learning_rate = AnalyzeObject.analyze_optimizer(optimizer) | |||
| log.debug('initial_learning_rate: %s', self.initial_learning_rate) | |||
| else: | |||
| network = run_context_args.get('train_network') | |||
| validate_network(network) | |||
| optimizer = AnalyzeObject.get_optimizer_by_network(network) | |||
| self.initial_learning_rate = AnalyzeObject.analyze_optimizer(optimizer) | |||
| log.debug('initial_learning_rate: %s', self.initial_learning_rate) | |||
| # get train dataset graph | |||
| train_dataset = run_context_args.get('train_dataset') | |||
| dataset_graph_dict = ds.serialize(train_dataset) | |||
| dataset_graph_json_str = json.dumps(dataset_graph_dict, indent=2) | |||
| dataset_graph_dict = json.loads(dataset_graph_json_str) | |||
| log.info('Logging dataset graph...') | |||
| try: | |||
| lineage_summary = LineageSummary(self.lineage_log_path) | |||
| lineage_summary.record_dataset_graph(dataset_graph=dataset_graph_dict) | |||
| except Exception as error: | |||
| error_msg = f'Dataset graph log error in TrainLineage begin: {error}' | |||
| log.error(error_msg) | |||
| raise LineageLogError(error_msg) | |||
| log.info('Dataset graph logged successfully.') | |||
| @try_except(log) | |||
| def end(self, run_context): | |||
| """ | |||
| Collect lineage information when the training job ends. | |||
| Args: | |||
| run_context (RunContext): It contains all lineage information, | |||
| see mindspore.train.callback.RunContext. | |||
| Raises: | |||
| LineageLogError: If recording lineage information fails. | |||
| """ | |||
| log.info('Start to collect training lineage...') | |||
| if not isinstance(run_context, RunContext): | |||
| error_msg = 'Invalid TrainLineage run_context.' | |||
| log.error(error_msg) | |||
| raise LineageParamRunContextError(error_msg) | |||
| run_context_args = run_context.original_args() | |||
| validate_train_run_context(RunContextArgs, run_context_args) | |||
| train_lineage = dict() | |||
| train_lineage = AnalyzeObject.get_network_args( | |||
| run_context_args, train_lineage | |||
| ) | |||
| train_dataset = run_context_args.get('train_dataset') | |||
| callbacks = run_context_args.get('list_callback') | |||
| list_callback = getattr(callbacks, '_callbacks', []) | |||
| log.info('Obtaining model files...') | |||
| ckpt_file_path, _ = AnalyzeObject.get_file_path(list_callback) | |||
| train_lineage[Metadata.learning_rate] = self.initial_learning_rate | |||
| train_lineage[Metadata.epoch] = run_context_args.get('epoch_num') | |||
| train_lineage[Metadata.step_num] = run_context_args.get('cur_step_num') | |||
| train_lineage[Metadata.parallel_mode] = run_context_args.get('parallel_mode') | |||
| train_lineage[Metadata.device_num] = run_context_args.get('device_number') | |||
| train_lineage[Metadata.batch_size] = run_context_args.get('batch_num') | |||
| model_path_dict = { | |||
| 'ckpt': ckpt_file_path | |||
| } | |||
| train_lineage[Metadata.model_path] = json.dumps(model_path_dict) | |||
| log.info('Calculating model size...') | |||
| train_lineage[Metadata.model_size] = AnalyzeObject.get_model_size( | |||
| ckpt_file_path | |||
| ) | |||
| log.debug('model_size: %s', train_lineage[Metadata.model_size]) | |||
| log.info('Analyzing dataset object...') | |||
| train_lineage = AnalyzeObject.analyze_dataset(train_dataset, train_lineage, 'train') | |||
| log.info('Logging lineage information...') | |||
| try: | |||
| lineage_summary = LineageSummary(self.lineage_log_path) | |||
| lineage_summary.record_train_lineage(train_lineage) | |||
| except IOError as error: | |||
| error_msg = f'End error in TrainLineage: {error}' | |||
| log.error(error_msg) | |||
| raise LineageLogError(error_msg) | |||
| except Exception as error: | |||
| error_msg = f'End error in TrainLineage: {error}' | |||
| log.error(error_msg) | |||
| log.error('Failed to log the lineage of the training job.') | |||
| raise LineageLogError(error_msg) | |||
| log.info('The lineage of the training job has been logged successfully.') | |||
| class EvalLineage(Callback): | |||
| """ | |||
| Collect lineage of an evaluation job. | |||
| Args: | |||
| summary_record (SummaryRecord): The SummaryRecord instance used to | |||
| record the summary values; see mindspore.train.summary.SummaryRecord. | |||
| raise_exception (bool): Whether to raise exception when error occurs in | |||
| EvalLineage. If True, raise exception. If False, catch exception | |||
| and continue. Default: False. | |||
| Raises: | |||
| MindInsightException: If validating parameter fails. | |||
| LineageLogError: If recording lineage information fails. | |||
| Examples: | |||
| >>> from mindinsight.lineagemgr import EvalLineage | |||
| >>> from mindspore.train.callback import ModelCheckpoint, SummaryStep | |||
| >>> from mindspore.train.summary import SummaryRecord | |||
| >>> model = Model(train_network) | |||
| >>> model_ckpt = ModelCheckpoint(directory='/dir/to/save/model/') | |||
| >>> summary_writer = SummaryRecord(log_dir='./') | |||
| >>> summary_callback = SummaryStep(summary_writer, flush_step=2) | |||
| >>> lineagemgr = EvalLineage(summary_record=summary_writer) | |||
| >>> model.eval(dataset, callbacks=[model_ckpt, summary_callback, lineagemgr]) | |||
| """ | |||
| def __init__(self, summary_record, raise_exception=False): | |||
| super(EvalLineage, self).__init__() | |||
| try: | |||
| validate_raise_exception(raise_exception) | |||
| self.raise_exception = raise_exception | |||
| validate_summary_record(summary_record) | |||
| self.summary_record = summary_record | |||
| summary_log_path = summary_record.full_file_name | |||
| validate_file_path(summary_log_path) | |||
| self.lineage_log_path = summary_log_path + '_lineage' | |||
| except MindInsightException as err: | |||
| log.error(err) | |||
| if raise_exception: | |||
| raise | |||
| @try_except(log) | |||
| def end(self, run_context): | |||
| """ | |||
| Collect lineage information when the evaluation job ends. | |||
| Args: | |||
| run_context (RunContext): It contains all lineage information, | |||
| see mindspore.train.callback.RunContext. | |||
| Raises: | |||
| MindInsightException: If validating parameter fails. | |||
| LineageLogError: If recording lineage information fails. | |||
| """ | |||
| if not isinstance(run_context, RunContext): | |||
| error_msg = 'Invalid EvalLineage run_context.' | |||
| log.error(error_msg) | |||
| raise LineageParamRunContextError(error_msg) | |||
| run_context_args = run_context.original_args() | |||
| validate_eval_run_context(EvalParameter, run_context_args) | |||
| valid_dataset = run_context_args.get('valid_dataset') | |||
| eval_lineage = dict() | |||
| metrics = run_context_args.get('metrics') | |||
| eval_lineage[Metadata.metrics] = json.dumps(metrics) | |||
| eval_lineage[Metadata.step_num] = run_context_args.get('cur_step_num') | |||
| log.info('Analyzing dataset object...') | |||
| eval_lineage = AnalyzeObject.analyze_dataset(valid_dataset, eval_lineage, 'valid') | |||
| log.info('Logging evaluation job lineage...') | |||
| try: | |||
| lineage_summary = LineageSummary(self.lineage_log_path) | |||
| lineage_summary.record_evaluation_lineage(eval_lineage) | |||
| except Exception as error: | |||
| error_msg = f'End error in EvalLineage: {error}' | |||
| log.error(error_msg) | |||
| log.error('Failed to log the lineage of the evaluation job.') | |||
| raise LineageLogError(error_msg) | |||
| log.info('The lineage of the evaluation job has been logged successfully.') | |||
| class AnalyzeObject: | |||
| """Analyze class object in MindSpore.""" | |||
| @staticmethod | |||
| def get_optimizer_by_network(network): | |||
| """ | |||
| Get optimizer by analyzing network. | |||
| Args: | |||
| network (Cell): See mindspore.nn.Cell. | |||
| Returns: | |||
| Optimizer, an Optimizer object. | |||
| """ | |||
| optimizer = None | |||
| net_args = vars(network) if network else {} | |||
| net_cell = net_args.get('_cells') if net_args else {} | |||
| for _, value in net_cell.items(): | |||
| if isinstance(value, Optimizer): | |||
| optimizer = value | |||
| break | |||
| return optimizer | |||
| @staticmethod | |||
| def get_loss_fn_by_network(network): | |||
| """ | |||
| Get loss function by analyzing network. | |||
| Args: | |||
| network (Cell): See mindspore.nn.Cell. | |||
| Returns: | |||
| Cell, the loss function object. | |||
| """ | |||
| loss_fn = None | |||
| inner_cell_list = [] | |||
| net_args = vars(network) if network else {} | |||
| net_cell = net_args.get('_cells') if net_args else {} | |||
| for _, value in net_cell.items(): | |||
| if isinstance(value, Cell) and \ | |||
| not isinstance(value, Optimizer): | |||
| inner_cell_list.append(value) | |||
| while inner_cell_list: | |||
| inner_net_args = vars(inner_cell_list[0]) | |||
| inner_net_cell = inner_net_args.get('_cells') | |||
| for value in inner_net_cell.values(): | |||
| if isinstance(value, _Loss): | |||
| loss_fn = value | |||
| break | |||
| if isinstance(value, Cell): | |||
| inner_cell_list.append(value) | |||
| if loss_fn: | |||
| break | |||
| inner_cell_list.pop(0) | |||
| return loss_fn | |||
| @staticmethod | |||
| def get_backbone_network(network): | |||
| """ | |||
| Get the name of backbone network. | |||
| Args: | |||
| network (Cell): The train network. | |||
| Returns: | |||
| str, the name of the backbone network. | |||
| """ | |||
| with_loss_cell = False | |||
| backbone = None | |||
| net_args = vars(network) if network else {} | |||
| net_cell = net_args.get('_cells') if net_args else {} | |||
| for _, value in net_cell.items(): | |||
| if isinstance(value, WithLossCell): | |||
| backbone = getattr(value, '_backbone') | |||
| with_loss_cell = True | |||
| break | |||
| if with_loss_cell: | |||
| backbone_name = type(backbone).__name__ \ | |||
| if backbone else None | |||
| elif isinstance(network, TrainOneStepWithLossScaleCell): | |||
| backbone = getattr(network, 'network') | |||
| backbone_name = type(backbone).__name__ \ | |||
| if backbone else None | |||
| else: | |||
| backbone_name = type(network).__name__ \ | |||
| if network else None | |||
| return backbone_name | |||
| @staticmethod | |||
| def analyze_optimizer(optimizer): | |||
| """ | |||
| Analyze the optimizer, a Cell object of MindSpore, to obtain the | |||
| initial learning rate it adopted. | |||
| Args: | |||
| optimizer (Optimizer): See mindspore.nn.optim.Optimizer. | |||
| Returns: | |||
| float, the learning rate that the optimizer adopted. | |||
| """ | |||
| learning_rate = None | |||
| if isinstance(optimizer, Optimizer): | |||
| learning_rate = getattr(optimizer, 'learning_rate', None) | |||
| if learning_rate: | |||
| learning_rate = learning_rate.default_input | |||
| # Get the real learning rate value | |||
| if isinstance(learning_rate, Tensor): | |||
| learning_rate = learning_rate.asnumpy() | |||
| if learning_rate.ndim == 0: | |||
| learning_rate = np.atleast_1d(learning_rate) | |||
| learning_rate = list(learning_rate) | |||
| elif isinstance(learning_rate, float): | |||
| learning_rate = [learning_rate] | |||
| return learning_rate[0] if learning_rate else None | |||
| @staticmethod | |||
| def analyze_dataset(dataset, lineage_dict, dataset_type): | |||
| """ | |||
| Analyze Dataset, a Dataset object of MindSpore. | |||
| In this way, we can obtain the following attributes: | |||
| dataset_path (str), | |||
| train_dataset_size (int), | |||
| valid_dataset_size (int), | |||
| batch_size (int) | |||
| Args: | |||
| dataset (Dataset): See mindspore.dataset.engine.datasets.Dataset. | |||
| lineage_dict (dict): A dict contains lineage metadata. | |||
| dataset_type (str): Dataset type, train or valid. | |||
| Returns: | |||
| dict, the lineage metadata. | |||
| """ | |||
| # get_dataset_size() returns the number of batches in the dataset | |||
| dataset_batch_size = dataset.get_dataset_size() | |||
| if dataset_batch_size is not None: | |||
| validate_int_params(dataset_batch_size, 'dataset_batch_size') | |||
| log.debug('dataset_batch_size: %d', dataset_batch_size) | |||
| dataset_path = AnalyzeObject.get_dataset_path_wrapped(dataset) | |||
| if dataset_path: | |||
| dataset_path = '/'.join(dataset_path.split('/')[:-1]) | |||
| step_num = lineage_dict.get('step_num') | |||
| validate_int_params(step_num, 'step_num') | |||
| log.debug('step_num: %d', step_num) | |||
| if dataset_type == 'train': | |||
| lineage_dict[Metadata.train_dataset_path] = dataset_path | |||
| epoch = lineage_dict.get('epoch') | |||
| train_dataset_size = dataset_batch_size * (step_num / epoch) | |||
| lineage_dict[Metadata.train_dataset_size] = int(train_dataset_size) | |||
| elif dataset_type == 'valid': | |||
| lineage_dict[Metadata.valid_dataset_path] = dataset_path | |||
| lineage_dict[Metadata.valid_dataset_size] = dataset_batch_size * step_num | |||
| return lineage_dict | |||
| def get_dataset_path(self, output_dataset): | |||
| """ | |||
| Get dataset path of MindDataset object. | |||
| Args: | |||
| output_dataset (Union[MindDataset, Dataset]): See | |||
| mindspore.dataset.engine.datasets.Dataset. | |||
| Returns: | |||
| str, dataset path. | |||
| """ | |||
| if isinstance(output_dataset, MindDataset): | |||
| return output_dataset.dataset_file | |||
| return self.get_dataset_path(output_dataset.input[0]) | |||
| @staticmethod | |||
| def get_dataset_path_wrapped(dataset): | |||
| """ | |||
| A wrapper for obtaining dataset path. | |||
| Args: | |||
| dataset (Union[MindDataset, Dataset]): See | |||
| mindspore.dataset.engine.datasets.Dataset. | |||
| Returns: | |||
| str, dataset path. | |||
| """ | |||
| dataset_path = None | |||
| if isinstance(dataset, Dataset): | |||
| try: | |||
| dataset_path = AnalyzeObject().get_dataset_path(dataset) | |||
| except IndexError: | |||
| dataset_path = None | |||
| validate_file_path(dataset_path, allow_empty=True) | |||
| return dataset_path | |||
| @staticmethod | |||
| def get_file_path(list_callback): | |||
| """ | |||
| Get ckpt_file_name and summary_log_path from MindSpore callback list. | |||
| Args: | |||
| list_callback (list[Callback]): The MindSpore training Callback list. | |||
| Returns: | |||
| tuple, contains ckpt_file_name and summary_log_path. | |||
| """ | |||
| ckpt_file_path = None | |||
| summary_log_path = None | |||
| for callback in list_callback: | |||
| if isinstance(callback, ModelCheckpoint): | |||
| ckpt_file_path = callback.latest_ckpt_file_name | |||
| if isinstance(callback, SummaryStep): | |||
| summary_log_path = callback.summary_file_name | |||
| if ckpt_file_path: | |||
| validate_file_path(ckpt_file_path) | |||
| ckpt_file_path = os.path.realpath(ckpt_file_path) | |||
| if summary_log_path: | |||
| validate_file_path(summary_log_path) | |||
| summary_log_path = os.path.realpath(summary_log_path) | |||
| return ckpt_file_path, summary_log_path | |||
| @staticmethod | |||
| def get_file_size(file_path): | |||
| """ | |||
| Get the file size. | |||
| Args: | |||
| file_path (str): The file path. | |||
| Returns: | |||
| int, the file size. | |||
| """ | |||
| try: | |||
| return os.path.getsize(file_path) | |||
| except (OSError, IOError) as error: | |||
| error_msg = f"Error when getting model file size: {error}" | |||
| log.error(error_msg) | |||
| raise LineageGetModelFileError(error_msg) | |||
| @staticmethod | |||
| def get_model_size(ckpt_file_path): | |||
| """ | |||
| Get the total size of the model checkpoint file. | |||
| Args: | |||
| ckpt_file_path (str): The checkpoint file path. | |||
| Returns: | |||
| int, the total file size. | |||
| """ | |||
| if ckpt_file_path: | |||
| ckpt_file_path = os.path.realpath(ckpt_file_path) | |||
| ckpt_file_size = AnalyzeObject.get_file_size(ckpt_file_path) | |||
| else: | |||
| ckpt_file_size = 0 | |||
| return ckpt_file_size | |||
| @staticmethod | |||
| def get_network_args(run_context_args, train_lineage): | |||
| """ | |||
| Get the parameters related to the network, | |||
| such as optimizer, loss function. | |||
| Args: | |||
| run_context_args (dict): It contains all information of the training job. | |||
| train_lineage (dict): A dict contains lineage metadata. | |||
| Returns: | |||
| dict, the lineage metadata. | |||
| """ | |||
| network = run_context_args.get('train_network') | |||
| validate_network(network) | |||
| optimizer = run_context_args.get('optimizer') | |||
| if not optimizer: | |||
| optimizer = AnalyzeObject.get_optimizer_by_network(network) | |||
| loss_fn = run_context_args.get('loss_fn') | |||
| if not loss_fn: | |||
| loss_fn = AnalyzeObject.get_loss_fn_by_network(network) | |||
| loss = None | |||
| else: | |||
| loss = run_context_args.get('net_outputs') | |||
| if loss: | |||
| log.info('Calculating loss...') | |||
| loss_numpy = loss.asnumpy() | |||
| loss = float(np.atleast_1d(loss_numpy)[0]) | |||
| log.debug('loss: %s', loss) | |||
| train_lineage[Metadata.loss] = loss | |||
| else: | |||
| train_lineage[Metadata.loss] = None | |||
| # Analyze classname of optimizer, loss function and training network. | |||
| train_lineage[Metadata.optimizer] = type(optimizer).__name__ \ | |||
| if optimizer else None | |||
| train_lineage[Metadata.train_network] = AnalyzeObject.get_backbone_network(network) | |||
| train_lineage[Metadata.loss_function] = type(loss_fn).__name__ \ | |||
| if loss_fn else None | |||
| return train_lineage | |||
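| # Illustrative only: a self-contained numpy sketch of the loss extraction done | |||
| # in get_network_args above, where net_outputs may be a scalar or 1-d tensor. | |||
| # import numpy as np | |||
| # loss_numpy = np.float32(0.25)  # stands in for loss.asnumpy() | |||
| # loss = float(np.atleast_1d(loss_numpy)[0])  # -> 0.25, a plain Python float | |||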
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,207 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Lineage error code and messages.""" | |||
| from enum import Enum, unique | |||
| from mindinsight.utils.constant import LineageMgrErrors as LineageErrorCodes | |||
| _PARAM_ERROR_MASK = 0b00001 << 7 | |||
| _MINDSPORE_COLLECTOR_ERROR = 0b00011 << 7 | |||
| _MODEL_LINEAGE_API_ERROR_MASK = 0b00100 << 7 | |||
| _DATASET_COLLECTOR_ERROR_MASK = 0b00101 << 7 | |||
| _DATASET_LINEAGE_ERROR_MASK = 0b00110 << 7 | |||
| _SUMMARY_ANALYZE_ERROR_MASK = 0b00111 << 7 | |||
| _QUERIER_ERROR_MASK = 0b01000 << 7 | |||
| @unique | |||
| class LineageErrors(LineageErrorCodes): | |||
| """Lineage error codes.""" | |||
| PARAM_TYPE_ERROR = 0 | _PARAM_ERROR_MASK | |||
| PARAM_VALUE_ERROR = 1 | _PARAM_ERROR_MASK | |||
| PARAM_MISSING_ERROR = 2 | _PARAM_ERROR_MASK | |||
| PARAM_SUMMARY_RECORD_ERROR = 3 | _PARAM_ERROR_MASK | |||
| PARAM_RAISE_EXCEPTION_ERROR = 4 | _PARAM_ERROR_MASK | |||
| # MindSpore Collector error codes. | |||
| PARAM_RUN_CONTEXT_ERROR = 0 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_OPTIMIZER_ERROR = 1 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_LOSS_FN_ERROR = 2 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_TRAIN_NETWORK_ERROR = 3 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_DATASET_ERROR = 4 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_EPOCH_NUM_ERROR = 5 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_BATCH_NUM_ERROR = 6 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_TRAIN_PARALLEL_ERROR = 7 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_DEVICE_NUMBER_ERROR = 8 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_FILE_PATH_ERROR = 9 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_DATASET_SIZE_ERROR = 10 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_LEARNING_RATE_ERROR = 11 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_EVAL_METRICS_ERROR = 12 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_BATCH_SIZE_ERROR = 13 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_NET_OUTPUTS_ERROR = 14 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_CALLBACK_LIST_ERROR = 15 | _MINDSPORE_COLLECTOR_ERROR | |||
| LINEAGE_GET_MODEL_FILE_ERROR = 16 | _MINDSPORE_COLLECTOR_ERROR | |||
| LOG_LINEAGE_INFO_ERROR = 17 | _MINDSPORE_COLLECTOR_ERROR | |||
| PARAM_STEP_NUM_ERROR = 18 | _MINDSPORE_COLLECTOR_ERROR | |||
| # Model lineage error codes. | |||
| LINEAGE_PARAM_OPERATION_ERROR = 0 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_METRIC_ERROR = 1 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_LOSS_FUNCTION_ERROR = 4 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_TRAIN_DATASET_PATH_ERROR = 5 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_TRAIN_DATASET_COUNT_ERROR = 6 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_TEST_DATASET_PATH_ERROR = 7 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_TEST_DATASET_COUNT_ERROR = 8 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_NETWORK_ERROR = 9 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_OPTIMIZER_ERROR = 10 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_LEARNING_RATE_ERROR = 11 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_EPOCH_ERROR = 12 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_BATCH_SIZE_ERROR = 13 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_NOT_SUPPORT_ERROR = 14 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_LOSS_ERROR = 15 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_MODEL_SIZE_ERROR = 16 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_SUMMARY_DIR_ERROR = 17 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_SORTED_NAME_ERROR = 18 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_SORTED_TYPE_ERROR = 19 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_DIR_NOT_EXIST_ERROR = 20 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_SUMMARY_DATA_ERROR = 21 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_FILE_NOT_FOUND_ERROR = 22 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_SUMMARY_PATH_ERROR = 23 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_SEARCH_CONDITION_PARAM_ERROR = 24 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_LINEAGE_TYPE_ERROR = 25 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| # Summary analyze error codes. | |||
| SUMMARY_ANALYZE_ERROR = 0 | _SUMMARY_ANALYZE_ERROR_MASK | |||
| SUMMARY_VERIFICATION_ERROR = 1 | _SUMMARY_ANALYZE_ERROR_MASK | |||
| # Querier error codes. | |||
| EVENT_NOT_EXIST_ERROR = 0 | _QUERIER_ERROR_MASK | |||
| QUERIER_PARAM_ERROR = 1 | _QUERIER_ERROR_MASK | |||
| SUMMARY_PARSE_FAIL_ERROR = 2 | _QUERIER_ERROR_MASK | |||
| EVENT_FIELD_NOT_EXIST_ERROR = 4 | _QUERIER_ERROR_MASK | |||
| @unique | |||
| class LineageErrorMsg(Enum): | |||
| """Lineage error messages.""" | |||
| PARAM_TYPE_ERROR = "TypeError. {}" | |||
| PARAM_VALUE_ERROR = "ValueError. {}" | |||
| PARAM_MISSING_ERROR = "MissingError. {}" | |||
| PARAM_LIMIT_ERROR = "Invalid input limit. 0 < limit <= 100" | |||
| PARAM_OFFSET_ERROR = "Invalid input offset. 0 <= offset <= 100000" | |||
| PARAM_SUMMARY_RECORD_ERROR = "Invalid value for summary_record. It should be an instance of " \ | |||
| "mindspore.train.summary.SummaryRecord" | |||
| PARAM_RAISE_EXCEPTION_ERROR = "Invalid value for raise_exception. It should be True or False." | |||
| # Lineage error messages. | |||
| LINEAGE_PARAM_SUMMARY_PATH_ERROR = "The parameter summary path error: {}" | |||
| LINEAGE_SUMMARY_DATA_ERROR = "Query summary data error: {}" | |||
| LINEAGE_FILE_NOT_FOUND_ERROR = "File not found error: {}" | |||
| LINEAGE_DIR_NOT_EXIST_ERROR = "Dir not exist error: {}" | |||
| LINEAGE_SEARCH_CONDITION_PARAM_ERROR = "Search_condition param error: {}" | |||
| # MindSpore Collector error messages. | |||
| PARAM_RUN_CONTEXT_ERROR = "The parameter run_context is invalid. It should be an instance of " \ | |||
| "mindspore.train.callback.RunContext. {}" | |||
| PARAM_OPTIMIZER_ERROR = "The parameter optimizer is invalid. It should be an instance of " \ | |||
| "mindspore.nn.optim.optimizer.Optimizer." | |||
| PARAM_LOSS_FN_ERROR = "The parameter loss_fn is invalid. It should be a Function." | |||
| PARAM_NET_OUTPUTS_ERROR = "The parameter net_outputs is invalid. It should be a Tensor." | |||
| PARAM_TRAIN_NETWORK_ERROR = "The parameter train_network is invalid. It should be an instance of " \ | |||
| "mindspore.nn.cell.Cell." | |||
| PARAM_EPOCH_NUM_ERROR = "The parameter epoch is invalid. It should be a positive integer." | |||
| PARAM_STEP_NUM_ERROR = "The parameter step_num is invalid. It should be a positive integer." | |||
| PARAM_BATCH_NUM_ERROR = "The parameter batch_num is invalid. It should be a non-negative integer." | |||
| PARAM_TRAIN_PARALLEL_ERROR = "The parameter parallel_mode is invalid. It should be an integer " \ | |||
| "between 0 and 4." | |||
| PARAM_DEVICE_NUMBER_ERROR = "The parameter device_number is invalid. It should be a positive integer." | |||
| PARAM_LEARNING_RATE_ERROR = "The parameter learning_rate is invalid. It should be a float number or " \ | |||
| "an instance of mindspore.common.tensor.Tensor." | |||
| PARAM_EVAL_METRICS_ERROR = "The parameter metrics is invalid. It should be a dictionary." | |||
| PARAM_BATCH_SIZE_ERROR = "The parameter batch_size is invalid. It should be a non-negative integer." | |||
| PARAM_CALLBACK_LIST_ERROR = "The parameter list_callback is invalid. It should be an instance of " \ | |||
| "mindspore.train.callback._ListCallback." | |||
| LINEAGE_GET_MODEL_FILE_ERROR = "Error when getting model file size. {}" | |||
| LINEAGE_METRIC_ERROR = "The parameter {} is invalid. " \ | |||
| "It should be a dict and the value should be a float or an integer" | |||
| LINEAGE_COMPARE_OPERATION_ERROR = "The compare operation is invalid. It should be one of" \ | |||
| " 'eq', 'lt', 'gt', 'ge', 'le', 'in'." | |||
| LINEAGE_PARAM_SUMMARY_DIR_ERROR = "The parameter summary_dir is invalid. It should be a dict and the value " \ | |||
| "should be a string" | |||
| LINEAGE_TRAIN_DATASET_PATH_ERROR = "The parameter train_dataset_path is invalid." \ | |||
| " It should be a dict and the value should be a string" | |||
| LINEAGE_TRAIN_DATASET_COUNT_ERROR = "The parameter train_dataset_count is invalid. It should be a dict " \ | |||
| "and the value should be an integer between 0 and pow(2, 63) - 1" | |||
| LINEAGE_TEST_DATASET_PATH_ERROR = "The parameter test_dataset_path is invalid. " \ | |||
| "It should be a dict and the value should be a string" | |||
| LINEAGE_TEST_DATASET_COUNT_ERROR = "The parameter test_dataset_count is invalid. It should be a dict " \ | |||
| "and the value should be an integer between 0 and pow(2, 63) - 1" | |||
| LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string" | |||
| LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. It should be a dict and the value should be a string" | |||
| LINEAGE_LOSS_FUNCTION_ERROR = "The parameter loss_function is invalid. " \ | |||
| "It should be a dict and the value should be a string" | |||
| LINEAGE_LOSS_ERROR = "The parameter loss is invalid. " \ | |||
| "It should be a float." | |||
| LINEAGE_MODEL_SIZE_ERROR = "The parameter model_size is invalid. " \ | |||
| "It should be an integer between 0 and pow(2, 63) -1." | |||
| LINEAGE_LEARNING_RATE_ERROR = "The parameter learning_rate is invalid. " \ | |||
| "It should be a dict and the value should be a float or an integer" | |||
| LINEAGE_PARAM_SORTED_NAME_ERROR = "The parameter sorted_name is invalid. " \ | |||
| "It should be a string." | |||
| LINEAGE_PARAM_SORTED_TYPE_ERROR = "The parameter sorted_type is invalid. " \ | |||
| "It should be a string." | |||
| LINEAGE_PARAM_LINEAGE_TYPE_ERROR = "The parameter lineage_type is invalid. " \ | |||
| "It should be None, 'dataset' or 'model'." | |||
| SUMMARY_ANALYZE_ERROR = "Failed to analyze summary log. {}" | |||
| SUMMARY_VERIFICATION_ERROR = "Verification failed in summary analysis. {}" | |||
| # Querier error codes. | |||
    EVENT_NOT_EXIST_ERROR = "The train and evaluation events do not exist in the summary log."
    QUERIER_PARAM_ERROR = "Querier param <{}> is invalid. {}"
    SUMMARY_PARSE_FAIL_ERROR = "All summary logs parsing failed."
    EVENT_FIELD_NOT_EXIST_ERROR = 'Event field <{}> does not exist.'
    LOG_LINEAGE_INFO_ERROR = "Failed to write lineage information to the log file. {}"
| @@ -0,0 +1,191 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Definition of error code and relative messages in lineage module.""" | |||
| from mindinsight.utils.exceptions import MindInsightException | |||
| from mindinsight.lineagemgr.common.exceptions.error_code import LineageErrors, LineageErrorMsg | |||
| class LineageParamTypeError(MindInsightException): | |||
| """The parameter type error in lineage module.""" | |||
| def __init__(self, msg): | |||
| super(LineageParamTypeError, self).__init__( | |||
| error=LineageErrors.PARAM_TYPE_ERROR, | |||
| message=LineageErrorMsg.PARAM_TYPE_ERROR.value.format(msg) | |||
| ) | |||
| class LineageParamValueError(MindInsightException): | |||
| """The parameter value error in lineage module.""" | |||
| def __init__(self, msg): | |||
| super(LineageParamValueError, self).__init__( | |||
| error=LineageErrors.PARAM_VALUE_ERROR, | |||
| message=LineageErrorMsg.PARAM_VALUE_ERROR.value.format(msg) | |||
| ) | |||
| class LineageParamMissingError(MindInsightException): | |||
| """The parameter missing error in lineage module.""" | |||
| def __init__(self, msg): | |||
| super(LineageParamMissingError, self).__init__( | |||
| error=LineageErrors.PARAM_MISSING_ERROR, | |||
| message=LineageErrorMsg.PARAM_MISSING_ERROR.value.format(msg) | |||
| ) | |||
| class LineageParamRunContextError(MindInsightException): | |||
| """The input parameter run_context error in lineage module.""" | |||
| def __init__(self, msg): | |||
| super(LineageParamRunContextError, self).__init__( | |||
| error=LineageErrors.PARAM_RUN_CONTEXT_ERROR, | |||
| message=LineageErrorMsg.PARAM_RUN_CONTEXT_ERROR.value.format(msg) | |||
| ) | |||
| class LineageGetModelFileError(MindInsightException): | |||
| """The get model file error in lineage module.""" | |||
| def __init__(self, msg): | |||
| super(LineageGetModelFileError, self).__init__( | |||
| error=LineageErrors.LINEAGE_GET_MODEL_FILE_ERROR, | |||
| message=LineageErrorMsg.LINEAGE_GET_MODEL_FILE_ERROR.value.format(msg) | |||
| ) | |||
| class LineageSearchModelParamError(MindInsightException): | |||
| """The lineage search model param error.""" | |||
| def __init__(self, msg): | |||
| super(LineageSearchModelParamError, self).__init__( | |||
| error=LineageErrors.LINEAGE_PARAM_NOT_SUPPORT_ERROR, | |||
| message=LineageErrorMsg.LINEAGE_PARAM_NOT_SUPPORT_ERROR.value.format(msg) | |||
| ) | |||
| class LineageSummaryAnalyzeException(MindInsightException): | |||
| """The summary analyze error in lineage module.""" | |||
| def __init__(self, msg=None): | |||
| if msg is None: | |||
| msg = '' | |||
| super(LineageSummaryAnalyzeException, self).__init__( | |||
| error=LineageErrors.SUMMARY_ANALYZE_ERROR, | |||
| message=LineageErrorMsg.SUMMARY_ANALYZE_ERROR.value.format(msg) | |||
| ) | |||
| class LineageVerificationException(MindInsightException): | |||
| """The summary verification error in lineage module.""" | |||
| def __init__(self, msg): | |||
| super(LineageVerificationException, self).__init__( | |||
| error=LineageErrors.SUMMARY_VERIFICATION_ERROR, | |||
| message=LineageErrorMsg.SUMMARY_VERIFICATION_ERROR.value.format(msg) | |||
| ) | |||
| class LineageLogError(MindInsightException): | |||
| """The lineage collector error.""" | |||
| def __init__(self, msg): | |||
| super(LineageLogError, self).__init__( | |||
| error=LineageErrors.LOG_LINEAGE_INFO_ERROR, | |||
| message=LineageErrorMsg.LOG_LINEAGE_INFO_ERROR.value.format(msg) | |||
| ) | |||
| class LineageEventNotExistException(MindInsightException): | |||
| """The querier error in lineage module.""" | |||
| def __init__(self): | |||
| super(LineageEventNotExistException, self).__init__( | |||
| error=LineageErrors.EVENT_NOT_EXIST_ERROR, | |||
| message=LineageErrorMsg.EVENT_NOT_EXIST_ERROR.value | |||
| ) | |||
| class LineageQuerierParamException(MindInsightException): | |||
| """The querier error in lineage module.""" | |||
| def __init__(self, *msg): | |||
| super(LineageQuerierParamException, self).__init__( | |||
| error=LineageErrors.QUERIER_PARAM_ERROR, | |||
| message=LineageErrorMsg.QUERIER_PARAM_ERROR.value.format(*msg) | |||
| ) | |||
| class LineageSummaryParseException(MindInsightException): | |||
| """The querier error in lineage module.""" | |||
| def __init__(self): | |||
| super(LineageSummaryParseException, self).__init__( | |||
| error=LineageErrors.SUMMARY_PARSE_FAIL_ERROR, | |||
| message=LineageErrorMsg.SUMMARY_PARSE_FAIL_ERROR.value | |||
| ) | |||
| class LineageEventFieldNotExistException(MindInsightException): | |||
| """The querier error in lineage module.""" | |||
| def __init__(self, msg): | |||
| super(LineageEventFieldNotExistException, self).__init__( | |||
| error=LineageErrors.EVENT_FIELD_NOT_EXIST_ERROR, | |||
| message=LineageErrorMsg.EVENT_FIELD_NOT_EXIST_ERROR.value.format(msg) | |||
| ) | |||
| class LineageParamSummaryPathError(MindInsightException): | |||
| """The lineage parameter summary path error.""" | |||
| def __init__(self, msg): | |||
| super(LineageParamSummaryPathError, self).__init__( | |||
| error=LineageErrors.LINEAGE_PARAM_SUMMARY_PATH_ERROR, | |||
| message=LineageErrorMsg.LINEAGE_PARAM_SUMMARY_PATH_ERROR.value.format(msg) | |||
| ) | |||
| class LineageQuerySummaryDataError(MindInsightException): | |||
| """Query summary data error in lineage module.""" | |||
| def __init__(self, msg): | |||
| super(LineageQuerySummaryDataError, self).__init__( | |||
| error=LineageErrors.LINEAGE_SUMMARY_DATA_ERROR, | |||
| message=LineageErrorMsg.LINEAGE_SUMMARY_DATA_ERROR.value.format(msg) | |||
| ) | |||
| class LineageFileNotFoundError(MindInsightException): | |||
| """Summary file not found in lineage module.""" | |||
| def __init__(self, msg): | |||
| super(LineageFileNotFoundError, self).__init__( | |||
| error=LineageErrors.LINEAGE_FILE_NOT_FOUND_ERROR, | |||
| message=LineageErrorMsg.LINEAGE_FILE_NOT_FOUND_ERROR.value.format(msg) | |||
| ) | |||
| class LineageDirNotExistError(MindInsightException): | |||
| """Directory not exist in lineage module.""" | |||
| def __init__(self, msg): | |||
| super(LineageDirNotExistError, self).__init__( | |||
| error=LineageErrors.LINEAGE_DIR_NOT_EXIST_ERROR, | |||
| message=LineageErrorMsg.LINEAGE_DIR_NOT_EXIST_ERROR.value.format(msg) | |||
| ) | |||
| class LineageSearchConditionParamError(MindInsightException): | |||
| """Search condition param is invalid in lineage module.""" | |||
| def __init__(self, msg): | |||
| super(LineageSearchConditionParamError, self).__init__( | |||
| error=LineageErrors.LINEAGE_SEARCH_CONDITION_PARAM_ERROR, | |||
| message=LineageErrorMsg.LINEAGE_SEARCH_CONDITION_PARAM_ERROR.value.format(msg) | |||
| ) | |||
| @@ -0,0 +1,20 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Import mindinsight unified log module.""" | |||
| from mindinsight.utils.log import setup_logger | |||
| LOG_NAME = "lineage" | |||
| LOG_MODULE = "lineage" | |||
| logger = setup_logger(sub_module=LOG_MODULE, log_name=LOG_NAME) | |||
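# A minimal usage sketch; `logger` behaves like a standard `logging.Logger`
# and writes to the lineage sub-module log:
#
#     from mindinsight.lineagemgr.common.log import logger
#     logger.info("Start to collect lineage info.")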
| @@ -0,0 +1,149 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """This file provides path resolution.""" | |||
| import os | |||
| from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher | |||
| class SummaryPathParser: | |||
| """ | |||
| Summary path parser. | |||
    This is a utility class. Users can use it to parse summary dirs, parse
    summary log paths, get the latest lineage summary log, etc.
| """ | |||
| LINEAGE_SUMMARY_SUFFIX = '_lineage' | |||
| _LINEAGE_SUMMARY_SUFFIX_LEN = len(LINEAGE_SUMMARY_SUFFIX) | |||
| @staticmethod | |||
| def get_summary_dirs(summary_base_dir): | |||
| """ | |||
| Get summary dirs according to summary base dir. | |||
| Args: | |||
| summary_base_dir (str): Summary base dir. | |||
| Returns: | |||
            list[str], all summary dirs in the summary base dir. Each summary
            dir is an absolute path.
| """ | |||
| summary_watcher = SummaryWatcher() | |||
| relative_dirs = summary_watcher.list_summary_directories( | |||
| summary_base_dir=summary_base_dir | |||
| ) | |||
| summary_dirs = list( | |||
| map( | |||
| lambda item: os.path.realpath( | |||
| os.path.join(summary_base_dir, item.get('relative_path')) | |||
| ), | |||
| relative_dirs | |||
| ) | |||
| ) | |||
| return summary_dirs | |||
| @staticmethod | |||
| def get_latest_lineage_summary(summary_dir): | |||
| """ | |||
| Get latest lineage summary log path according to summary dir. | |||
| Args: | |||
| summary_dir (str): Summary dir. | |||
| Returns: | |||
            Union[str, None], if the lineage summary log exists, return its
            absolute path; otherwise return None.
| """ | |||
| summary_watcher = SummaryWatcher() | |||
| summaries = summary_watcher.list_summaries(summary_base_dir=summary_dir) | |||
| latest_file_name = SummaryPathParser._get_latest_lineage_file(summaries) | |||
| return os.path.join(summary_dir, latest_file_name) \ | |||
| if latest_file_name is not None else None | |||
| @staticmethod | |||
| def get_latest_lineage_summaries(summary_base_dir): | |||
| """ | |||
| Get all latest lineage summary logs in summary base dir. | |||
| Args: | |||
| summary_base_dir (str): Summary base dir. | |||
| Returns: | |||
            list[str], all latest lineage summary logs in the summary base dir.
            Each lineage summary log path is an absolute path.
| """ | |||
| summary_watcher = SummaryWatcher() | |||
| relative_dirs = summary_watcher.list_summary_directories( | |||
| summary_base_dir=summary_base_dir | |||
| ) | |||
| latest_summaries = [] | |||
| for item in relative_dirs: | |||
| relative_dir = item.get('relative_path') | |||
| summaries = summary_watcher.list_summaries( | |||
| summary_base_dir=summary_base_dir, | |||
| relative_path=relative_dir | |||
| ) | |||
| latest_file_name = SummaryPathParser._get_latest_lineage_file( | |||
| summaries | |||
| ) | |||
| if latest_file_name is None: | |||
| continue | |||
| latest_file = os.path.realpath( | |||
| os.path.join( | |||
| summary_base_dir, | |||
| relative_dir, | |||
| latest_file_name | |||
| ) | |||
| ) | |||
| latest_summaries.append(latest_file) | |||
| return latest_summaries | |||
| @staticmethod | |||
| def _get_latest_lineage_file(summaries): | |||
| """ | |||
| Get latest lineage summary file. | |||
        A file whose name ends with the suffix `LINEAGE_SUMMARY_SUFFIX` is
        considered a lineage summary log only if a file with the same name,
        minus that suffix, also exists among the summaries.
| Args: | |||
| summaries (list[dict]): All summary logs info in summary dir. | |||
| Returns: | |||
            Union[str, None], the latest lineage summary file name, or None
            if it does not exist.
| """ | |||
| try: | |||
| latest_summary = max( | |||
| summaries, | |||
| key=lambda summary: summary.get('create_time') | |||
| ) | |||
| except ValueError: | |||
| return None | |||
| max_create_time = latest_summary.get('create_time') | |||
| summary_file_names = [] | |||
| for summary in summaries: | |||
| if summary.get('create_time') == max_create_time: | |||
| summary_file_names.append(summary.get('file_name')) | |||
| latest_lineage_name = None | |||
| for name in summary_file_names: | |||
| if not name.endswith(SummaryPathParser.LINEAGE_SUMMARY_SUFFIX): | |||
| continue | |||
| ms_name = name[:-SummaryPathParser._LINEAGE_SUMMARY_SUFFIX_LEN] | |||
| if ms_name in summary_file_names: | |||
| latest_lineage_name = name | |||
| return latest_lineage_name | |||
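    # A minimal sketch of the pairing rule above (the file names and create
    # times are hypothetical):
    #
    #     summaries = [
    #         {'file_name': 'run.summary', 'create_time': 100},
    #         {'file_name': 'run.summary_lineage', 'create_time': 100},
    #     ]
    #     # 'run.summary_lineage' ends with '_lineage' and its base name
    #     # 'run.summary' also exists, so it is the latest lineage log.
    #     assert SummaryPathParser._get_latest_lineage_file(summaries) == 'run.summary_lineage'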
| @@ -0,0 +1,56 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Lineage utils.""" | |||
| from functools import wraps | |||
| from mindinsight.lineagemgr.common.exceptions.exceptions import LineageParamRunContextError, \ | |||
| LineageGetModelFileError, LineageLogError | |||
| from mindinsight.utils.exceptions import MindInsightException | |||
def enum_to_list(enum):
    """Convert an enum class to a list of its member values."""
    return [enum_ele.value for enum_ele in enum]
| def try_except(logger): | |||
| """ | |||
| Catch or raise exceptions while collecting lineage. | |||
| Args: | |||
        logger (logger): The logger instance which records the error info.
| Returns: | |||
        function, the decorator which wraps the decorated function with
        exception handling.
| """ | |||
| def try_except_decorate(func): | |||
| @wraps(func) | |||
| def wrapper(self, *args, **kwargs): | |||
| try: | |||
| func(self, *args, **kwargs) | |||
| except (AttributeError, MindInsightException, | |||
| LineageParamRunContextError, LineageLogError, | |||
| LineageGetModelFileError, IOError) as err: | |||
| logger.error(err) | |||
| try: | |||
| raise_except = self.raise_exception | |||
| except AttributeError: | |||
| raise_except = False | |||
| if raise_except is True: | |||
| raise | |||
| return wrapper | |||
| return try_except_decorate | |||
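# A minimal usage sketch, assuming a collector class that exposes a
# `raise_exception` attribute (the class and method below are hypothetical):
#
#     from mindinsight.lineagemgr.common.log import logger
#
#     class LineageCollector:
#         def __init__(self, raise_exception=False):
#             self.raise_exception = raise_exception
#
#         @try_except(logger)
#         def collect(self):
#             raise IOError('summary file unreadable')
#
#     # With raise_exception=False the error is only logged; with True it is
#     # re-raised after being logged.
#     LineageCollector().collect()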
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,253 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Define schema of model lineage input parameters.""" | |||
| from marshmallow import Schema, fields, ValidationError, pre_load, validates | |||
| from marshmallow.validate import Range, OneOf | |||
| from mindinsight.lineagemgr.common.exceptions.error_code import LineageErrorMsg, \ | |||
| LineageErrors | |||
| from mindinsight.lineagemgr.common.exceptions.exceptions import \ | |||
| LineageParamTypeError, LineageParamValueError | |||
| from mindinsight.lineagemgr.common.log import logger | |||
| from mindinsight.lineagemgr.common.utils import enum_to_list | |||
| from mindinsight.lineagemgr.querier.querier import LineageType | |||
| from mindinsight.lineagemgr.querier.query_model import FIELD_MAPPING | |||
| from mindinsight.utils.exceptions import MindInsightException | |||
| try: | |||
| from mindspore.dataset.engine import Dataset | |||
| from mindspore.nn import Cell, Optimizer | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.train.callback import _ListCallback | |||
| except (ImportError, ModuleNotFoundError): | |||
| logger.error('MindSpore Not Found!') | |||
| class RunContextArgs(Schema): | |||
| """Define the parameter schema for RunContext.""" | |||
| optimizer = fields.Function(allow_none=True) | |||
| loss_fn = fields.Function(allow_none=True) | |||
| net_outputs = fields.Function(allow_none=True) | |||
| train_network = fields.Function(allow_none=True) | |||
| train_dataset = fields.Function(allow_none=True) | |||
| epoch_num = fields.Int(allow_none=True, validate=Range(min=1)) | |||
| batch_num = fields.Int(allow_none=True, validate=Range(min=0)) | |||
| cur_step_num = fields.Int(allow_none=True, validate=Range(min=0)) | |||
| parallel_mode = fields.Str(allow_none=True) | |||
| device_number = fields.Int(allow_none=True, validate=Range(min=1)) | |||
| list_callback = fields.Function(allow_none=True) | |||
| @pre_load | |||
| def check_optimizer(self, data, **kwargs): | |||
| optimizer = data.get("optimizer") | |||
| if optimizer and not isinstance(optimizer, Optimizer): | |||
| raise ValidationError({'optimizer': [ | |||
| "Parameter optimizer must be an instance of mindspore.nn.optim.Optimizer." | |||
| ]}) | |||
| return data | |||
| @pre_load | |||
| def check_train_network(self, data, **kwargs): | |||
| train_network = data.get("train_network") | |||
| if train_network and not isinstance(train_network, Cell): | |||
| raise ValidationError({'train_network': [ | |||
| "Parameter train_network must be an instance of mindspore.nn.Cell."]}) | |||
| return data | |||
| @pre_load | |||
| def check_train_dataset(self, data, **kwargs): | |||
| train_dataset = data.get("train_dataset") | |||
| if train_dataset and not isinstance(train_dataset, Dataset): | |||
| raise ValidationError({'train_dataset': [ | |||
| "Parameter train_dataset must be an instance of " | |||
| "mindspore.dataengine.datasets.Dataset"]}) | |||
| return data | |||
| @pre_load | |||
| def check_loss(self, data, **kwargs): | |||
| net_outputs = data.get("net_outputs") | |||
| if net_outputs and not isinstance(net_outputs, Tensor): | |||
            raise ValidationError({'net_outputs': [
| "The parameter net_outputs is invalid. It should be a Tensor." | |||
| ]}) | |||
| return data | |||
| @pre_load | |||
| def check_list_callback(self, data, **kwargs): | |||
| list_callback = data.get("list_callback") | |||
| if list_callback and not isinstance(list_callback, _ListCallback): | |||
| raise ValidationError({'list_callback': [ | |||
| "Parameter list_callback must be an instance of " | |||
| "mindspore.train.callback._ListCallback." | |||
| ]}) | |||
| return data | |||
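# A hedged sketch of validating a run_context dict with this schema; only
# scalar fields are shown here, real MindSpore objects are omitted:
#
#     errors = RunContextArgs().validate({'epoch_num': 10, 'device_number': 1})
#     assert not errors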
| class EvalParameter(Schema): | |||
| """Define the parameter schema for Evaluation job.""" | |||
| valid_dataset = fields.Function(allow_none=True) | |||
| metrics = fields.Dict(allow_none=True) | |||
| @pre_load | |||
| def check_valid_dataset(self, data, **kwargs): | |||
| valid_dataset = data.get("valid_dataset") | |||
| if valid_dataset and not isinstance(valid_dataset, Dataset): | |||
| raise ValidationError({'valid_dataset': [ | |||
| "Parameter valid_dataset must be an instance of " | |||
| "mindspore.dataengine.datasets.Dataset"]}) | |||
| return data | |||
| class SearchModelConditionParameter(Schema): | |||
| """Define the search model condition parameter schema.""" | |||
| summary_dir = fields.Dict() | |||
| loss_function = fields.Dict() | |||
| train_dataset_path = fields.Dict() | |||
| train_dataset_count = fields.Dict() | |||
| test_dataset_path = fields.Dict() | |||
| test_dataset_count = fields.Dict() | |||
| network = fields.Dict() | |||
| optimizer = fields.Dict() | |||
| learning_rate = fields.Dict() | |||
| epoch = fields.Dict() | |||
| batch_size = fields.Dict() | |||
| loss = fields.Dict() | |||
| model_size = fields.Dict() | |||
| limit = fields.Int(validate=lambda n: 0 < n <= 100) | |||
| offset = fields.Int(validate=lambda n: 0 <= n <= 100000) | |||
| sorted_name = fields.Str() | |||
| sorted_type = fields.Str(allow_none=True) | |||
| lineage_type = fields.Str( | |||
| validate=OneOf(enum_to_list(LineageType)), | |||
| allow_none=True | |||
| ) | |||
| @staticmethod | |||
| def check_dict_value_type(data, value_type): | |||
| """Check dict value type and int scope.""" | |||
| for key, value in data.items(): | |||
| if key == "in": | |||
| if not isinstance(value, (list, tuple)): | |||
| raise ValidationError("In operation's value must be list or tuple.") | |||
| else: | |||
| if not isinstance(value, value_type): | |||
| raise ValidationError("Wrong value type.") | |||
| if value_type is int: | |||
| if value < 0 or value > pow(2, 63) - 1: | |||
| raise ValidationError("Int value should <= pow(2, 63) - 1.") | |||
| if isinstance(value, bool): | |||
| raise ValidationError("Wrong value type.") | |||
| @staticmethod | |||
| def check_param_value_type(data): | |||
| """Check input param's value type.""" | |||
| for key, value in data.items(): | |||
| if key == "in": | |||
| if not isinstance(value, (list, tuple)): | |||
| raise ValidationError("In operation's value must be list or tuple.") | |||
| else: | |||
| if isinstance(value, bool) or \ | |||
| (not isinstance(value, float) and not isinstance(value, int)): | |||
| raise ValidationError("Wrong value type.") | |||
| @validates("loss") | |||
| def check_loss(self, data): | |||
| """Check loss.""" | |||
| SearchModelConditionParameter.check_param_value_type(data) | |||
| @validates("learning_rate") | |||
| def check_learning_rate(self, data): | |||
| """Check learning_rate.""" | |||
| SearchModelConditionParameter.check_param_value_type(data) | |||
| @validates("loss_function") | |||
| def check_loss_function(self, data): | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @validates("train_dataset_path") | |||
| def check_train_dataset_path(self, data): | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @validates("train_dataset_count") | |||
| def check_train_dataset_count(self, data): | |||
| SearchModelConditionParameter.check_dict_value_type(data, int) | |||
| @validates("test_dataset_path") | |||
| def check_test_dataset_path(self, data): | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @validates("test_dataset_count") | |||
| def check_test_dataset_count(self, data): | |||
| SearchModelConditionParameter.check_dict_value_type(data, int) | |||
| @validates("network") | |||
| def check_network(self, data): | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @validates("optimizer") | |||
| def check_optimizer(self, data): | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @validates("epoch") | |||
| def check_epoch(self, data): | |||
| SearchModelConditionParameter.check_dict_value_type(data, int) | |||
| @validates("batch_size") | |||
| def check_batch_size(self, data): | |||
| SearchModelConditionParameter.check_dict_value_type(data, int) | |||
| @validates("model_size") | |||
| def check_model_size(self, data): | |||
| SearchModelConditionParameter.check_dict_value_type(data, int) | |||
| @validates("summary_dir") | |||
| def check_summary_dir(self, data): | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @pre_load | |||
    def check_comparison(self, data, **kwargs):
        """Check the comparison operations for all parameters in the schema."""
| for attr, condition in data.items(): | |||
| if attr in ["limit", "offset", "sorted_name", "sorted_type", "lineage_type"]: | |||
| continue | |||
            if not isinstance(attr, str):
                raise LineageParamValueError('The search attribute is not supported.')
            if attr not in FIELD_MAPPING and not attr.startswith('metric_'):
                raise LineageParamValueError('The search attribute is not supported.')
| if not isinstance(condition, dict): | |||
| raise LineageParamTypeError("The search_condition element {} should be dict." | |||
| .format(attr)) | |||
| for key in condition.keys(): | |||
| if key not in ["eq", "lt", "gt", "le", "ge", "in"]: | |||
| raise LineageParamValueError("The compare condition should be in " | |||
| "('eq', 'lt', 'gt', 'le', 'ge', 'in').") | |||
            if attr.startswith('metric_'):
                # 'metric_' alone (length 7) carries no metric name after the prefix.
                if len(attr) == 7:
                    raise LineageParamValueError(
                        'The search attribute is not supported.'
                    )
| try: | |||
| SearchModelConditionParameter.check_param_value_type(condition) | |||
| except ValidationError: | |||
| raise MindInsightException( | |||
| error=LineageErrors.LINEAGE_PARAM_METRIC_ERROR, | |||
| message=LineageErrorMsg.LINEAGE_METRIC_ERROR.value.format(attr) | |||
| ) | |||
| return data | |||
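# A hedged example of a search condition this schema accepts; the directory
# names and values are made up, and summary_dir and learning_rate are assumed
# to be supported search fields (they appear in the error mappings below):
#
#     condition = {
#         'summary_dir': {'in': ['./run1', './run2']},
#         'learning_rate': {'gt': 0.001, 'lt': 0.01},
#         'limit': 10,
#         'offset': 0,
#     }
#     errors = SearchModelConditionParameter().validate(condition)
#     assert not errors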
| @@ -0,0 +1,395 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Validate the parameters.""" | |||
| import os | |||
| from marshmallow import ValidationError | |||
| from mindinsight.lineagemgr.common.exceptions.error_code import LineageErrors, LineageErrorMsg | |||
| from mindinsight.lineagemgr.common.exceptions.exceptions import LineageParamMissingError, \ | |||
| LineageParamTypeError, LineageParamValueError, LineageDirNotExistError | |||
| from mindinsight.lineagemgr.common.log import logger as log | |||
| from mindinsight.lineagemgr.common.validator.validate_path import safe_normalize_path | |||
| from mindinsight.lineagemgr.querier.query_model import FIELD_MAPPING | |||
| from mindinsight.utils.exceptions import MindInsightException | |||
| try: | |||
| from mindspore.nn import Cell | |||
| from mindspore.train.summary import SummaryRecord | |||
| except (ImportError, ModuleNotFoundError): | |||
| log.warning('MindSpore Not Found!') | |||
| TRAIN_RUN_CONTEXT_ERROR_MAPPING = { | |||
| 'optimizer': LineageErrors.PARAM_OPTIMIZER_ERROR, | |||
| 'loss_fn': LineageErrors.PARAM_LOSS_FN_ERROR, | |||
| 'net_outputs': LineageErrors.PARAM_NET_OUTPUTS_ERROR, | |||
| 'train_network': LineageErrors.PARAM_TRAIN_NETWORK_ERROR, | |||
| 'train_dataset': LineageErrors.PARAM_DATASET_ERROR, | |||
| 'epoch_num': LineageErrors.PARAM_EPOCH_NUM_ERROR, | |||
| 'batch_num': LineageErrors.PARAM_BATCH_NUM_ERROR, | |||
| 'parallel_mode': LineageErrors.PARAM_TRAIN_PARALLEL_ERROR, | |||
| 'device_number': LineageErrors.PARAM_DEVICE_NUMBER_ERROR, | |||
| 'list_callback': LineageErrors.PARAM_CALLBACK_LIST_ERROR, | |||
| 'train_dataset_size': LineageErrors.PARAM_DATASET_SIZE_ERROR, | |||
| } | |||
| SEARCH_MODEL_ERROR_MAPPING = { | |||
| 'summary_dir': LineageErrors.LINEAGE_PARAM_SUMMARY_DIR_ERROR, | |||
| 'loss_function': LineageErrors.LINEAGE_PARAM_LOSS_FUNCTION_ERROR, | |||
| 'train_dataset_path': LineageErrors.LINEAGE_PARAM_TRAIN_DATASET_PATH_ERROR, | |||
| 'train_dataset_count': LineageErrors.LINEAGE_PARAM_TRAIN_DATASET_COUNT_ERROR, | |||
| 'test_dataset_path': LineageErrors.LINEAGE_PARAM_TEST_DATASET_PATH_ERROR, | |||
| 'test_dataset_count': LineageErrors.LINEAGE_PARAM_TEST_DATASET_COUNT_ERROR, | |||
| 'network': LineageErrors.LINEAGE_PARAM_NETWORK_ERROR, | |||
| 'optimizer': LineageErrors.LINEAGE_PARAM_OPTIMIZER_ERROR, | |||
| 'learning_rate': LineageErrors.LINEAGE_PARAM_LEARNING_RATE_ERROR, | |||
| 'epoch': LineageErrors.LINEAGE_PARAM_EPOCH_ERROR, | |||
| 'batch_size': LineageErrors.LINEAGE_PARAM_BATCH_SIZE_ERROR, | |||
| 'limit': LineageErrors.PARAM_VALUE_ERROR, | |||
| 'offset': LineageErrors.PARAM_VALUE_ERROR, | |||
| 'loss': LineageErrors.LINEAGE_PARAM_LOSS_ERROR, | |||
| 'model_size': LineageErrors.LINEAGE_PARAM_MODEL_SIZE_ERROR, | |||
| 'sorted_name': LineageErrors.LINEAGE_PARAM_SORTED_NAME_ERROR, | |||
| 'sorted_type': LineageErrors.LINEAGE_PARAM_SORTED_TYPE_ERROR, | |||
| 'lineage_type': LineageErrors.LINEAGE_PARAM_LINEAGE_TYPE_ERROR | |||
| } | |||
| TRAIN_RUN_CONTEXT_ERROR_MSG_MAPPING = { | |||
| 'optimizer': LineageErrorMsg.PARAM_OPTIMIZER_ERROR.value, | |||
| 'loss_fn': LineageErrorMsg.PARAM_LOSS_FN_ERROR.value, | |||
| 'net_outputs': LineageErrorMsg.PARAM_NET_OUTPUTS_ERROR.value, | |||
| 'train_network': LineageErrorMsg.PARAM_TRAIN_NETWORK_ERROR.value, | |||
| 'epoch_num': LineageErrorMsg.PARAM_EPOCH_NUM_ERROR.value, | |||
| 'batch_num': LineageErrorMsg.PARAM_BATCH_NUM_ERROR.value, | |||
| 'parallel_mode': LineageErrorMsg.PARAM_TRAIN_PARALLEL_ERROR.value, | |||
| 'device_number': LineageErrorMsg.PARAM_DEVICE_NUMBER_ERROR.value, | |||
| 'list_callback': LineageErrorMsg.PARAM_CALLBACK_LIST_ERROR.value | |||
| } | |||
| SEARCH_MODEL_ERROR_MSG_MAPPING = { | |||
| 'summary_dir': LineageErrorMsg.LINEAGE_PARAM_SUMMARY_DIR_ERROR.value, | |||
| 'loss_function': LineageErrorMsg.LINEAGE_LOSS_FUNCTION_ERROR.value, | |||
| 'train_dataset_path': LineageErrorMsg.LINEAGE_TRAIN_DATASET_PATH_ERROR.value, | |||
| 'train_dataset_count': LineageErrorMsg.LINEAGE_TRAIN_DATASET_COUNT_ERROR.value, | |||
| 'test_dataset_path': LineageErrorMsg.LINEAGE_TEST_DATASET_PATH_ERROR.value, | |||
| 'test_dataset_count': LineageErrorMsg.LINEAGE_TEST_DATASET_COUNT_ERROR.value, | |||
| 'network': LineageErrorMsg.LINEAGE_NETWORK_ERROR.value, | |||
| 'optimizer': LineageErrorMsg.LINEAGE_OPTIMIZER_ERROR.value, | |||
| 'learning_rate': LineageErrorMsg.LINEAGE_LEARNING_RATE_ERROR.value, | |||
| 'epoch': LineageErrorMsg.PARAM_EPOCH_NUM_ERROR.value, | |||
| 'batch_size': LineageErrorMsg.PARAM_BATCH_SIZE_ERROR.value, | |||
| 'limit': LineageErrorMsg.PARAM_LIMIT_ERROR.value, | |||
| 'offset': LineageErrorMsg.PARAM_OFFSET_ERROR.value, | |||
| 'loss': LineageErrorMsg.LINEAGE_LOSS_ERROR.value, | |||
| 'model_size': LineageErrorMsg.LINEAGE_MODEL_SIZE_ERROR.value, | |||
| 'sorted_name': LineageErrorMsg.LINEAGE_PARAM_SORTED_NAME_ERROR.value, | |||
| 'sorted_type': LineageErrorMsg.LINEAGE_PARAM_SORTED_TYPE_ERROR.value, | |||
| 'lineage_type': LineageErrorMsg.LINEAGE_PARAM_LINEAGE_TYPE_ERROR.value | |||
| } | |||
| EVAL_RUN_CONTEXT_ERROR_MAPPING = { | |||
| 'valid_dataset': LineageErrors.PARAM_DATASET_ERROR, | |||
| 'metrics': LineageErrors.PARAM_EVAL_METRICS_ERROR | |||
| } | |||
| EVAL_RUN_CONTEXT_ERROR_MSG_MAPPING = { | |||
| 'metrics': LineageErrorMsg.PARAM_EVAL_METRICS_ERROR.value, | |||
| } | |||
| def validate_int_params(int_param, param_name): | |||
| """ | |||
    Verify whether an integer parameter is valid.
    Args:
        int_param (int): The integer parameter to check, such as epoch,
            dataset_batch_size or step_num.
        param_name (str): The name of the parameter, such as epoch,
            dataset_batch_size or step_num.
| Raises: | |||
| MindInsightException: If the parameters are invalid. | |||
| """ | |||
| if not isinstance(int_param, int) or int_param <= 0 or int_param > pow(2, 63) - 1: | |||
| if param_name == 'step_num': | |||
| log.error('Invalid step_num. The step number should be a positive integer.') | |||
| raise MindInsightException(error=LineageErrors.PARAM_STEP_NUM_ERROR, | |||
| message=LineageErrorMsg.PARAM_STEP_NUM_ERROR.value) | |||
| if param_name == 'dataset_batch_size': | |||
| log.error('Invalid dataset_batch_size. ' | |||
| 'The batch size should be a positive integer.') | |||
| raise MindInsightException(error=LineageErrors.PARAM_BATCH_SIZE_ERROR, | |||
| message=LineageErrorMsg.PARAM_BATCH_SIZE_ERROR.value) | |||
| def validate_network(network): | |||
| """ | |||
| Verify if the network is valid. | |||
| Args: | |||
| network (Cell): See mindspore.nn.Cell. | |||
| Raises: | |||
| LineageParamMissingError: If the network is None. | |||
| MindInsightException: If the network is invalid. | |||
| """ | |||
| if not network: | |||
| error_msg = "The input network for TrainLineage should not be None." | |||
| log.error(error_msg) | |||
| raise LineageParamMissingError(error_msg) | |||
| if not isinstance(network, Cell): | |||
| log.error("Invalid network. Network should be an instance" | |||
| "of mindspore.nn.Cell.") | |||
| raise MindInsightException( | |||
| error=LineageErrors.PARAM_TRAIN_NETWORK_ERROR, | |||
| message=LineageErrorMsg.PARAM_TRAIN_NETWORK_ERROR.value | |||
| ) | |||
| def validate_file_path(file_path, allow_empty=False): | |||
| """ | |||
| Verify that the file_path is valid. | |||
| Args: | |||
| file_path (str): Input file path. | |||
| allow_empty (bool): Whether file_path can be empty. | |||
| Raises: | |||
| MindInsightException: If the parameters are invalid. | |||
| """ | |||
| try: | |||
| if allow_empty and not file_path: | |||
| return | |||
| safe_normalize_path(file_path, raise_key='dataset_path', safe_prefixes=None) | |||
| except ValidationError as error: | |||
| log.error(str(error)) | |||
| raise MindInsightException(error=LineageErrors.PARAM_FILE_PATH_ERROR, | |||
| message=str(error)) | |||
| def validate_train_run_context(schema, data): | |||
| """ | |||
| Validate mindspore train run_context data according to schema. | |||
| Args: | |||
| schema (Schema): data schema. | |||
| data (dict): data to check schema. | |||
| Raises: | |||
| MindInsightException: If the parameters are invalid. | |||
| """ | |||
| errors = schema().validate(data) | |||
| for error_key, error_msg in errors.items(): | |||
        if error_key in TRAIN_RUN_CONTEXT_ERROR_MAPPING:
| error_code = TRAIN_RUN_CONTEXT_ERROR_MAPPING.get(error_key) | |||
| if TRAIN_RUN_CONTEXT_ERROR_MSG_MAPPING.get(error_key): | |||
| error_msg = TRAIN_RUN_CONTEXT_ERROR_MSG_MAPPING.get(error_key) | |||
| log.error(error_msg) | |||
| raise MindInsightException(error=error_code, message=error_msg) | |||
| def validate_eval_run_context(schema, data): | |||
| """ | |||
| Validate mindspore evaluation job run_context data according to schema. | |||
| Args: | |||
| schema (Schema): data schema. | |||
| data (dict): data to check schema. | |||
| Raises: | |||
| MindInsightException: If the parameters are invalid. | |||
| """ | |||
| errors = schema().validate(data) | |||
| for error_key, error_msg in errors.items(): | |||
        if error_key in EVAL_RUN_CONTEXT_ERROR_MAPPING:
| error_code = EVAL_RUN_CONTEXT_ERROR_MAPPING.get(error_key) | |||
| if EVAL_RUN_CONTEXT_ERROR_MSG_MAPPING.get(error_key): | |||
| error_msg = EVAL_RUN_CONTEXT_ERROR_MSG_MAPPING.get(error_key) | |||
| log.error(error_msg) | |||
| raise MindInsightException(error=error_code, message=error_msg) | |||
| def validate_search_model_condition(schema, data): | |||
| """ | |||
| Validate search model condition. | |||
| Args: | |||
| schema (Schema): Data schema. | |||
| data (dict): Data to check schema. | |||
| Raises: | |||
| MindInsightException: If the parameters are invalid. | |||
| """ | |||
| error = schema().validate(data) | |||
| for error_key in error.keys(): | |||
        if error_key in SEARCH_MODEL_ERROR_MAPPING:
| error_code = SEARCH_MODEL_ERROR_MAPPING.get(error_key) | |||
| error_msg = SEARCH_MODEL_ERROR_MSG_MAPPING.get(error_key) | |||
| log.error(error_msg) | |||
| raise MindInsightException(error=error_code, message=error_msg) | |||
| def validate_summary_record(summary_record): | |||
| """ | |||
| Validate summary_record. | |||
| Args: | |||
| summary_record (SummaryRecord): SummaryRecord is used to record | |||
| the summary value, and summary_record is an instance of SummaryRecord, | |||
| see mindspore.train.summary.SummaryRecord | |||
| Raises: | |||
| MindInsightException: If the parameters are invalid. | |||
| """ | |||
| if not isinstance(summary_record, SummaryRecord): | |||
| log.error("Invalid summary_record. It should be an instance " | |||
| "of mindspore.train.summary.SummaryRecord.") | |||
| raise MindInsightException( | |||
| error=LineageErrors.PARAM_SUMMARY_RECORD_ERROR, | |||
| message=LineageErrorMsg.PARAM_SUMMARY_RECORD_ERROR.value | |||
| ) | |||
| def validate_raise_exception(raise_exception): | |||
| """ | |||
| Validate raise_exception. | |||
| Args: | |||
        raise_exception (bool): Whether to raise exceptions. If True, the
            exception is raised; otherwise, it is caught and execution continues.
| Raises: | |||
| MindInsightException: If the parameters are invalid. | |||
| """ | |||
| if not isinstance(raise_exception, bool): | |||
| log.error("Invalid raise_exception. It should be True or False.") | |||
| raise MindInsightException( | |||
| error=LineageErrors.PARAM_RAISE_EXCEPTION_ERROR, | |||
| message=LineageErrorMsg.PARAM_RAISE_EXCEPTION_ERROR.value | |||
| ) | |||
| def validate_filter_key(keys): | |||
| """ | |||
    Verify whether the filter keys are valid.
| Args: | |||
| keys (list): The keys to get the relative lineage info. | |||
| Raises: | |||
| LineageParamTypeError: If keys is not list. | |||
| LineageParamValueError: If the value of keys is invalid. | |||
| """ | |||
| filter_keys = [ | |||
| 'metric', 'hyper_parameters', 'algorithm', | |||
| 'train_dataset', 'model', 'valid_dataset', | |||
| 'dataset_graph' | |||
| ] | |||
| if not isinstance(keys, list): | |||
| log.error("Keys must be list.") | |||
| raise LineageParamTypeError("Keys must be list.") | |||
| for element in keys: | |||
| if not isinstance(element, str): | |||
| log.error("Element of keys must be str.") | |||
| raise LineageParamTypeError("Element of keys must be str.") | |||
| if not set(keys).issubset(filter_keys): | |||
| err_msg = "Keys must be in {}.".format(filter_keys) | |||
| log.error(err_msg) | |||
| raise LineageParamValueError(err_msg) | |||
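# A minimal usage sketch; the first call passes silently, while the second
# raises LineageParamValueError because 'loss' is not a supported filter key:
#
#     validate_filter_key(['metric', 'model'])
#     validate_filter_key(['loss'])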
| def validate_condition(search_condition): | |||
| """ | |||
    Verify whether the params in search_condition are valid.
| Args: | |||
| search_condition (dict): The search condition. | |||
| Raises: | |||
| LineageParamTypeError: If the type of the param in search_condition is invalid. | |||
| LineageParamValueError: If the value of the param in search_condition is invalid. | |||
| """ | |||
| if not isinstance(search_condition, dict): | |||
| log.error("Invalid search_condition type, it should be dict.") | |||
| raise LineageParamTypeError("Invalid search_condition type, " | |||
| "it should be dict.") | |||
| if "limit" in search_condition: | |||
| if isinstance(search_condition.get("limit"), bool) \ | |||
| or not isinstance(search_condition.get("limit"), int): | |||
| log.error("The limit must be int.") | |||
| raise LineageParamTypeError("The limit must be int.") | |||
| if "offset" in search_condition: | |||
| if isinstance(search_condition.get("offset"), bool) \ | |||
| or not isinstance(search_condition.get("offset"), int): | |||
| log.error("The offset must be int.") | |||
| raise LineageParamTypeError("The offset must be int.") | |||
| if "sorted_name" in search_condition: | |||
| sorted_name = search_condition.get("sorted_name") | |||
| err_msg = "The sorted_name must be in {} or start with " \ | |||
| "`metric_`.".format(list(FIELD_MAPPING.keys())) | |||
| if not isinstance(sorted_name, str): | |||
| log.error(err_msg) | |||
| raise LineageParamValueError(err_msg) | |||
| if sorted_name not in FIELD_MAPPING and not ( | |||
| sorted_name.startswith('metric_') and len(sorted_name) > 7): | |||
| log.error(err_msg) | |||
| raise LineageParamValueError(err_msg) | |||
| sorted_type_param = ['ascending', 'descending', None] | |||
| if "sorted_type" in search_condition: | |||
| if "sorted_name" not in search_condition: | |||
| log.error("The sorted_name have to exist when sorted_type exists.") | |||
| raise LineageParamValueError("The sorted_name have to exist when sorted_type exists.") | |||
| if search_condition.get("sorted_type") not in sorted_type_param: | |||
| err_msg = "The sorted_type must be ascending or descending." | |||
| log.error(err_msg) | |||
| raise LineageParamValueError(err_msg) | |||
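# A hedged example of a search_condition this function accepts, sorting by a
# hypothetical metric field:
#
#     validate_condition({
#         'limit': 10,
#         'offset': 0,
#         'sorted_name': 'metric_accuracy',
#         'sorted_type': 'descending',
#     })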
| def validate_path(summary_path): | |||
| """ | |||
    Verify whether the summary path is valid.
| Args: | |||
| summary_path (str): The summary path which is a dir. | |||
| Raises: | |||
| LineageParamValueError: If the input param value is invalid. | |||
| LineageDirNotExistError: If the summary path is invalid. | |||
| """ | |||
| try: | |||
| summary_path = safe_normalize_path( | |||
| summary_path, "summary_path", None, check_absolute_path=True | |||
| ) | |||
| except ValidationError: | |||
| log.error("The summary path is invalid.") | |||
| raise LineageParamValueError("The summary path is invalid.") | |||
| if not os.path.isdir(summary_path): | |||
| log.error("The summary path does not exist or is not a dir.") | |||
| raise LineageDirNotExistError("The summary path does not exist or is not a dir.") | |||
| return summary_path | |||
| @@ -0,0 +1,120 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Validate the input path.""" | |||
| import os | |||
| from typing import Union, List | |||
| from marshmallow import ValidationError | |||
| def safe_normalize_path( | |||
| path, | |||
| raise_key, | |||
| safe_prefixes: Union[None, List[str]], | |||
| check_absolute_path=False, | |||
| allow_parent_dir=False, | |||
| ): | |||
| """ | |||
    Returns a safe normalized path.
    This function validates the given path and returns its normalized form.
    If safe_prefixes is given, it also checks whether the path is safe.
| Note: | |||
| This func is not compatible with windows. | |||
| Caller should check returned path to ensure safety according to | |||
| business logic. | |||
| File scheme (rfc8089) is currently not supported. | |||
| Args: | |||
| path (str): Path to be normalized. | |||
        raise_key (str): The exception raise key.
        safe_prefixes (list[str]): If not None, the path must start with one
            of the safe_prefixes. Setting this arg to [] causes all paths to
            be considered unsafe. Normally, a prefix in this arg should end with "/".
| check_absolute_path (bool): Whether check path is absolute. | |||
| allow_parent_dir (bool): Whether allow parent dir in path. | |||
| Returns: | |||
| str, normalized path. | |||
| """ | |||
| normalized_path = validate_and_normalize_path( | |||
| path, | |||
| raise_key=raise_key, | |||
| check_absolute_path=check_absolute_path, | |||
| allow_parent_dir=allow_parent_dir, | |||
| ) | |||
| if safe_prefixes is None: | |||
| return normalized_path | |||
| normalized_str = str(normalized_path) | |||
| for prefix in safe_prefixes: | |||
| if normalized_str.startswith(prefix): | |||
| return normalized_path | |||
| raise ValidationError({raise_key: {"The path is invalid!"}}) | |||
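# A minimal usage sketch (the paths are hypothetical and assumed not to be
# symlinks); the first call succeeds because the normalized path starts with
# a safe prefix, the second raises ValidationError because an empty
# safe_prefixes list marks every path unsafe:
#
#     safe_normalize_path('/data/summary', 'summary_path', safe_prefixes=['/data/'])
#     safe_normalize_path('/data/summary', 'summary_path', safe_prefixes=[])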
| def validate_and_normalize_path( | |||
| path, | |||
| raise_key, | |||
| check_absolute_path=False, | |||
| allow_parent_dir=False, | |||
| ): | |||
| """ | |||
| Validates path and returns its normalized form. | |||
    If the path has a valid scheme, treat it as a URL; otherwise, consider it
    a unix local path.
| Note: | |||
| File scheme (rfc8089) is currently not supported. | |||
| Args: | |||
| path (str): Path to be normalized. | |||
| raise_key (str): The exception raise key. | |||
        check_absolute_path (bool): Whether to check that the path is absolute.
| allow_parent_dir (bool): Whether allow parent dir in path. | |||
| Returns: | |||
| str, normalized path. | |||
| """ | |||
| if not path: | |||
| raise ValidationError({raise_key: {"The path is invalid!"}}) | |||
| path_str = str(path) | |||
| if not allow_parent_dir: | |||
| path_components = path_str.split("/") | |||
| if ".." in path_components: | |||
| raise ValidationError({raise_key: {"The path is invalid!"}}) | |||
    # The path does not have a valid scheme; treat it as a unix local path.
| if check_absolute_path: | |||
| if not path_str.startswith("/"): | |||
| raise ValidationError({raise_key: {"The path is invalid!"}}) | |||
| try: | |||
        # os.path.realpath may raise ValueError, e.g. on an embedded null byte.
| normalized_path = os.path.realpath(path) | |||
| except ValueError: | |||
| raise ValidationError({raise_key: {"The path is invalid!"}}) | |||
| return normalized_path | |||
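# A minimal usage sketch (the paths are hypothetical and assumed not to be
# symlinks):
#
#     validate_and_normalize_path('/tmp/run', 'path')  # returns '/tmp/run'
#     # Raises ValidationError: parent dirs are rejected by default.
#     validate_and_normalize_path('/tmp/a/../b', 'path')
#     # Raises ValidationError: not absolute while check_absolute_path=True.
#     validate_and_normalize_path('relative/dir', 'path', check_absolute_path=True)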
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,446 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """This file is used to define lineage info querier.""" | |||
| import enum | |||
| import functools | |||
| import operator | |||
| import os | |||
| from mindinsight.lineagemgr.common.exceptions.exceptions import \ | |||
| LineageParamTypeError, LineageSummaryAnalyzeException, \ | |||
| LineageEventNotExistException, LineageQuerierParamException, \ | |||
| LineageSummaryParseException, LineageEventFieldNotExistException | |||
| from mindinsight.lineagemgr.common.log import logger | |||
| from mindinsight.lineagemgr.querier.query_model import LineageObj, FIELD_MAPPING | |||
| from mindinsight.lineagemgr.summary.lineage_summary_analyzer import \ | |||
| LineageSummaryAnalyzer | |||
| @enum.unique | |||
| class ConditionParam(enum.Enum): | |||
| """ | |||
| Filtering and sorting field names. | |||
| `LIMIT` represents the number of lineage info per page. `OFFSET` represents | |||
| page number. `SORTED_NAME` means to sort by this field. `SORTED_TYPE` means | |||
| ascending or descending. | |||
| """ | |||
| LIMIT = 'limit' | |||
| OFFSET = 'offset' | |||
| SORTED_NAME = 'sorted_name' | |||
| SORTED_TYPE = 'sorted_type' | |||
| LINEAGE_TYPE = 'lineage_type' | |||
| @classmethod | |||
| def is_condition_type(cls, value): | |||
| """ | |||
        Judge whether the input param is one of the field names in the class.
        Args:
            value (str): The input field name.
        Returns:
            bool, `True` if the input field name is in the class, else `False`.
| """ | |||
| return value in cls._value2member_map_ | |||
| @enum.unique | |||
| class ExpressionType(enum.Enum): | |||
| """ | |||
| Filter condition name definition. | |||
| `EQ` means `==`. `LT` means `<`. `GT` means `>`. `LE` means `<=`. `GE` means | |||
| `>=`. `IN` means filter value in the specified list. | |||
| """ | |||
| EQ = 'eq' | |||
| LT = 'lt' | |||
| GT = 'gt' | |||
| LE = 'le' | |||
| GE = 'ge' | |||
| IN = 'in' | |||
| @classmethod | |||
| def is_valid_exp(cls, key): | |||
| """ | |||
        Judge whether the input param is one of the filter condition names in the class.
        Args:
            key (str): The input filter condition name.
        Returns:
            bool, `True` if the input filter condition name is in the class,
            else `False`.
| """ | |||
| return key in cls._value2member_map_ | |||
| @classmethod | |||
    def is_match(cls, expect_key, expect_value, actual_value):
        """
        Determine whether the actual value meets the expected requirement.
        Args:
            expect_key (str): The expression key.
            expect_value (Union[str, int, float, list, tuple]): The expected
                value.
            actual_value (Union[str, int, float]): The actual value.
        Returns:
            bool, `True` if the actual value meets the expected requirement,
            else `False`.
        """
        if actual_value is None and expect_key in [cls.LT.value, cls.GT.value,
                                                   cls.LE.value, cls.GE.value]:
            return False
        if expect_key == cls.IN.value:
            state = operator.contains(expect_value, actual_value)
        else:
            state = getattr(operator, expect_key)(actual_value, expect_value)
        return state
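# A few hedged examples of the matching semantics above:
#
#     assert ExpressionType.is_match('le', 10, 5)         # 5 <= 10
#     assert ExpressionType.is_match('in', [1, 2, 3], 2)  # 2 in [1, 2, 3]
#     assert not ExpressionType.is_match('gt', 1, None)   # None never matches lt/gt/le/ge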
| @enum.unique | |||
| class LineageFilterKey(enum.Enum): | |||
| """Summary lineage information filter key.""" | |||
| METRIC = 'metric' | |||
| HYPER_PARAM = 'hyper_parameters' | |||
| ALGORITHM = 'algorithm' | |||
| TRAIN_DATASET = 'train_dataset' | |||
| VALID_DATASET = 'valid_dataset' | |||
| MODEL = 'model' | |||
| DATASET_GRAPH = 'dataset_graph' | |||
| @classmethod | |||
| def is_valid_filter_key(cls, key): | |||
| """ | |||
        Judge whether the input param is one of the field names in the class.
        Args:
            key (str): The input field name.
        Returns:
            bool, `True` if the input field name is in the class, else `False`.
| """ | |||
| return key in cls._value2member_map_ | |||
| @classmethod | |||
| def get_key_list(cls): | |||
| """ | |||
| Get the filter key name list. | |||
| Returns: | |||
| list[str], the filter key name list. | |||
| """ | |||
| return [member.value for member in cls] | |||
| @enum.unique | |||
| class LineageType(enum.Enum): | |||
| """Lineage search type.""" | |||
| DATASET = 'dataset' | |||
| MODEL = 'model' | |||
| class Querier: | |||
| """ | |||
| The querier of model lineage information. | |||
    The class provides a query function for model lineage information, which
    includes hyper parameters, train dataset, algorithm, model information,
    metric, valid dataset, etc.
    The class also provides search and sorting capabilities for model lineage
    information. You can search and sort by the specified condition. The
    conditions are explained in the `ConditionParam` and `ExpressionType` classes.
| See the method `filter_summary_lineage` for supported fields. | |||
| Args: | |||
| summary_path (Union[str, list[str]]): The single summary log path or | |||
| a list of summary log path. | |||
| Raises: | |||
| LineageParamTypeError: If the input parameter type is invalid. | |||
| LineageQuerierParamException: If the input parameter value is invalid. | |||
        LineageSummaryParseException: If parsing fails for all summary logs.
| """ | |||
| def __init__(self, summary_path): | |||
| self._lineage_objects = [] | |||
| self._index_map = {} | |||
| self._parse_failed_paths = [] | |||
| self._parse_summary_logs(summary_path) | |||
| self._size = len(self._lineage_objects) | |||
| def get_summary_lineage(self, summary_dir=None, filter_keys=None): | |||
| """ | |||
| Get summary lineage information. | |||
        If a summary dir is specified, the corresponding summary lineage
        information will be found. If the summary dir is `None`, all summary
        lineage information will be found.
| Returns the content corresponding to the specified field in the filter | |||
| key. The contents of the filter key include `metric`, `hyper_parameters`, | |||
| `algorithm`, `train_dataset`, `valid_dataset` and `model`. You can | |||
| specify multiple filter keys in the `filter_keys`. If the parameter is | |||
| `None`, complete information will be returned. | |||
| Args: | |||
| summary_dir (Union[str, None]): Summary log dir. Default: None. | |||
| filter_keys (Union[list[str], None]): Filter keys. Default: None. | |||
| Returns: | |||
| list[dict], summary lineage information. | |||
| """ | |||
| self._parse_fail_summary_logs() | |||
| if filter_keys is None: | |||
| filter_keys = LineageFilterKey.get_key_list() | |||
| else: | |||
| for key in filter_keys: | |||
| if not LineageFilterKey.is_valid_filter_key(key): | |||
| raise LineageQuerierParamException( | |||
| filter_keys, 'The filter key {} is invalid.'.format(key) | |||
| ) | |||
| if summary_dir is None: | |||
| result = [ | |||
| item.get_summary_info(filter_keys) for item in self._lineage_objects | |||
| ] | |||
| else: | |||
| index = self._index_map.get(summary_dir) | |||
| if index is None: | |||
| raise LineageQuerierParamException( | |||
| 'summary_dir', | |||
| 'Summary dir {} does not exist.'.format(summary_dir) | |||
| ) | |||
| lineage_obj = self._lineage_objects[index] | |||
| result = [lineage_obj.get_summary_info(filter_keys)] | |||
| return result | |||
| def filter_summary_lineage(self, condition=None): | |||
| """ | |||
| Filter and sort lineage information based on the specified condition. | |||
See the `ConditionParam` and `ExpressionType` classes for the rules of filtering
| and sorting. The filtering and sorting fields are defined in | |||
| `FIELD_MAPPING` or prefixed with `metric_`. | |||
| If the condition is `None`, all model lineage information will be | |||
| returned. | |||
| Args: | |||
| condition (Union[dict, None]): Filter and sort condition. | |||
| Default: None. | |||
| Returns: | |||
| dict, filtered and sorted model lineage information. | |||
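Examples:
>>> # a hedged sketch; `lt`, `limit` and `offset` are assumed condition
>>> # names, see `ConditionParam` and `ExpressionType` for the real ones
>>> condition = {
...     'learning_rate': {'lt': 0.1},
...     'sorted_name': 'loss',
...     'sorted_type': 'descending',
...     'limit': 10,
...     'offset': 0,
... }
>>> querier.filter_summary_lineage(condition)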
| """ | |||
| def _filter(lineage_obj: LineageObj): | |||
| for condition_key, condition_value in condition.items(): | |||
| if ConditionParam.is_condition_type(condition_key): | |||
| continue | |||
if self._is_invalid_field(condition_key):
raise LineageQuerierParamException(
'condition',
'The field {} is not supported.'.format(condition_key)
)
| value = lineage_obj.get_value_by_key(condition_key) | |||
| for exp_key, exp_value in condition_value.items(): | |||
| if not ExpressionType.is_valid_exp(exp_key): | |||
| raise LineageQuerierParamException( | |||
| 'condition', | |||
'The expression {} is not supported.'.format(exp_key)
| ) | |||
| if not ExpressionType.is_match(exp_key, exp_value, value): | |||
| return False | |||
| return True | |||
| def _cmp(obj1: LineageObj, obj2: LineageObj): | |||
| value1 = obj1.get_value_by_key(sorted_name) | |||
| value2 = obj2.get_value_by_key(sorted_name) | |||
| if value1 is None and value2 is None: | |||
| cmp_result = 0 | |||
| elif value1 is None: | |||
| cmp_result = -1 | |||
| elif value2 is None: | |||
| cmp_result = 1 | |||
| else: | |||
| cmp_result = (value1 > value2) - (value1 < value2) | |||
| return cmp_result | |||
| self._parse_fail_summary_logs() | |||
| if condition is None: | |||
| condition = {} | |||
| result = list(filter(_filter, self._lineage_objects)) | |||
| if ConditionParam.SORTED_NAME.value in condition: | |||
| sorted_name = condition.get(ConditionParam.SORTED_NAME.value) | |||
if self._is_invalid_field(sorted_name):
raise LineageQuerierParamException(
'condition',
'The sorted name {} is not supported.'.format(sorted_name)
| ) | |||
| sorted_type = condition.get(ConditionParam.SORTED_TYPE.value) | |||
| reverse = sorted_type == 'descending' | |||
| result = sorted( | |||
| result, key=functools.cmp_to_key(_cmp), reverse=reverse | |||
| ) | |||
| offset_result = self._handle_limit_and_offset(condition, result) | |||
| search_type = condition.get(ConditionParam.LINEAGE_TYPE.value) | |||
| lineage_info = { | |||
| 'object': [ | |||
| item.to_dataset_lineage_dict() if search_type == LineageType.DATASET.value | |||
| else item.to_filtration_dict() for item in offset_result | |||
| ], | |||
| 'count': len(result) | |||
| } | |||
| return lineage_info | |||
def _is_invalid_field(self, field_name):
"""
Check if the field name is invalid for filtering and sorting.
Args:
field_name (str): Field name.
Returns:
bool, `True` if the field name is invalid, else `False`.
"""
return field_name not in FIELD_MAPPING and not field_name.startswith('metric_')
| def _handle_limit_and_offset(self, condition, result): | |||
| """ | |||
Handle the `limit` and `offset` conditions.
| Args: | |||
| condition (dict): Filter and sort condition. | |||
| result (list[LineageObj]): Filtered and sorted result. | |||
| Returns: | |||
| list[LineageObj], paginated result. | |||
| """ | |||
| offset = 0 | |||
| limit = 10 | |||
| if ConditionParam.OFFSET.value in condition: | |||
| offset = condition.get(ConditionParam.OFFSET.value) | |||
| if ConditionParam.LIMIT.value in condition: | |||
| limit = condition.get(ConditionParam.LIMIT.value) | |||
| if ConditionParam.OFFSET.value not in condition \ | |||
| and ConditionParam.LIMIT.value not in condition: | |||
| offset_result = result | |||
| else: | |||
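# `offset` is a page index, so this selects page `offset` of size `limit`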
| offset_result = result[offset * limit: limit * (offset + 1)] | |||
| return offset_result | |||
| def _parse_summary_logs(self, summary_path): | |||
| """ | |||
| Parse summary logs. | |||
| Args: | |||
| summary_path (Union[str, list[str]]): The single summary log path or | |||
a list of summary log paths.
| """ | |||
| if not summary_path: | |||
| raise LineageQuerierParamException( | |||
| 'summary_path', 'The summary path is empty.' | |||
| ) | |||
| if isinstance(summary_path, str): | |||
| self._parse_summary_log(summary_path, 0) | |||
| elif isinstance(summary_path, list): | |||
| index = 0 | |||
| for path in summary_path: | |||
| parse_result = self._parse_summary_log(path, index) | |||
| if parse_result: | |||
| index += 1 | |||
| else: | |||
| raise LineageParamTypeError('Summary path is not str or list.') | |||
| if self._parse_failed_paths: | |||
| logger.info('Parse failed paths: %s', str(self._parse_failed_paths)) | |||
| if not self._lineage_objects: | |||
| raise LineageSummaryParseException() | |||
| def _parse_summary_log(self, log_path, index: int, is_save_fail_path=True): | |||
| """ | |||
| Parse the single summary log. | |||
| Args: | |||
| log_path (str): The single summary log path. | |||
index (int): The index assigned to the parsed lineage object.
is_save_fail_path (bool): Whether to record the summary path when
parsing fails. Default: True.
| Returns: | |||
bool, `True` if the summary log is parsed successfully, else `False`.
| """ | |||
| log_dir = os.path.dirname(log_path) | |||
| try: | |||
| lineage_info = LineageSummaryAnalyzer.get_summary_infos(log_path) | |||
| lineage_obj = LineageObj( | |||
| log_dir, | |||
| train_lineage=lineage_info.train_lineage, | |||
| evaluation_lineage=lineage_info.eval_lineage, | |||
| dataset_graph=lineage_info.dataset_graph | |||
| ) | |||
| self._lineage_objects.append(lineage_obj) | |||
| self._add_dataset_mark() | |||
| self._index_map[log_dir] = index | |||
| return True | |||
| except (LineageSummaryAnalyzeException, | |||
| LineageEventNotExistException, | |||
| LineageEventFieldNotExistException): | |||
| if is_save_fail_path: | |||
| self._parse_failed_paths.append(log_path) | |||
| return False | |||
| def _parse_fail_summary_logs(self): | |||
| """Parse fail summary logs.""" | |||
| if self._parse_failed_paths: | |||
| failed_paths = [] | |||
| for path in self._parse_failed_paths: | |||
| parse_result = self._parse_summary_log(path, self._size, False) | |||
| if parse_result: | |||
| self._size += 1 | |||
| else: | |||
| failed_paths.append(path) | |||
| self._parse_failed_paths = failed_paths | |||
| def _add_dataset_mark(self): | |||
| """Add dataset mark into LineageObj.""" | |||
| # give a dataset mark for each dataset graph in lineage information | |||
| marked_dataset_group = {'1': None} | |||
| for lineage in self._lineage_objects: | |||
| dataset_mark = '0' | |||
| for dataset_graph_mark, marked_dataset_graph in marked_dataset_group.items(): | |||
| if marked_dataset_graph == lineage.dataset_graph: | |||
| dataset_mark = dataset_graph_mark | |||
| break | |||
# if no match was found, assign a new mark and add the graph to the group
if dataset_mark == '0':
# compare marks numerically; string comparison breaks once marks reach '10'
dataset_mark = str(max(int(k) for k in marked_dataset_group) + 1)
marked_dataset_group[dataset_mark] = lineage.dataset_graph
| lineage.dataset_mark = dataset_mark | |||
| @@ -0,0 +1,344 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """This file is used to define lineage info model.""" | |||
| import json | |||
| from collections import namedtuple | |||
| from google.protobuf.json_format import MessageToDict | |||
| from mindinsight.lineagemgr.common.exceptions.exceptions import \ | |||
| LineageEventFieldNotExistException, LineageEventNotExistException | |||
| from mindinsight.lineagemgr.summary._summary_adapter import organize_graph | |||
| Field = namedtuple('Field', ['base_name', 'sub_name']) | |||
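# Each Field maps a filter key to a top-level lineage section (`base_name`)
# and, optionally, a key inside that section (`sub_name`).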
| FIELD_MAPPING = { | |||
| "summary_dir": Field('summary_dir', None), | |||
| "loss_function": Field("hyper_parameters", 'loss_function'), | |||
| "train_dataset_path": Field('train_dataset', 'train_dataset_path'), | |||
| "train_dataset_count": Field("train_dataset", 'train_dataset_size'), | |||
| "test_dataset_path": Field('valid_dataset', 'valid_dataset_path'), | |||
| "test_dataset_count": Field('valid_dataset', 'valid_dataset_size'), | |||
| "network": Field('algorithm', 'network'), | |||
| "optimizer": Field('hyper_parameters', 'optimizer'), | |||
| "learning_rate": Field('hyper_parameters', 'learning_rate'), | |||
| "epoch": Field('hyper_parameters', 'epoch'), | |||
| "batch_size": Field('hyper_parameters', 'batch_size'), | |||
| "loss": Field('algorithm', 'loss'), | |||
| "model_size": Field('model', 'size'), | |||
| "dataset_mark": Field('dataset_mark', None), | |||
| } | |||
| class LineageObj: | |||
| """ | |||
| Lineage information class. | |||
An instance of the class holds lineage information for a training session.
| Args: | |||
| summary_dir (str): Summary log dir. | |||
| kwargs (dict): Params to init the instance. | |||
| - train_lineage (Event): Train lineage object. | |||
| - evaluation_lineage (Event): Evaluation lineage object. | |||
| - dataset_graph (Event): Dataset graph object. | |||
| Raises: | |||
LineageEventNotExistException: If none of the train, evaluation, and
dataset graph events exist.
LineageEventFieldNotExistException: If the specified event field does not exist.
| """ | |||
| _name_train_lineage = 'train_lineage' | |||
| _name_evaluation_lineage = 'evaluation_lineage' | |||
| _name_summary_dir = 'summary_dir' | |||
| _name_metric = 'metric' | |||
| _name_hyper_parameters = 'hyper_parameters' | |||
| _name_algorithm = 'algorithm' | |||
| _name_train_dataset = 'train_dataset' | |||
| _name_model = 'model' | |||
| _name_valid_dataset = 'valid_dataset' | |||
| _name_dataset_graph = 'dataset_graph' | |||
| _name_dataset_mark = 'dataset_mark' | |||
| def __init__(self, summary_dir, **kwargs): | |||
| self._lineage_info = { | |||
| self._name_summary_dir: summary_dir | |||
| } | |||
| train_lineage = kwargs.get('train_lineage') | |||
| evaluation_lineage = kwargs.get('evaluation_lineage') | |||
| dataset_graph = kwargs.get('dataset_graph') | |||
| if not any([train_lineage, evaluation_lineage, dataset_graph]): | |||
| raise LineageEventNotExistException() | |||
| self._parse_train_lineage(train_lineage) | |||
| self._parse_evaluation_lineage(evaluation_lineage) | |||
| self._parse_dataset_graph(dataset_graph) | |||
| self._filtration_result = self._organize_filtration_result() | |||
| @property | |||
| def summary_dir(self): | |||
| """ | |||
| Get summary log dir. | |||
| Returns: | |||
| str, the summary log dir. | |||
| """ | |||
| return self._lineage_info.get(self._name_summary_dir) | |||
| @property | |||
| def metric(self): | |||
| """ | |||
| Get metric information. | |||
| Returns: | |||
| dict, the metric information. | |||
| """ | |||
| return self._lineage_info.get(self._name_metric) | |||
| @property | |||
| def hyper_parameters(self): | |||
| """ | |||
| Get hyperparameters. | |||
| Returns: | |||
| dict, the hyperparameters. | |||
| """ | |||
| return self._lineage_info.get(self._name_hyper_parameters) | |||
| @property | |||
| def algorithm(self): | |||
| """ | |||
| Get algorithm. | |||
| Returns: | |||
| dict, the algorithm. | |||
| """ | |||
| return self._lineage_info.get(self._name_algorithm) | |||
| @property | |||
| def train_dataset(self): | |||
| """ | |||
| Get train dataset information. | |||
| Returns: | |||
| dict, the train dataset information. | |||
| """ | |||
| return self._lineage_info.get(self._name_train_dataset) | |||
| @property | |||
| def model(self): | |||
| """ | |||
| Get model information. | |||
| Returns: | |||
| dict, the model information. | |||
| """ | |||
| return self._lineage_info.get(self._name_model) | |||
| @property | |||
| def valid_dataset(self): | |||
| """ | |||
| Get valid dataset information. | |||
| Returns: | |||
| dict, the valid dataset information. | |||
| """ | |||
| return self._lineage_info.get(self._name_valid_dataset) | |||
| @property | |||
| def dataset_graph(self): | |||
| """ | |||
| Get dataset_graph. | |||
| Returns: | |||
| dict, the dataset graph information. | |||
| """ | |||
| return self._lineage_info.get(self._name_dataset_graph) | |||
| @property | |||
| def dataset_mark(self): | |||
| """ | |||
| Get dataset_mark. | |||
| Returns: | |||
str, the dataset mark.
| """ | |||
| return self._lineage_info.get(self._name_dataset_mark) | |||
| @dataset_mark.setter | |||
| def dataset_mark(self, dataset_mark): | |||
| """ | |||
| Set dataset mark. | |||
| Args: | |||
dataset_mark (str): Dataset mark.
| """ | |||
| self._lineage_info[self._name_dataset_mark] = dataset_mark | |||
| # update dataset_mark into filtration result | |||
| self._filtration_result[self._name_dataset_mark] = dataset_mark | |||
| def get_summary_info(self, filter_keys: list): | |||
| """ | |||
| Get the summary lineage information. | |||
| Returns the content corresponding to the specified field in the filter | |||
| key. The contents of the filter key include `metric`, `hyper_parameters`, | |||
| `algorithm`, `train_dataset`, `valid_dataset` and `model`. You can | |||
specify multiple filter keys in the `filter_keys`.
| Args: | |||
| filter_keys (list): Filter keys. | |||
| Returns: | |||
| dict, the summary lineage information. | |||
| """ | |||
| result = { | |||
| self._name_summary_dir: self.summary_dir, | |||
| } | |||
| for key in filter_keys: | |||
| result[key] = getattr(self, key) | |||
| return result | |||
| def to_filtration_dict(self): | |||
| """ | |||
| Returns the lineage information required by filtering interface. | |||
| Returns: | |||
| dict, the lineage information required by filtering interface. | |||
| """ | |||
| return self._filtration_result | |||
| def to_dataset_lineage_dict(self): | |||
| """ | |||
| Returns the dataset part lineage information. | |||
| Returns: | |||
| dict, the dataset lineage information. | |||
| """ | |||
| dataset_lineage = { | |||
| key: self._filtration_result.get(key) | |||
| for key in [self._name_summary_dir, self._name_dataset_graph] | |||
| } | |||
| return dataset_lineage | |||
| def get_value_by_key(self, key): | |||
| """ | |||
| Get the value based on the key in `FIELD_MAPPING` or the key prefixed with `metric_`. | |||
| Args: | |||
| key (str): The key in `FIELD_MAPPING` or prefixed with `metric_`. | |||
| Returns: | |||
| object, the value. | |||
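Examples:
>>> # illustrative; assumes an 'accuracy' entry exists in the metric dict
>>> lineage_obj.get_value_by_key('metric_accuracy')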
| """ | |||
| if key.startswith('metric_'): | |||
| metric_key = key.split('_', 1)[1] | |||
| metric = self._filtration_result.get(self._name_metric) | |||
| if metric: | |||
| return metric.get(metric_key) | |||
| return self._filtration_result.get(key) | |||
| def _organize_filtration_result(self): | |||
| """ | |||
| Organize filtration result. | |||
| Returns: | |||
| dict, the filtration result. | |||
| """ | |||
| result = {} | |||
| for key, field in FIELD_MAPPING.items(): | |||
| if field.base_name is not None: | |||
| base_attr = getattr(self, field.base_name) | |||
| result[key] = base_attr.get(field.sub_name) \ | |||
| if field.sub_name else base_attr | |||
| # add metric into filtration result | |||
| result[self._name_metric] = self.metric | |||
| # add dataset_graph into filtration result | |||
| result[self._name_dataset_graph] = getattr(self, self._name_dataset_graph) | |||
| return result | |||
| def _parse_train_lineage(self, train_lineage): | |||
| """ | |||
| Parse train lineage. | |||
| Args: | |||
| train_lineage (Event): Train lineage. | |||
| """ | |||
| if train_lineage is None: | |||
| self._lineage_info[self._name_model] = {} | |||
| self._lineage_info[self._name_algorithm] = {} | |||
| self._lineage_info[self._name_hyper_parameters] = {} | |||
| self._lineage_info[self._name_train_dataset] = {} | |||
| return | |||
| event_dict = MessageToDict( | |||
| train_lineage, preserving_proto_field_name=True | |||
| ) | |||
| train_dict = event_dict.get(self._name_train_lineage) | |||
| if train_dict is None: | |||
| raise LineageEventFieldNotExistException( | |||
| self._name_train_lineage | |||
| ) | |||
# MessageToDict converts int64 fields to strings, so convert the
# model size back to an int in Python
| if train_dict.get(self._name_model): | |||
| model_size = train_dict.get(self._name_model).get('size') | |||
| if model_size: | |||
| train_dict[self._name_model]['size'] = int(model_size) | |||
| self._lineage_info.update(**train_dict) | |||
| def _parse_evaluation_lineage(self, evaluation_lineage): | |||
| """ | |||
| Parse evaluation lineage. | |||
| Args: | |||
| evaluation_lineage (Event): Evaluation lineage. | |||
| """ | |||
| if evaluation_lineage is None: | |||
| self._lineage_info[self._name_metric] = {} | |||
| self._lineage_info[self._name_valid_dataset] = {} | |||
| return | |||
| event_dict = MessageToDict( | |||
| evaluation_lineage, preserving_proto_field_name=True | |||
| ) | |||
| evaluation_dict = event_dict.get(self._name_evaluation_lineage) | |||
| if evaluation_dict is None: | |||
| raise LineageEventFieldNotExistException( | |||
| self._name_evaluation_lineage | |||
| ) | |||
| self._lineage_info.update(**evaluation_dict) | |||
| metric = self._lineage_info.get(self._name_metric) | |||
| self._lineage_info[self._name_metric] = json.loads(metric) if metric else {} | |||
| def _parse_dataset_graph(self, dataset_graph): | |||
| """ | |||
| Parse dataset graph. | |||
| Args: | |||
| dataset_graph (Event): Dataset graph. | |||
| """ | |||
| if dataset_graph is None: | |||
| self._lineage_info[self._name_dataset_graph] = {} | |||
| else: | |||
| # convert message to dict | |||
| event_dict = organize_graph(dataset_graph.dataset_graph) | |||
| if event_dict is None: | |||
raise LineageEventFieldNotExistException(self._name_dataset_graph)
| self._lineage_info[self._name_dataset_graph] = event_dict if event_dict else {} | |||
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,293 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """The converter between proto format event of lineage and dict.""" | |||
| import time | |||
| from mindinsight.datavisual.proto_files.mindinsight_summary_pb2 import Event | |||
| from mindinsight.lineagemgr.common.exceptions.exceptions import LineageParamTypeError | |||
| from mindinsight.lineagemgr.common.log import logger as log | |||
| def package_dataset_graph(graph): | |||
| """ | |||
| Package dataset graph. | |||
| Args: | |||
| graph (dict): Dataset graph. | |||
| Returns: | |||
Event, the proto message event that contains the dataset graph.
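Examples:
>>> # a minimal sketch; the graph dict keys are illustrative
>>> graph = {'op_type': 'BatchDataset', 'batch_size': 32, 'children': []}
>>> event = package_dataset_graph(graph)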
| """ | |||
| dataset_graph_event = Event() | |||
| dataset_graph_event.wall_time = time.time() | |||
| dataset_graph = dataset_graph_event.dataset_graph | |||
| if "children" in graph: | |||
| children = graph.pop("children") | |||
| if children: | |||
| _package_children(children=children, message=dataset_graph) | |||
| _package_current_dataset(operation=graph, message=dataset_graph) | |||
| return dataset_graph_event | |||
| def _package_children(children, message): | |||
| """ | |||
| Package children in dataset operation. | |||
| Args: | |||
| children (list[dict]): Child operations. | |||
message (DatasetGraph): Parent proto message that the children are added to.
| """ | |||
| for child in children: | |||
| if child: | |||
| child_graph_message = getattr(message, "children").add() | |||
| grandson = child.pop("children") | |||
| if grandson: | |||
| _package_children(children=grandson, message=child_graph_message) | |||
| # package other parameters | |||
| _package_current_dataset(operation=child, message=child_graph_message) | |||
| def _package_current_dataset(operation, message): | |||
| """ | |||
| Package operation parameters in event message. | |||
| Args: | |||
| operation (dict): Operation dict. | |||
| message (Operation): Operation proto message. | |||
| """ | |||
| for key, value in operation.items(): | |||
| if key == "operations": | |||
| for operator in value: | |||
| _package_enhancement_operation( | |||
| operator, | |||
| message.operations.add() | |||
| ) | |||
| elif key == "sampler": | |||
| _package_enhancement_operation( | |||
| value, | |||
| message.sampler | |||
| ) | |||
| else: | |||
| _package_parameter(key, value, message.parameter) | |||
| def _package_enhancement_operation(operation, message): | |||
| """ | |||
| Package enhancement operation in MapDataset. | |||
| Args: | |||
| operation (dict): Enhancement operation. | |||
| message (Operation): Enhancement operation proto message. | |||
| """ | |||
| for key, value in operation.items(): | |||
| if isinstance(value, list): | |||
| if all(isinstance(ele, int) for ele in value): | |||
| message.size.extend(value) | |||
| else: | |||
| message.weights.extend(value) | |||
| else: | |||
| _package_parameter(key, value, message.operationParam) | |||
| def _package_parameter(key, value, message): | |||
| """ | |||
| Package parameters in operation. | |||
| Args: | |||
key (str): Parameter name.
value (Union[str, bool, int, float, list, None]): Parameter value.
| message (OperationParameter): Operation proto message. | |||
| """ | |||
| if isinstance(value, str): | |||
| message.mapStr[key] = value | |||
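# bool must be checked before int because bool is a subclass of int in Python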
| elif isinstance(value, bool): | |||
| message.mapBool[key] = value | |||
| elif isinstance(value, int): | |||
| message.mapInt[key] = value | |||
| elif isinstance(value, float): | |||
| message.mapDouble[key] = value | |||
| elif isinstance(value, list) and key != "operations": | |||
| if value: | |||
| replace_value_list = list(map(lambda x: "" if x is None else x, value)) | |||
| message.mapStrList[key].strValue.extend(replace_value_list) | |||
| elif value is None: | |||
| message.mapStr[key] = "None" | |||
| else: | |||
| error_msg = "Parameter {} is not supported " \ | |||
| "in event package.".format(key) | |||
| log.error(error_msg) | |||
| raise LineageParamTypeError(error_msg) | |||
| def organize_graph(graph_message): | |||
| """ | |||
| Convert a dataset graph to its dict format. | |||
| Args: | |||
| graph_message (DatasetGraph): Graph event message. | |||
| Returns: | |||
| dict, dataset graph. | |||
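Examples:
>>> # a hedged sketch: round-trips an event built by package_dataset_graph
>>> graph_dict = organize_graph(event.dataset_graph)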
| """ | |||
| result = {} | |||
| # update current dataset graph dict | |||
| result.update(_organize_current_dataset( | |||
| parameter=getattr(graph_message, 'parameter'), | |||
| operations=getattr(graph_message, 'operations'), | |||
| sampler=getattr(graph_message, 'sampler') | |||
| )) | |||
| # update children dataset graph dict | |||
| result.update( | |||
| _organize_children(getattr(graph_message, 'children')) | |||
| ) | |||
| return result | |||
| def _organize_children(children_message): | |||
| """ | |||
| Convert children message to its dict format. | |||
| Args: | |||
| children_message (list[DatasetGraph]): Children message. | |||
| Returns: | |||
| dict, children dict of dataset graph. | |||
| """ | |||
| children_list = [] | |||
| children_dict = {'children': children_list} | |||
| if children_message: | |||
| for child_event in children_message: | |||
| child_dict = {} | |||
| # update current dataset to child | |||
| child_dict.update( | |||
| _organize_current_dataset( | |||
| parameter=getattr(child_event, 'parameter'), | |||
| operations=getattr(child_event, 'operations'), | |||
| sampler=getattr(child_event, 'sampler') | |||
| ) | |||
| ) | |||
| # update child's children | |||
| child_dict.update( | |||
| _organize_children(getattr(child_event, 'children')) | |||
| ) | |||
| children_list.append(child_dict) | |||
| return children_dict | |||
| def _organize_current_dataset(parameter, operations, sampler): | |||
| """ | |||
| Convert current dataset message to its dict format. | |||
| Note: | |||
The current dataset message includes the parameter, operations,
and sampler messages of the dataset graph event.
| Args: | |||
| parameter (OperationParameter): Parameter message. | |||
operations (list[Operation]): Operation messages.
| sampler (Operation): Sampler message. | |||
| Returns: | |||
| dict, current dataset. | |||
| """ | |||
| current_dataset = {} | |||
| if parameter: | |||
| current_dataset.update( | |||
| _organize_parameter(parameter) | |||
| ) | |||
| if operations: | |||
| operation_list = [] | |||
| for operation in operations: | |||
| operation_list.append( | |||
| _organize_operation(operation) | |||
| ) | |||
| current_dataset.update( | |||
| {'operations': operation_list} | |||
| ) | |||
if sampler:
# organize once and reuse the result instead of converting twice
sampler_dict = _organize_operation(sampler)
if sampler_dict:
current_dataset.update({'sampler': sampler_dict})
| return current_dataset | |||
| def _organize_operation(operation): | |||
| """ | |||
| Convert operation message to its dict format. | |||
| Args: | |||
| operation (Operation): Operation message. | |||
| Returns: | |||
| dict, operation. | |||
| """ | |||
operation_dict = {}
operation_dict.update(_organize_parameter(getattr(operation, 'operationParam')))
# collect each repeated proto field separately so `size` and `weights`
# are not merged into a single list
repeated_keys = ['size', 'weights']
for key in repeated_keys:
values = list(getattr(operation, key))
if values:
operation_dict[key] = values
| return operation_dict | |||
| def _organize_parameter(parameter): | |||
| """ | |||
| Convert operation parameter message to its dict format. | |||
| Args: | |||
| parameter (OperationParameter): Operation parameter message. | |||
| Returns: | |||
| dict, operation parameter. | |||
| """ | |||
| parameter_result = dict() | |||
| parameter_keys = [ | |||
| 'mapStr', | |||
| 'mapBool', | |||
| 'mapInt', | |||
| 'mapDouble', | |||
| ] | |||
| for parameter_key in parameter_keys: | |||
| base_attr = getattr(parameter, parameter_key) | |||
| parameter_value = dict(base_attr) | |||
| # convert str 'None' to None | |||
| for key, value in parameter_value.items(): | |||
| if value == 'None': | |||
| parameter_value[key] = None | |||
| parameter_result.update(parameter_value) | |||
# unwrap `mapStrList` entries into plain lists, dropping the `strValue` wrapper
| str_list_para = dict(getattr(parameter, 'mapStrList')) | |||
| result_str_list_para = dict() | |||
for key, value in str_list_para.items():
# convert '' back to None; the packaging step encodes None as ''
str_list_para_list = [None if ele == '' else ele for ele in getattr(value, 'strValue')]
result_str_list_para[key] = str_list_para_list
| parameter_result.update(result_str_list_para) | |||
| return parameter_result | |||
| @@ -0,0 +1,95 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Event writer to record lineage message to summary log.""" | |||
| import os | |||
| import stat | |||
| import struct | |||
| from mindinsight.datavisual.utils import crc32 | |||
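# masking delta used by TFRecord-style masked CRC32C checksums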
| KMASKDELTA = 0xa282ead8 | |||
| class EventWriter: | |||
| """ | |||
Lineage summary record.
Records train lineage and evaluation lineage to the summary log.
Args:
file_path (str): Summary log path.
override (bool): Whether to overwrite an existing summary log.
Raises:
IOError: If writing to the summary log fails or `file_path` is a directory.
| Examples: | |||
| >>> content = b'\x01\x02\x03\x04' | |||
| >>> event_writer = EventWriter("./test.log", True) | |||
| >>> event_writer.write_event_to_file(content) | |||
| """ | |||
| def __init__(self, file_path, override=False): | |||
| """ | |||
Init EventWriter and decide the write mode.
Args:
file_path (str): The file path to write to.
override (bool): Whether to overwrite an existing file.
| """ | |||
| if os.path.exists(file_path): | |||
| if not os.path.isfile(file_path): | |||
| raise IOError("The file_path is not a normal file.") | |||
| self.file_path = file_path | |||
| if override: | |||
| self.write_type = 'wb' | |||
| else: | |||
| self.write_type = 'ab' | |||
| def write_event_to_file(self, content): | |||
| """ | |||
| Write event to file. | |||
| Args: | |||
| content (bytes): Content to write. | |||
| """ | |||
| length = struct.pack("<Q", len(content)) | |||
| header_crc = EventWriter.get_crc(length) | |||
| crc = EventWriter.get_crc(content) | |||
| content = length + header_crc + content + crc | |||
| try: | |||
| with open(self.file_path, self.write_type) as log_file: | |||
| os.chmod(self.file_path, stat.S_IRUSR | stat.S_IWUSR) | |||
| log_file.write(content) | |||
| except IOError: | |||
| raise IOError("There are some error when writing summary log.") | |||
| @staticmethod | |||
| def get_crc(content): | |||
| """ | |||
| Calculate crc value of the content. | |||
| Args: | |||
content (bytes): Content to calculate the CRC over.
| Returns: | |||
| bytes, crc of content, 4 bytes. | |||
| """ | |||
| mask = (1 << 32) - 1 | |||
| crc_value = crc32.MakeCrc32c(0, content, len(content)) | |||
| crc_value = ((crc_value >> 15) | (crc_value << 17)) & mask | |||
| crc_value = (crc_value + KMASKDELTA) & mask | |||
| return struct.pack("<L", crc_value) | |||
| @@ -0,0 +1,95 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """File handler for lineage summary log.""" | |||
| import os | |||
| class FileHandler: | |||
| """ | |||
| Summary log file handler. | |||
Summary log file handler provides Python APIs to manage file IO, including
read and seek. Because it caches the whole file in memory, it is not
suitable for very large files.
| Args: | |||
| file_path (str): File path. | |||
| """ | |||
| def __init__(self, file_path): | |||
| self._size = os.path.getsize(file_path) | |||
| self._cache = self._read_cache(file_path) | |||
| self._offset = 0 | |||
| @property | |||
| def size(self): | |||
| """ | |||
The size of the file.
Returns:
int, the file size in bytes.
| """ | |||
| return self._size | |||
| def _read_cache(self, file_path): | |||
| """ | |||
Read the whole file into the cache.
| Args: | |||
| file_path (str): File path. | |||
| Returns: | |||
| bytes, the file content. | |||
| """ | |||
| with open(file_path, 'rb') as log_file: | |||
| return log_file.read() | |||
| def seek(self, offset): | |||
| """ | |||
Set the new offset of the file.
Args:
offset (int): The new offset.
| """ | |||
| self._offset = offset | |||
| def tell(self): | |||
| """ | |||
| Tell the current offset. | |||
| Returns: | |||
| int, the offset. | |||
| """ | |||
| return self._offset | |||
| def read(self, size=-1, offset=None): | |||
| """ | |||
| Read bytes from buffer by size. | |||
| Args: | |||
size (int): Number of bytes to read. If set to -1, read from the
offset to the end of the file. Default: -1.
| offset (int): The start offset to read bytes from. Default: None. | |||
| Returns: | |||
| bytes, the content. | |||
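Examples:
>>> # illustrative path; reads the first 8 bytes of the log
>>> handler = FileHandler('./summary.log')
>>> handler.read(8, offset=0)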
| """ | |||
if offset is None:
offset = self._offset
if size == -1:
# size -1 means read from the offset to the end of the file
new_offset = self._size
else:
new_offset = offset + size
result = self._cache[offset:new_offset]
self._offset = new_offset
| return result | |||
| @@ -0,0 +1,209 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """This module provides python APIs to get lineage summary from summary log.""" | |||
| import struct | |||
| from collections import namedtuple | |||
| from enum import Enum | |||
| from mindinsight.datavisual.proto_files.mindinsight_summary_pb2 import Event | |||
| from mindinsight.datavisual.utils import crc32 | |||
| from mindinsight.lineagemgr.common.exceptions.exceptions import MindInsightException, \ | |||
| LineageVerificationException, LineageSummaryAnalyzeException | |||
| from mindinsight.lineagemgr.common.log import logger as log | |||
| from mindinsight.lineagemgr.common.validator.validate_path import safe_normalize_path | |||
| from mindinsight.lineagemgr.summary.file_handler import FileHandler | |||
| LineageInfo = namedtuple('LineageInfo', ['train_lineage', 'eval_lineage', 'dataset_graph']) | |||
| class SummaryTag(Enum): | |||
| """The tag value of lineage fields.""" | |||
# the value is the proto field name of the corresponding event field
| WALL_TIME = 'wall_time' | |||
| STEP = 'step' | |||
| VERSION = 'version' | |||
| GRAPH = 'graph' | |||
| SUMMARY = 'summary' | |||
| TRAIN_LINEAGE = 'train_lineage' | |||
| EVAL_LINEAGE = 'evaluation_lineage' | |||
| DATASET_GRAPH = 'dataset_graph' | |||
| class SummaryAnalyzer: | |||
| """ | |||
| Summary log Analyzer. | |||
| Args: | |||
| file_path (str): The path of summary log. | |||
| Raises: | |||
LineageVerificationException: Raised when verification fails.
| """ | |||
| HEADER_SIZE = 8 | |||
| HEADER_CRC_SIZE = 4 | |||
| BODY_CRC_SIZE = 4 | |||
| def __init__(self, file_path): | |||
| self.file_handler = FileHandler(file_path) | |||
| def load_events(self): | |||
| """ | |||
| Load events in summary log. | |||
| Returns: | |||
| generator, the event generator. | |||
| """ | |||
| while self._has_next(): | |||
| yield self._read_event() | |||
| def _has_next(self): | |||
| """ | |||
| Check if the file has reached the end. | |||
| Returns: | |||
| bool, whether the file has reached the end. | |||
| """ | |||
return self.file_handler.tell() < self.file_handler.size
| def _read_event(self): | |||
| """ | |||
| Read event. | |||
| Returns: | |||
| Event, the event body. | |||
| """ | |||
| body_size = self._read_header() | |||
| body_str = self._read_body(body_size) | |||
| event = Event().FromString(body_str) | |||
| return event | |||
| def _read_header(self): | |||
| """ | |||
| Read header information. | |||
| Returns: | |||
| int, the length of event body. | |||
| """ | |||
| header_str = self.file_handler.read(self.HEADER_SIZE) | |||
| header_crc_str = self.file_handler.read(self.HEADER_CRC_SIZE) | |||
| SummaryAnalyzer._check_crc(header_str, header_crc_str) | |||
| body_len = struct.unpack("<Q", header_str)[0] | |||
| return body_len | |||
| def _read_body(self, body_size): | |||
| """ | |||
| Read event body information. | |||
| Args: | |||
| body_size (int): The size of event body. | |||
| Returns: | |||
| bytes, the event body in bytes. | |||
| """ | |||
| body_str = self.file_handler.read(body_size) | |||
| body_crc_str = self.file_handler.read(self.BODY_CRC_SIZE) | |||
| SummaryAnalyzer._check_crc(body_str, body_crc_str) | |||
| return body_str | |||
| @staticmethod | |||
| def _check_crc(source_str, crc_str): | |||
| """ | |||
| Check the integrity of source string. | |||
| Args: | |||
| source_str (bytes): Source string in bytes. | |||
| crc_str (bytes): CRC string of source string in bytes. | |||
| Raises: | |||
LineageVerificationException: Raised when verification fails.
| """ | |||
| if crc32.GetValueFromStr(crc_str) != \ | |||
| crc32.GetMaskCrc32cValue(source_str, len(source_str)): | |||
| log.error("The CRC verification failed.") | |||
| raise LineageVerificationException("The CRC verification failed.") | |||
| class LineageSummaryAnalyzer(SummaryAnalyzer): | |||
| """ | |||
| Summary log analyzer for lineage information. | |||
| Args: | |||
| file_path (str): The path of summary log. | |||
| Raises: | |||
LineageSummaryAnalyzeException: If getting the lineage information fails.
| """ | |||
| def __init__(self, file_path): | |||
| file_path = safe_normalize_path(file_path, 'lineage_summary_path', None) | |||
| super(LineageSummaryAnalyzer, self).__init__(file_path) | |||
| def get_latest_info(self): | |||
| """ | |||
| Get latest lineage info in summary log file. | |||
| Returns: | |||
| LineageInfo, the lineage summary information. | |||
| """ | |||
| lineage_events = { | |||
| SummaryTag.TRAIN_LINEAGE: None, | |||
| SummaryTag.EVAL_LINEAGE: None, | |||
| SummaryTag.DATASET_GRAPH: None | |||
| } | |||
| for event in self.load_events(): | |||
| for tag, _ in lineage_events.items(): | |||
| if event.HasField(tag.value): | |||
| lineage_events[tag] = event | |||
| break | |||
| lineage_info = LineageInfo( | |||
| train_lineage=lineage_events.get(SummaryTag.TRAIN_LINEAGE), | |||
| eval_lineage=lineage_events.get(SummaryTag.EVAL_LINEAGE), | |||
| dataset_graph=lineage_events.get(SummaryTag.DATASET_GRAPH) | |||
| ) | |||
| return lineage_info | |||
| @classmethod | |||
| def get_summary_infos(cls, file_path): | |||
| """ | |||
| Get lineage summary information from summary log file. | |||
| Args: | |||
| file_path (str): The file path of summary log. | |||
| Returns: | |||
| LineageInfo, the lineage summary information. | |||
| Raises: | |||
LineageSummaryAnalyzeException: If getting the lineage information fails.
| """ | |||
| analyzer = cls(file_path) | |||
| try: | |||
| lineage_info = analyzer.get_latest_info() | |||
| except (MindInsightException, IOError) as err: | |||
| log.error("Failed to get lineage information.") | |||
| log.exception(err) | |||
| raise LineageSummaryAnalyzeException() | |||
| return lineage_info | |||