Compare commits


No commits in common. 'master' and 'r0.1' have entirely different histories.
master ... r0.1

100 changed files with 2344 additions and 17431 deletions
1. +0 -26  .gitee/PULL_REQUEST_TEMPLATE.md
2. +0 -19  .github/ISSUE_TEMPLATE/RFC.md
3. +0 -43  .github/ISSUE_TEMPLATE/bug-report.md
4. +0 -19  .github/ISSUE_TEMPLATE/task-tracking.md
5. +0 -24  .github/PULL_REQUEST_TEMPLATE.md
6. +3 -9  .gitignore
7. +0 -3  .gitmodules
8. +0 -39  CMakeLists.txt
9. +0 -1  MANIFEST.in
10. +61 -128  README.md
11. +0 -178  README_CN.md
12. +6 -306  RELEASE.md
13. +0 -898  Third_Party_Open_Source_Software_Notice
14. +35 -43  build/build.sh
15. +0 -70  build/scripts/build_vulkan_vision_linux.sh
16. +0 -67  build/scripts/build_vulkan_vision_windows.bat
17. +52 -43  build/scripts/crc32.sh
18. +15 -13  build/scripts/ui.sh
19. +0 -25  ecosystem_tools/VulkanVision/README.md
20. +0 -3187  ecosystem_tools/VulkanVision/st-patches/0001-spirv-opt-Add-auto-inst-passes.patch
21. +0 -1  ecosystem_tools/VulkanVision/st-patches/vvision-st.diff
22. +0 -3600  ecosystem_tools/VulkanVision/vv-patches/0001-layers-Added-auto-inst-layers.patch
23. +0 -1  ecosystem_tools/VulkanVision/vv-patches/vvision-vv.diff
24. +1 -1  mindinsight/_version.py
25. +0 -1  mindinsight/backend/__init__.py
26. +5 -15  mindinsight/backend/application.py
27. +5 -75  mindinsight/backend/config/gunicorn_conf.py
28. +0 -45  mindinsight/backend/data_manager/__init__.py
29. +6 -0  mindinsight/backend/datavisual/__init__.py
30. +8 -21  mindinsight/backend/datavisual/static_resource_api.py
31. +11 -26  mindinsight/backend/datavisual/task_manager_api.py
32. +4 -51  mindinsight/backend/datavisual/train_visual_api.py
33. +0 -26  mindinsight/backend/debugger/__init__.py
34. +0 -426  mindinsight/backend/debugger/debugger_api.py
35. +0 -32  mindinsight/backend/explainer/__init__.py
36. +0 -338  mindinsight/backend/explainer/explainer_api.py
37. +94 -53  mindinsight/backend/lineagemgr/lineage_api.py
38. +0 -26  mindinsight/backend/optimizer/__init__.py
39. +0 -106  mindinsight/backend/optimizer/optimizer_api.py
40. +0 -31  mindinsight/backend/profiler/__init__.py
41. +0 -713  mindinsight/backend/profiler/profile_api.py
42. +57 -92  mindinsight/backend/run.py
43. +0 -26  mindinsight/backend/ui_config/__init__.py
44. +0 -40  mindinsight/backend/ui_config/ui_config_api.py
45. +0 -1  mindinsight/common/__init__.py
46. +0 -1  mindinsight/common/hook/__init__.py
47. +2 -2  mindinsight/common/hook/datavisual.py
48. +0 -99  mindinsight/common/hook/debugger.py
49. +10 -0  mindinsight/conf/__init__.py
50. +4 -13  mindinsight/conf/constants.py
51. +1 -8  mindinsight/conf/defaults.py
52. +0 -1  mindinsight/datavisual/__init__.py
53. +0 -1  mindinsight/datavisual/common/__init__.py
54. +2 -12  mindinsight/datavisual/common/enums.py
55. +4 -136  mindinsight/datavisual/common/exceptions.py
56. +0 -2  mindinsight/datavisual/common/log.py
57. +3 -3  mindinsight/datavisual/common/validation.py
58. +0 -1  mindinsight/datavisual/data_access/__init__.py
59. +0 -1  mindinsight/datavisual/data_transform/__init__.py
60. +4 -12  mindinsight/datavisual/data_transform/data_loader.py
61. +232 -775  mindinsight/datavisual/data_transform/data_manager.py
62. +11 -29  mindinsight/datavisual/data_transform/events_data.py
63. +333 -463  mindinsight/datavisual/data_transform/graph/graph.py
64. +190 -329  mindinsight/datavisual/data_transform/graph/msgraph.py
65. +100 -118  mindinsight/datavisual/data_transform/graph/node.py
66. +0 -61  mindinsight/datavisual/data_transform/graph/node_tree.py
67. +0 -237  mindinsight/datavisual/data_transform/histogram.py
68. +0 -59  mindinsight/datavisual/data_transform/histogram_container.py
69. +0 -30  mindinsight/datavisual/data_transform/image_container.py
70. +0 -1  mindinsight/datavisual/data_transform/loader_generators/__init__.py
71. +1 -6  mindinsight/datavisual/data_transform/loader_generators/data_loader_generator.py
72. +0 -12  mindinsight/datavisual/data_transform/loader_generators/loader_struct.py
73. +198 -381  mindinsight/datavisual/data_transform/ms_data_loader.py
74. +5 -147  mindinsight/datavisual/data_transform/reservoir.py
75. +0 -15  mindinsight/datavisual/data_transform/summary_parser/__init__.py
76. +0 -179  mindinsight/datavisual/data_transform/summary_parser/event_parser.py
77. +0 -54  mindinsight/datavisual/data_transform/summary_parser/image_writer.py
78. +0 -52  mindinsight/datavisual/data_transform/summary_parser/scalar_writer.py
79. +0 -32  mindinsight/datavisual/data_transform/summary_parser/writer.py
80. +85 -316  mindinsight/datavisual/data_transform/summary_watcher.py
81. +0 -157  mindinsight/datavisual/data_transform/tensor_container.py
82. +0 -1  mindinsight/datavisual/processors/__init__.py
83. +31 -18  mindinsight/datavisual/processors/graph_processor.py
84. +0 -70  mindinsight/datavisual/processors/histogram_processor.py
85. +13 -27  mindinsight/datavisual/processors/images_processor.py
86. +1 -74  mindinsight/datavisual/processors/scalars_processor.py
87. +0 -311  mindinsight/datavisual/processors/tensor_processor.py
88. +5 -141  mindinsight/datavisual/processors/train_task_manager.py
89. +0 -1  mindinsight/datavisual/proto_files/__init__.py
90. +0 -45  mindinsight/datavisual/proto_files/lazy_read.proto
91. +0 -242  mindinsight/datavisual/proto_files/lazy_read_pb2.py
92. +2 -7  mindinsight/datavisual/proto_files/mindinsight_anf_ir.proto
93. +16 -33  mindinsight/datavisual/proto_files/mindinsight_anf_ir_pb2.py
94. +0 -129  mindinsight/datavisual/proto_files/mindinsight_lineage.proto
95. +0 -1246  mindinsight/datavisual/proto_files/mindinsight_lineage_pb2.py
96. +70 -79  mindinsight/datavisual/proto_files/mindinsight_summary.proto
97. +561 -314  mindinsight/datavisual/proto_files/mindinsight_summary_pb2.py
98. +57 -0  mindinsight/datavisual/utils/crc32/base.h
99. +26 -23  mindinsight/datavisual/utils/crc32/crc32.cc
100. +14 -37  mindinsight/datavisual/utils/crc32/crc32.h

+0 -26  .gitee/PULL_REQUEST_TEMPLATE.md

@@ -1,26 +0,0 @@
<!-- Thanks for sending a pull request! Here are some tips for you:

If this is your first time, please read our contributor guidelines: https://gitee.com/mindspore/mindspore/blob/master/CONTRIBUTING.md
-->

**What type of PR is this?**
> Uncomment only one ` /kind <>` line, hit enter to put that in a new line, and remove leading whitespaces from that line:
>
> /kind bug
> /kind task
> /kind feature


**What does this PR do / why do we need it**:


**Which issue(s) this PR fixes**:
<!--
*Automatically closes linked issue when PR is merged.
Usage: `Fixes #<issue number>`, or `Fixes (paste link of issue)`.
-->
Fixes #

**Special notes for your reviewers**:



+0 -19  .github/ISSUE_TEMPLATE/RFC.md

@@ -1,19 +0,0 @@
---
name: RFC
about: Use this template for the new feature or enhancement
labels: kind/feature or kind/enhancement

---

## Background
- Describe the status of the problem you wish to solve
- Attach the relevant issue if have

## Introduction
- Describe the general solution, design and/or pseudo-code

## Trail
| No. | Task Description | Related Issue(URL) |
| --- | ---------------- | ------------------ |
| 1 | | |
| 2 | | |

+0 -43  .github/ISSUE_TEMPLATE/bug-report.md

@@ -1,43 +0,0 @@
---
name: Bug Report
about: Use this template for reporting a bug
labels: kind/bug

---

<!-- Thanks for sending an issue! Here are some tips for you:

If this is your first time, please read our contributor guidelines: https://github.com/mindspore-ai/mindspore/blob/master/CONTRIBUTING.md
-->

## Environment
### Hardware Environment(`Ascend`/`GPU`/`CPU`):
> Uncomment only one ` /device <>` line, hit enter to put that in a new line, and remove leading whitespaces from that line:
>
> `/device ascend`</br>
> `/device gpu`</br>
> `/device cpu`</br>

### Software Environment:
- **MindSpore version (source or binary)**:
- **Python version (e.g., Python 3.7.5)**:
- **OS platform and distribution (e.g., Linux Ubuntu 16.04)**:
- **GCC/Compiler version (if compiled from source)**:

## Describe the current behavior


## Describe the expected behavior


## Steps to reproduce the issue
1.
2.
3.

## Related log / screenshot


## Special notes for this issue



+0 -19  .github/ISSUE_TEMPLATE/task-tracking.md

@@ -1,19 +0,0 @@
---
name: Task
about: Use this template for task tracking
labels: kind/task

---

## Task Description


## Task Goal


## Sub Task
| No. | Task Description | Issue ID |
| --- | ---------------- | -------- |
| 1 | | |
| 2 | | |


+0 -24  .github/PULL_REQUEST_TEMPLATE.md

@@ -1,24 +0,0 @@
<!-- Thanks for sending a pull request! Here are some tips for you:

If this is your first time, please read our contributor guidelines: https://github.com/mindspore-ai/mindspore/blob/master/CONTRIBUTING.md
-->

**What type of PR is this?**
> Uncomment only one ` /kind <>` line, hit enter to put that in a new line, and remove leading whitespaces from that line:
>
> `/kind bug`</br>
> `/kind task`</br>
> `/kind feature`</br>

**What does this PR do / why do we need it**:


**Which issue(s) this PR fixes**:
<!--
*Automatically closes linked issue when PR is merged.
Usage: `Fixes #<issue number>`, or `Fixes (paste link of issue)`.
-->
Fixes #

**Special notes for your reviewers**:


+3 -9  .gitignore

@@ -78,18 +78,12 @@ TESTS*.xml
# vscode settings
.vscode


# OS files
*.DS_Store

package-lock.json

build/*
!build/scripts
!build/build.sh
build/lib
build/bdist.*

output/
!output/README.md

mindinsight/ui/public/static/js/graphvizlib.wasm
third_party/*
third_party/securec/build

+0 -3  .gitmodules

@@ -1,3 +0,0 @@
[submodule "third_party/securec"]
path = third_party/securec
url = https://gitee.com/openeuler/libboundscheck.git

+0 -39  CMakeLists.txt

@@ -1,39 +0,0 @@
cmake_minimum_required(VERSION 3.14)

project(MindInsight)

find_package(Git QUIET)

if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
    option(GIT_SUBMODULE "Check submodules during build" ON)
    if(GIT_SUBMODULE)
        message(STATUS "Updating submodules")
        execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive
                        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                        RESULT_VARIABLE GIT_SUBMOD_RESULT)
        if(NOT GIT_SUBMOD_RESULT EQUAL "0")
            message(FATAL_ERROR "git submodule update failed with ${GIT_SUBMOD_RESULT}, please checkout submodules.")
        endif()
    endif()
elseif(NOT EXISTS "${PROJECT_SOURCE_DIR}/third_party/securec/src")
    message(FATAL_ERROR "git command not found or not in a git repository, third_party/securec/src not exists.")
else()
    message(WARNING "git command not found or not in a git repository, submodules not updated.")
endif()

if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release")
endif()

set(CMAKE_C_FLAGS_DEBUG "$ENV{CFLAGS} -fPIC -O0 -Wall -fvisibility=hidden \
-Wno-deprecated-declarations -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer \
-D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)='")
set(CMAKE_C_FLAGS_RELEASE "$ENV{CFLAGS} -fPIC -O3 -Wall -fvisibility=hidden -Wno-deprecated-declarations")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

#add flags
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror")

include_directories(./third_party/securec/include)
aux_source_directory(./third_party/securec/src SECUREC_SRCS)
add_library(securec ${SECUREC_SRCS})

+0 -1  MANIFEST.in

@@ -3,6 +3,5 @@ recursive-exclude * .git
recursive-exclude * .gitignore
recursive-exclude * __pycache__
recursive-exclude * *.py[co] *.swp
recursive-exclude mindinsight/datavisual/utils/crc32 *
recursive-exclude mindinsight/ui *
recursive-include mindinsight/ui/dist *

+61 -128  README.md

@@ -1,182 +1,115 @@
# MindInsight
MindInsight provides MindSpore with easy-to-use debugging and tuning capabilities. It
enables users to visualize the experiments. The features of MindInsight are as follows.

<!-- TOC -->
- Visualization of training process:

- [Introduction](#introduction)
Provide visualization of training process information,
such as computation graph, training process metrics, etc.

- Traceability of training result:

Provide visualization of model parameters information,
such as training data, model accuracy, etc.


# Index

- [More about MindInsight](#more-about-mindinsight)
- [Installation](#installation)
- [System Environment Information Confirmation](#system-environment-information-confirmation)
- [Installation Methods](#installation-methods)
- [Installation by pip](#installation-by-pip)
- [Installation by Source Code](#installation-by-source-code)
- [Downloading Source Code from Gitee](#downloading-source-code-from-gitee)
- [Compiling MindInsight](#compiling-mindinsight)
- [Installation Verification](#installation-verification)
- [Quick Start](#quick-start)
- [QuickStart](#quickstart)
- [Docs](#docs)
- [Community](#community)
- [Governance](#governance)
- [Communication](#communication)
- [Vulkan Vision](#vulkan-vision)
- [Contributing](#contributing)
- [Release Notes](#release-notes)
- [License](#license)

<!-- /TOC -->

[简体中文](./README_CN.md)
# More about MindInsight

## Introduction
The architecture diagram of MindInsight is illustrated as follows:

MindInsight provides MindSpore with easy-to-use debugging and tuning capabilities. During the training, data such as scalar, tensor, image, computational graph, model hyper parameter and training’s execution time can be recorded in the file for viewing and analysis through the visual page of MindInsight.

![MindInsight Architecture](docs/arch.png)

Click to view the [MindInsight design document](https://www.mindspore.cn/doc/note/en/master/design/mindinsight.html), learn more about the design.
Click to view the [Tutorial documentation](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/visualization_tutorials.html) to learn more about the MindInsight tutorial.

## Installation

### System Environment Information Confirmation

- The hardware platform is Ascend or GPU.
- Confirm that [Python](https://www.python.org/ftp/python/3.7.5/Python-3.7.5.tgz) 3.7.5 is installed.
- The versions of MindInsight and MindSpore must be consistent.
- If you use source code to compile and install, the following dependencies also need to be installed:
- Confirm that [CMake](https://cmake.org/download/) 3.14.1 or later is installed.
- Confirm that [GCC](https://gcc.gnu.org/releases.html) 7.3.0 is installed.
- Confirm that [node.js](https://nodejs.org/en/download/) 10.19.0 or later is installed.
- Confirm that [wheel](https://pypi.org/project/wheel/) 0.32.0 or later is installed.
- Confirm that [pybind11](https://pypi.org/project/pybind11/) 2.4.3 or later is installed.
- All other dependencies are included in [requirements.txt](https://gitee.com/mindspore/mindinsight/blob/master/requirements.txt).

### Installation Methods

You can install MindInsight either by pip or by source code.

#### Installation by pip

Install from PyPI:

```bash
pip install mindinsight
```

Install with customized version:

```bash
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindInsight/ascend/{system}/mindinsight-{version}-cp37-cp37m-linux_{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple
```

> - When the network is connected, dependency items are automatically downloaded during .whl package installation. (For details about other dependency items, see [requirements.txt](https://gitee.com/mindspore/mindinsight/blob/master/requirements.txt)). In other cases, you need to manually install dependency items.
> - `{version}` denotes the version of MindInsight. For example, when you are downloading MindInsight 1.0.1, `{version}` should be 1.0.1.
> - `{arch}` denotes the system architecture. For example, the Linux system you are using is x86 architecture 64-bit, `{arch}` should be `x86_64`. If the system is ARM architecture 64-bit, then it should be `aarch64`.
> - `{system}` denotes the system version. For example, if you are using EulerOS on ARM architecture, `{system}` should be `euleros_aarch64`. Currently, the following systems are supported by Ascend: `euleros_aarch64`/`centos_aarch64`/`centos_x86`/`ubuntu_aarch64`/`ubuntu_x86`. `ubuntu_x86` is supported by GPU. A worked example of filling in these placeholders follows below.
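
For illustration only, here is a minimal sketch of how the placeholders above combine into a concrete download URL. The values are the examples quoted in the notes (MindInsight 1.0.1, an Ascend build for `ubuntu_x86` on an x86 64-bit host) and are assumptions; substitute the values for your own environment.

```python
# Hypothetical example of filling in the {version}, {system} and {arch} placeholders.
version = "1.0.1"       # MindInsight version
system = "ubuntu_x86"   # euleros_aarch64 / centos_aarch64 / centos_x86 / ubuntu_aarch64 / ubuntu_x86
arch = "x86_64"         # x86_64 or aarch64

url = (
    "https://ms-release.obs.cn-north-4.myhuaweicloud.com/"
    f"{version}/MindInsight/ascend/{system}/"
    f"mindinsight-{version}-cp37-cp37m-linux_{arch}.whl"
)
print(url)  # pass the printed URL to `pip install <url>`
```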

#### Installation by Source Code

##### Downloading Source Code from Gitee

```bash
git clone https://gitee.com/mindspore/mindinsight.git
```

##### Compiling MindInsight

You can choose any of the following installation methods:
## Summary log file

1. Run the following command in the root directory of the source code:
The summary log file consists of a series of operation events. Each event contains
the necessary data for visualization.

```bash
cd mindinsight
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
python setup.py install
```
MindSpore uses the Callback mechanism to record graph, scalar, image and model
information into summary log file.

2. Build the `whl` package for installation.
- The scalar and image are recorded by the Summary operator.

Enter the root directory of the source code, first execute the MindInsight compilation script in the `build` directory, and then execute the command to install the `whl` package generated in the `output` directory.
- The computation graph is recorded by SummaryRecord after it was compiled.

```bash
cd mindinsight
bash build/build.sh
pip install output/mindinsight-{version}-cp37-cp37m-linux_{arch}.whl -i https://pypi.tuna.tsinghua.edu.cn/simple
```
- The model parameters are recorded by TrainLineage or EvalLineage.

### Installation Verification
MindInsight provides the capability to analyze summary log files and visualize the relevant information.

Execute the following command:
## Visualization

```bash
mindinsight start
```
MindInsight provides users with a full-process visualized GUI during
AI development, in order to help model developers to improve the model
precision efficiently.

If it prompts the following information, the installation is successful:
MindInsight has the following visualization capabilities:

```bash
Web address: http://127.0.0.1:8080
service start state: success
```
### Graph visualization

## Quick Start
The GUI of MindInsight displays the structure of neural network, the data flow and control
flow of each operator during the entire training process.

Before using MindInsight, the data in the training process should be recorded. When starting MindInsight, the directory of the saved data should be specified. After successful startup, the data can be viewed through the web page. Here is a brief introduction to recording training data, as well as starting and stopping MindInsight.
### Scalar visualization

[SummaryCollector](https://www.mindspore.cn/doc/api_python/en/master/mindspore/mindspore.train.html#mindspore.train.callback.SummaryCollector) is the interface MindSpore provides for a quick and easy collection of common data about computational graphs, loss values, learning rates, parameter weights, and so on. Below is an example of using `SummaryCollector` for data collection, specifying the directory where the data is stored in `./summary_dir`.
The GUI of MindInsight displays the change tendency of a specific scalar during the entire
training process, such as loss value and accuracy rate of each iteration.

```python
...
Two scalar curves can be combined and displayed in one chart.

from mindspore.train.callback import SummaryCollector
summary_collector = SummaryCollector(summary_dir='./summary_dir')
model.train(epoch=1, train_dataset=ds_train, callbacks=[summary_collector])
```
### Image visualization

For more ways to record visual data, see the [MindInsight Tutorial](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/visualization_tutorials.html).
The GUI of MindInsight displays both original images and enhanced images during the entire
training process.

After you've collected the data, when you launch MindInsight, specify the directory in which the data has been stored.
### Model lineage visualization

```bash
mindinsight start --summary-base-dir ./summary_dir
```
The GUI of MindInsight displays the parameters and metrics of all models, such as the
learning rate, the number of samples and the loss function of each model.

After successful startup, visit `http://127.0.0.1:8080` through the browser to view the web page.
### Dataset Graph visualization

Command of stopping the MindInsight service:
The GUI of MindInsight displays the pipeline of dataset processing and augmentation.

```bash
mindinsight stop
```
### Dataset Lineage visualization

## Docs
The GUI of MindInsight displays the parameters and operations of the dataset processing and augmentation.

More details about installation guide, tutorials and APIs, please see the
[User Documentation](https://gitee.com/mindspore/docs).
# Installation

## Community
See [Install MindInsight](https://www.mindspore.cn/install/en).

### Governance
# QuickStart

Check out how MindSpore Open Governance [works](https://gitee.com/mindspore/community/blob/master/governance.md).
See [guidance](https://www.mindspore.cn/tutorial/en/0.1.0-alpha/advanced_use/visualization_tutorials.html)

### Communication
# Docs

- [MindSpore Slack](https://join.slack.com/t/mindspore/shared_invite/zt-dgk65rli-3ex4xvS4wHX7UDmsQmfu8w) - Communication platform for developers.
- IRC channel at `#mindspore` (only for meeting minutes logging purpose)
- Video Conferencing: TBD
- Mailing-list: <https://mailweb.mindspore.cn/postorius/lists>
See [API Reference](https://www.mindspore.cn/api/en/0.1.0-alpha/index.html)

## Vulkan Vision
# Community

Vulkan Vision (V-Vision) provides an unprecedented level of detail into the execution of Vulkan applications through dynamic instrumentation. V-Vision supports analyzing AI workloads implemented using a compute pipeline as well as traditional raster and ray-tracing Vulkan applications. To use V-Vision please refer to the [build instructions](https://gitee.com/mindspore/mindinsight/blob/master/ecosystem_tools/VulkanVision/README.md). For more information, please refer to [the paper](https://webdocs.cs.ualberta.ca/~amaral/papers/PankratzCGO21) published at CGO 2021.
- [MindSpore Slack](https://join.slack.com/t/mindspore/shared_invite/enQtOTcwMTIxMDI3NjM0LTNkMWM2MzI5NjIyZWU5ZWQ5M2EwMTQ5MWNiYzMxOGM4OWFhZjI4M2E5OGI2YTg3ODU1ODE2Njg1MThiNWI3YmQ) - Communication platform for developers.

## Contributing
# Contributing

Welcome contributions. See our [Contributor Wiki](https://gitee.com/mindspore/mindspore/blob/master/CONTRIBUTING.md) for more details.

## Release Notes
# Release Notes

The release notes, see our [RELEASE](RELEASE.md).

## License
# License

[Apache License 2.0](LICENSE)

+0 -178  README_CN.md

@@ -1,178 +0,0 @@
# MindInsight

<!-- TOC -->

- [Introduction to MindInsight](#mindinsight介绍)
- [Installation](#安装)
- [System Environment Information Confirmation](#确认系统环境信息)
- [Installation Methods](#安装方式)
- [Installation by pip](#pip安装)
- [Installation by Source Code](#源码编译安装)
- [Downloading Source Code from Gitee](#从代码仓下载源码)
- [Compiling and Installing MindInsight](#编译安装mindinsight)
- [Installation Verification](#验证是否成功安装)
- [Quick Start](#快速入门)
- [Docs](#文档)
- [Community](#社区)
- [Governance](#治理)
- [Communication](#交流)
- [Contributing](#贡献)
- [Release Notes](#版本说明)
- [License](#许可证)

<!-- /TOC -->

[View English](./README.md)

## Introduction to MindInsight

MindInsight provides MindSpore with easy-to-use debugging and tuning capabilities. During training, data such as scalars, tensors, images, computational graphs, model hyperparameters and training execution time can be recorded to a file, and then viewed and analyzed through the MindInsight visualization pages.

![MindInsight Architecture](docs/arch.png)

Click to view the [MindInsight design document](https://www.mindspore.cn/doc/note/zh-CN/master/design/mindinsight.html) to learn more about the design.
Click to view the [Tutorial documentation](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/visualization_tutorials.html) to learn more about the MindInsight tutorial.

## Installation

### System Environment Information Confirmation

- The hardware platform is Ascend or GPU.
- Confirm that [Python](https://www.python.org/ftp/python/3.7.5/Python-3.7.5.tgz) 3.7.5 is installed.
- The versions of MindInsight and MindSpore must be consistent.
- If you install by compiling the source code, the following dependencies also need to be installed:
- Confirm that [CMake](https://cmake.org/download/) 3.14.1 or later is installed.
- Confirm that [GCC](https://gcc.gnu.org/releases.html) 7.3.0 is installed.
- Confirm that [node.js](https://nodejs.org/en/download/) 10.19.0 or later is installed.
- Confirm that [wheel](https://pypi.org/project/wheel/) 0.32.0 or later is installed.
- Confirm that [pybind11](https://pypi.org/project/pybind11/) 2.4.3 or later is installed.
- For other dependencies, see [requirements.txt](https://gitee.com/mindspore/mindinsight/blob/master/requirements.txt).

### Installation Methods

You can install MindInsight either by pip or by compiling from source code.

#### Installation by pip

Install the version from PyPI:

```bash
pip install mindinsight
```

Install a customized version:

```bash
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindInsight/ascend/{system}/mindinsight-{version}-cp37-cp37m-linux_{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple
```

> - When the network is connected, the dependencies of the MindInsight package are automatically downloaded during .whl package installation (for details about the dependencies, see [requirements.txt](https://gitee.com/mindspore/mindinsight/blob/master/requirements.txt)); in other cases, you need to install the dependencies yourself.
> - `{version}` denotes the MindInsight version. For example, when downloading MindInsight 1.0.1, `{version}` should be written as 1.0.1.
> - `{arch}` denotes the system architecture. For example, if the Linux system you are using is x86 architecture 64-bit, `{arch}` should be written as `x86_64`; if the system is ARM architecture 64-bit, write `aarch64`.
> - `{system}` denotes the system version. For example, if you are using EulerOS on ARM architecture, `{system}` should be written as `euleros_aarch64`. Currently, the Ascend version supports the following systems: `euleros_aarch64`/`centos_aarch64`/`centos_x86`/`ubuntu_aarch64`/`ubuntu_x86`; the GPU version supports `ubuntu_x86`.

#### Installation by Source Code

##### Downloading Source Code from Gitee

```bash
git clone https://gitee.com/mindspore/mindinsight.git
```

##### Compiling and Installing MindInsight

You can choose either of the following installation methods:

1. Run the following commands in the root directory of the source code.

```bash
cd mindinsight
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
python setup.py install
```

2. Build the `whl` package for installation.

Enter the root directory of the source code, first run the MindInsight compilation script in the `build` directory, and then run the command to install the `whl` package generated in the `output` directory.

```bash
cd mindinsight
bash build/build.sh
pip install output/mindinsight-{version}-cp37-cp37m-linux_{arch}.whl -i https://pypi.tuna.tsinghua.edu.cn/simple
```

### Installation Verification

Run the following command:

```bash
mindinsight start
```

If the following prompt appears, the installation is successful:

```bash
Web address: http://127.0.0.1:8080
service start state: success
```

## Quick Start

Before using MindInsight, the data in the training process needs to be recorded. When starting MindInsight, specify the location of the saved data; after a successful startup, the data can be viewed through the visualization pages. The following briefly introduces recording training process data, as well as starting and stopping the MindInsight service.

[SummaryCollector](https://www.mindspore.cn/doc/api_python/zh-CN/master/mindspore/mindspore.train.html#mindspore.train.callback.SummaryCollector) is the interface MindSpore provides for quick and easy collection of common information, including computational graphs, loss values, learning rates, parameter weights, and so on.
Below is an example of using `SummaryCollector` for data collection, where the directory for storing the data is specified as `./summary_dir`.

```python
...

from mindspore.train.callback import SummaryCollector
summary_collector = SummaryCollector(summary_dir='./summary_dir')
model.train(epoch=1, train_dataset=ds_train, callbacks=[summary_collector])
```

For more ways to record visualization data, see the [MindInsight Tutorial](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/visualization_tutorials.html).

After the data has been collected, start MindInsight and specify the directory where the data is stored.

```bash
mindinsight start --summary-base-dir ./summary_dir
```

After a successful startup, visit `http://127.0.0.1:8080` through the browser to view the visualization pages.

Command for stopping the MindInsight service:

```bash
mindinsight stop
```

## Docs

For more details about the installation guide, tutorials and APIs, please see the [User Documentation](https://gitee.com/mindspore/docs).

## Community

### Governance

Check out how MindSpore conducts [open governance](https://gitee.com/mindspore/community/blob/master/governance.md).

### Communication

- [MindSpore Slack](https://join.slack.com/t/mindspore/shared_invite/zt-dgk65rli-3ex4xvS4wHX7UDmsQmfu8w) - Communication platform for developers.
- IRC channel at `#mindspore` (only for meeting minutes logging purposes)
- Video Conferencing: TBD
- Mailing list: <https://mailweb.mindspore.cn/postorius/lists>

## Contributing

Contributions are welcome. For more details, see our [Contributor Wiki](https://gitee.com/mindspore/mindspore/blob/master/CONTRIBUTING.md).

## Release Notes

For the release notes, see [RELEASE](RELEASE.md).

## License

[Apache License 2.0](LICENSE)

+6 -306  RELEASE.md

@@ -1,309 +1,9 @@
# MindInsight 1.2.0
## MindInsight

## MindInsight 1.2.0 Release Notes
# Release 0.1.0-alpha

### Major Features and Improvements
* Training process observation
* Provides and displays training process information, including computational graphs and training process indicators.

#### Profiling

- [STABLE] Support memory profiling.(Ascend)
- [STABLE] Support host cpu utilization profiling.(Ascend/GPU)
- [STABLE] Support timeline for Host&Device Hybrid Training.(Ascend/GPU)
- [STABLE] Support show step breakdown information(Step Interval, Forward and Backward Propagation, and Step Tail) of each device in cluster profiling ui page.(Ascend)

#### MindConverter

- [STABLE] Support both classic computer vision and bert model definition script and trained weights migration from TensorFlow or PyTorch.
- [STABLE] Support ONNX model migration to improve the usability of PyTorch model migration.

#### Model Explanation

- [STABLE] Support counterfactual explanation for image classification.

### API Change

#### Backwards Compatible Change

##### Python API

###### add parameter `export_options` for `SummaryCollector` and `SummaryRecord`([!10881](https://gitee.com/mindspore/mindspore/pulls/10881))

Perform custom operations on the export data. You can customize the export data with a dictionary. For example, you can set `{'tensor_format': 'npy'}` to export tensor as npy file.

###### add parameter `raise_exception` for `SummaryRecord`([!10436](https://gitee.com/mindspore/mindspore/pulls/10436))

The parameter `raise_exception` determines whether to throw an exception when an exception occurs.
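
A minimal sketch of how these two new parameters might be used, assuming the MindSpore 1.2 Python API; the training call is commented out because the network and dataset are placeholders:

```python
from mindspore.train.callback import SummaryCollector
from mindspore.train.summary import SummaryRecord

# Export recorded tensors as .npy files in addition to the summary file.
summary_collector = SummaryCollector(summary_dir='./summary_dir',
                                     export_options={'tensor_format': 'npy'})
# model.train(epoch=1, train_dataset=ds_train, callbacks=[summary_collector])

# With raise_exception=True, an error that occurs while recording is raised
# to the caller instead of being handled silently.
with SummaryRecord(log_dir='./summary_dir', raise_exception=True) as record:
    record.record(step=1)
```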

###### add API `register_uncertainty` for `explainer.ImageClassificationRunner`([!11309](https://gitee.com/mindspore/mindspore/pulls/11309))

`register_uncertainty` helps register an uncertainty instance to compute the epistemic uncertainty based on Bayes' theorem.

###### add API `register_hierarchical_occlusion` for `explainer.ImageClassificationRunner`([!11309](https://gitee.com/mindspore/mindspore/pulls/11309))

`register_hierarchical_occlusion` helps register hierarchical occlusion instances.

##### Command Line Interface

###### `MindConverter` removes support for pth format model, `--project_path` deleted([!1253](https://gitee.com/mindspore/mindinsight/pulls/1253))

The pth format model is not supported anymore, please use ONNX to migrate.

### Bug fixes

- Error information missing when running on an unsupported device (e.g., CPU). [!11801](https://gitee.com/mindspore/mindspore/pulls/11801)

### Contributors

Thanks goes to these wonderful people:

Congli Gao, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Miao, Gongchang Ou, Kai Wen, Yue Wang, Lihua Ye, Ximiao Yu, Yunshu Zhang, Ning Ma, Yihui Zhang, Hong Sheng, Ran Mo, Zhaohong Guo, Tianshu Liang, Shuqiang Jiang, Yanjun Peng, Haitao Yang, Jiabin Liu, Han Gao, Xiaohui Li, Ngaifai Ng, Hui Pan, Weifeng Huang, Yifan Xia, Xuefeng Feng, Yanxi Wei.

Contributions of any kind are welcome!

# MindInsight 1.1.0

## MindInsight 1.1.0 Release Notes

### Major Features and Improvements

#### Precision tuning framework

- Support useful checks on weights, activations, gradients and tensors, such as:
- check unchanged weight
- check weight change above threshold
- check activation range
- check gradient vanishing
- check tensor overflow
- Support rechecking with new watch points on the same data.
- Newly designed tensor view with fix suggestions and tensor context to quickly locate root cause of problems.
- Support recommending watch points to find common precision problems.
- Support debugger on multigraph network.

#### Profiler

- Support GPU step trace profiling.
- Support GPU minddata profiling.

#### MindConverter

- Support TensorFlow model definition script to MindSpore for CV field.
- Conversion capability of PyTorch is enhanced.

#### Model Explanation

Provide explanations and their benchmarks for image classification deep CNN models.

- Support 6 explanation methods: Gradient, Deconvolution, GuidedBackprop, GradCAM, RISE, Occlusion
- Support 4 benchmark methods: Localization, Faithfulness, Class Sensitivity, Robustness
- Provide a high-level API (ImageClassificationRunner) for users to execute explanation methods and benchmark methods and store the results easily.

### API Change

#### Improvements

##### Command Line Interface

- `--enable_debugger`: Support both 1 and True ([!1051](https://gitee.com/mindspore/mindinsight/pulls/1051))
- `ENABLE_MS_DEBUGGER`: Support both 1 and True ([!10199](https://gitee.com/mindspore/mindspore/pulls/10199))
- `parse_summary`: Add parse_summary function to convert summary file to image file and csv file ([!774](https://gitee.com/mindspore/mindinsight/pulls/774))

### Bugfixes

#### Profiler

- Fix parser framework file error if the profiling data of one op is saved separately to two files.([!7824](https://gitee.com/mindspore/mindspore/pulls/7824))

#### Model Explanation

- Add reset_offset when CRCLengthError and CRCError happen([!955](https://gitee.com/mindspore/mindinsight/pulls/955))
- Fix the bug which ignores the sample_event when sample_id == 0. ([!968](https://gitee.com/mindspore/mindinsight/pulls/968))

### Thanks to our Contributors

Thanks goes to these wonderful people:

Congli Gao, Jianfeng Zhu, Zhenzhong Kou, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Miao, Gongchang Ou, Yongxiu Qu, Luyu Qiu, Kai Wen, Yue Wang, Lihua Ye, Ximiao Yu, Yunshu Zhang, Ning Ma, Yihui Zhang, Shuide Wang, Hong Sheng, Ran Mo, Zhaohong Guo, Hui Pan, Weining Wang, Weifeng Huang, Yifan Xia, Chen Cao, Ngaifai Ng, Xiaohui Li, Yi Yang, Luyu Qiu, Yunpeng Wang, Yuhan Shi, Yanxi Wei.

Contributions of any kind are welcome!

# MindInsight 1.0.0

## MindInsight 1.0.0 Release Notes

### Major Features and Improvements

- Release MindSpore Debugger.
- MindConverter ability is enhanced, supporting scripts generation based on PyTorch model.
- Support training hyper-parameter importance visualization.
- Support GPU timeline.

### Bugfixes

- Optimize aicpu display method. ([!595](https://gitee.com/mindspore/mindinsight/pulls/595/files))
- Add the summary loading switch mechanism. ([!601](https://gitee.com/mindspore/mindinsight/pulls/601/files))
- Detect a summary dir having summary files or not. ([!632](https://gitee.com/mindspore/mindinsight/pulls/632/files))

### Thanks to our Contributors

Thanks goes to these wonderful people:

Congli Gao, Jianfeng Zhu, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Miao, Gongchang Ou, Yongxiu Qu, Luyu Qiu, Kai Wen, Yue Wang, Lihua Ye, Ximiao Yu, Yunshu Zhang, Ning Ma, Yihui Zhang, Shuide Wang, Hong Sheng, Ran Mo, Zhaohong Guo, Hui Pan, Junyan Qin, Weining Wang, Weifeng Huang, Yifan Xia.

Contributions of any kind are welcome!

# MindInsight 0.7.0-beta

## MindInsight 0.7.0 Release Notes

### Major Features and Improvements

- Optimize node name display in computation graph.
- MindSpore Profiler supports network training with GPU operators.
- MindWizard generates classic network scripts according to user preference.
- Web UI supports language internationalization, including both Chinese and English.

### Bugfixes

- Optimize UI page initialization to handle timeout requests. ([!503](https://gitee.com/mindspore/mindinsight/pulls/503))
- Fix the line break problem when the profiling file number is too long. ([!532](https://gitee.com/mindspore/mindinsight/pulls/532))

### Thanks to our Contributors

Thanks goes to these wonderful people:

Congli Gao, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Miao, Gongchang Ou, Yongxiu Qu, Hui Pan, Luyu Qiu, Junyan Qin, Kai Wen, Weining Wang, Yue Wang, Zhuanke Wu, Yifan Xia, Lihua Ye, Weibiao Yu, Ximiao Yu, Yunshu Zhang, Ting Zhao, Jianfeng Zhu, Ning Ma, Yihui Zhang, Shuide Wang, Hong Sheng, Lin Pan, Ran Mo.

Contributions of any kind are welcome!

# MindInsight 0.6.0-beta

## MindInsight 0.6.0 Release Notes

### Major Features and Improvements

- Provide monitoring capabilities for each of Ascend AI processor and other hardware resources, including CPU and memory.
- Visualization of weight, gradient and other tensor data in model training.
- Provide tabular form presentation of tensor data.
- Provide histogram to show the distribution of tensor data and its change over time.

### Bugfixes

- UI fix for the error message display mode of the tensor during real-time training. ([!465](https://gitee.com/mindspore/mindinsight/pulls/465))
- The summary file size is larger than max_file_size. ([!3481](https://gitee.com/mindspore/mindspore/pulls/3481))
- Fix real-time training error when disk is full. ([!3058](https://gitee.com/mindspore/mindspore/pulls/3058))

### Thanks to our Contributors

Thanks goes to these wonderful people:

Congli Gao, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Miao, Gongchang Ou, Yongxiu Qu, Hui Pan, Luyu Qiu, Junyan Qin, Kai Wen, Weining Wang, Yue Wang, Zhuanke Wu, Yifan Xia, Lihua Ye, Weibiao Yu, Ximiao Yu, Yunshu Zhang, Ting Zhao, Jianfeng Zhu, Ning Ma, Yihui Zhang, Shuide Wang.

Contributions of any kind are welcome!

# MindInsight 0.5.0-beta

## MindInsight 0.5.0 Release Notes

### Major Features and Improvements

- MindSpore Profiler
- Provide performance analyse tool for the input data pipeline.
- Provide timeline analyse tool, which can show the detail of the streams/tasks.
- Provide a tool to visualize the step trace information, which can be used to analyse the general performance of the neural network in each phase.
- Provide profiling guides for the users to find the performance bottlenecks quickly.
- CPU summary operations support for CPU summary data.
- Over threshold warn support in scalar training dashboard.
- Provide more user-friendly callback functions for visualization.
- Provide the unified callback `SummaryCollector` to log the most common visualization events.
- Discard the original visualization callbacks `SummaryStep`, `TrainLineage` and `EvalLineage`.
- `SummaryRecord` provides a new API `add_value` to collect data into the cache for summary persistence.
- `SummaryRecord` provides a new API `set_mode` to distinguish the summary persistence mode at different stages (a short sketch follows this list).
- MindConverter supports conversion of more operators and networks, and improves its ease of use.
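
A minimal sketch of how the new `add_value` and `set_mode` APIs might be combined on `SummaryRecord`, assuming the API described above; the scalar value and step number are placeholders:

```python
from mindspore import Tensor
from mindspore.train.summary import SummaryRecord

with SummaryRecord(log_dir='./summary_dir') as record:
    record.set_mode('train')                         # mark the persistence mode for this stage
    record.add_value('scalar', 'loss', Tensor(0.1))  # collect a value into the cache
    record.record(step=1)                            # persist the cached values for this step
```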

### Bugfixes

- Fix FileNotFound exception by adding robust check for summary watcher ([!281](https://gitee.com/mindspore/mindinsight/pulls/281)).
- UI fix operator table sort jump problem ([!283](https://gitee.com/mindspore/mindinsight/pulls/283)).
- Dataset serializer return schema json str when schema type is `mindspore.dataset.engine.Schema` ([!2185](https://gitee.com/mindspore/mindspore/pulls/2185)).

### Thanks to our Contributors

Thanks goes to these wonderful people:

Chao Chen, Congli Gao, Ye Huang, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Miao, Gongchang Ou, Yongxiu Qu, Hui Pan, Luyu Qiu, Junyan Qin, Kai Wen, Weining Wang, Yue Wang, Zhuanke Wu, Yifan Xia, Lihua Ye, Weibiao Yu, Ximiao Yu, Yunshu Zhang, Ting Zhao, Jianfeng Zhu.

Contributions of any kind are welcome!

# MindInsight 0.3.0-alpha

## MindInsight 0.3.0 Release Notes

### Major Features and Improvements

- Profiling
- Provide easy to use apis for profiling start/stop and profiling data analyse (on Ascend only).
- Provide operators performance display and analysis on MindInsight UI.
- Large scale network computation graph visualization.
- Optimize summary record implementation and improve its performance.
- Improve lineage usability
- Optimize lineage display and enrich tabular operation.
- Decouple lineage callback from `SummaryRecord`.
- Support scalar compare of multiple runs.
- Scripts conversion from other frameworks
- Support for converting PyTorch scripts within TorchVision to MindSpore scripts automatically.

### Bugfixes

- Fix pb files loaded problem when files are modified at the same time ([!53](https://gitee.com/mindspore/mindinsight/pulls/53)).
- Fix load data thread stuck in `LineageCacheItemUpdater` ([!114](https://gitee.com/mindspore/mindinsight/pulls/114)).
- Fix samples from previous steps erased due to tags size too large problem ([!86](https://gitee.com/mindspore/mindinsight/pulls/86)).
- Fix image and histogram event package error ([!1143](https://gitee.com/mindspore/mindspore/pulls/1143)).
- Equally distribute histogram ignoring actual step number to avoid large white space ([!66](https://gitee.com/mindspore/mindinsight/pulls/66)).

### Thanks to our Contributors

Thanks goes to these wonderful people:

Chao Chen, Congli Gao, Ye Huang, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Liang, Pengting Luo, Yanming Miao, Gongchang Ou, Yongxiu Qu, Hui Pan, Luyu Qiu, Junyan Qin, Kai Wen, Weining Wang, Yue Wang, Zhuanke Wu, Yifan Xia, Weibiao Yu, Ximiao Yu, Ting Zhao, Jianfeng Zhu.

Contributions of any kind are welcome!

# MindInsight 0.2.0-alpha

## MindInsight 0.2.0 Release Notes

### Major Features and Improvements

- Parameter distribution graph (Histogram).

Now you can use [`HistogramSummary`](https://www.mindspore.cn/doc/api_python/en/master/mindspore/mindspore.ops.html#mindspore.ops.HistogramSummary) and MindInsight to record and visualize the distribution info of tensors. See our [tutorial](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/visualization_tutorials.html). A short sketch follows this list.

- Lineage support Custom information
- GPU support
- Model and dataset tracking linkage support
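
A minimal sketch of recording a parameter distribution with `HistogramSummary` inside a network's `construct`, assuming the operator API linked above; the network, parameter name and shapes are made up for illustration, and the histogram is only persisted when the training script also writes summary data (for example via `SummaryRecord`):

```python
import numpy as np
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import Tensor, Parameter

class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.weight = Parameter(Tensor(np.random.randn(3, 3).astype(np.float32)), name='weight')
        self.histogram_summary = ops.HistogramSummary()
        self.matmul = ops.MatMul()

    def construct(self, x):
        # Record the current weight distribution; MindInsight renders it as a histogram.
        self.histogram_summary("weight", self.weight)
        return self.matmul(x, self.weight)
```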

### Bugfixes

- Reduce cyclomatic complexity of `list_summary_directories` ([!11](https://gitee.com/mindspore/mindinsight/pulls/11)).
- Fix unsafe functions and duplication files and redundant codes ([!14](https://gitee.com/mindspore/mindinsight/pulls/14)).
- Fix sha256 checksum missing bug ([!24](https://gitee.com/mindspore/mindinsight/pulls/24)).
- Fix graph bug when node name is empty ([!34](https://gitee.com/mindspore/mindinsight/pulls/34)).
- Fix start/stop command error code incorrect ([!44](https://gitee.com/mindspore/mindinsight/pulls/44)).

### Thanks to our Contributors

Thanks goes to these wonderful people:

Ye Huang, Weifeng Huang, Zhenzhong Kou, Pengting Luo, Hongzhang Li, Yongxiong Liang, Gongchang Ou, Hui Pan, Luyu Qiu, Junyan Qin, Kai Wen, Weining Wang, Yifan Xia, Yunshu Zhang, Ting Zhao

Contributions of any kind are welcome!

# MindInsight 0.1.0-alpha

## MindInsight 0.1.0 Release Notes

- Training process observation
- Provides and displays training process information, including computational graphs and training process indicators.

- Training result tracing
- Provides functions of tracing and visualizing model training parameter information, including filtering and sorting of training data, model accuracy and training hyperparameters.
* Training result tracing
* Provides functions of tracing and visualizing model training parameter information, including filtering and sorting of training data, model accuracy and training hyperparameters.

+0 -898  Third_Party_Open_Source_Software_Notice

@@ -1,898 +0,0 @@
OPEN SOURCE SOFTWARE NOTICE
Please note we provide an open source software notice along with this product and/or this product firmware (in the following just “this product”). The open source software licenses are granted by the respective right holders. And the open source licenses prevail all other license information with regard to the respective open source software contained in the product, including but not limited to End User Software Licensing Agreement. This notice is provided on behalf of Huawei Technologies Co. Ltd. and any of its local subsidiaries which may have provided this product to you in your local country.
Warranty Disclaimer
THE OPEN SOURCE SOFTWARE IN THIS PRODUCT IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE APPLICABLE LICENSES FOR MORE DETAILS.
Copyright Notice and License Texts
Software: axios v0.18.1
Copyright (c) 2014-present Matt Zabriskie
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Software: d3 v5.9.7
Copyright 2010-2017 Mike Bostock
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the author nor the names of contributors may be used to
endorse or promote products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Software: d3-graphviz v3.0.4
Copyright 2017, Magnus Jacobsson
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the author nor the names of contributors may be used to
endorse or promote products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Software: vue v2.6.11
The MIT License (MIT)
Copyright (c) 2013-present, Yuxi (Evan) You
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Software: vue-i18n v8.15.0
The MIT License (MIT)
Copyright (c) 2016 kazuya kawaguchi
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Software: vuex v3.1.1
The MIT License (MIT)
Copyright (c) 2015-present Evan You
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Software: element-ui v2.11.1
The MIT License (MIT)
Copyright (c) 2016-present ElemeFE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Software: vue-router v3.1.3
MIT License
Copyright (c) 2013-present Evan You
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Software: echarts v4.7.0
The MIT License (MIT)
Copyright (c) 2016-present GU Yiling & ECOMFE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Software: core-js v3.3.2
Copyright (c) 2014-2020 Denis Pushkarev
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Software: vue-i18n-loader v0.6.1
The MIT License (MIT)
Copyright (c) 2017 kazuya kawaguchi
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Software: babel-eslint v10.0.3
Copyright (c) 2014-2016 Sebastian McKenzie <sebmck@gmail.com>
MIT License
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Software: eslint v6.6.0
Copyright JS Foundation and other contributors, https://js.foundation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Software: eslint-plugin-vue v5.2.3
MIT License
Copyright (c) 2017 Toru Nagashima
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Software: sass-loader v8.0.0
Copyright JS Foundation and other contributors
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Software: dart-sass v1.25.0
Copyright (c) 2016, Google Inc.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Software: vue-cli v4.1.0
The MIT License (MIT)
Copyright (c) 2017-present, Yuxi (Evan) You
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Software: vue-cli-plugin-i18n v0.6.1
The MIT License (MIT)
Copyright (c) 2018 kazuya kawaguchi
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Software: slickGrid v2.4.22
Copyright (c) 2009-2019 Michael Leibman and Ben McIntyre, http://github.com/6pac/slickgrid
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Software: jquery v3.5.0
Copyright JS Foundation and other contributors, https://js.foundation/
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Software: SPIRV-Tools v2020.6
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Software: Vulkan-ValidationLayers sdk-1.2.162
The majority of files in this project use the Apache 2.0 License.
There are a few exceptions and their license can be found in the source.
Any license deviations from Apache 2.0 are "more permissive" licenses.
===========================================================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

+35 -43 build/build.sh

@@ -13,38 +13,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

SCRIPT_BASEDIR=$(realpath "$(dirname "$0")")

PROJECT_BASEDIR=$(dirname "$SCRIPT_BASEDIR")
SCRIPT_BASEDIR=$(cd "$(dirname "$0")" || exit; pwd)

rename_wheel() {
cd "$PROJECT_BASEDIR/output" || exit
VERSION="$("$PYTHON" -c 'import platform; print(platform.python_version())')"
VERSION="$1"
PACKAGE_LIST=$(ls mindinsight-*-any.whl) || exit
for PACKAGE_ORIG in $PACKAGE_LIST; do
MINDINSIGHT_VERSION=$(echo "$PACKAGE_ORIG" | awk -F'-' '{print $2}')
PYTHON_VERSION_NUM=$(echo "$VERSION" | awk -F'.' '{print $1$2}')
PYTHON_VERSION_TAG="cp$PYTHON_VERSION_NUM"
PYTHON_ABI_TAG="cp$(python3-config --extension-suffix | awk -F'-' '{print $2}')"
MACHINE_TAG="$(uname -s | tr '[:upper:]' '[:lower:]')_$(uname -m)"
PACKAGE_NEW="mindinsight-$MINDINSIGHT_VERSION-$PYTHON_VERSION_TAG-$PYTHON_ABI_TAG-$MACHINE_TAG.whl"
mv "$PACKAGE_ORIG" "$PACKAGE_NEW"
done
}

write_checksum() {
cd "$PROJECT_BASEDIR/output" || exit
PACKAGE_LIST=$(ls mindinsight-*.whl) || exit
for PACKAGE_NAME in $PACKAGE_LIST; do
sha256sum -b "$PACKAGE_NAME" >"$PACKAGE_NAME.sha256"
for PACKAGE_ORIG in ${PACKAGE_LIST}; do
MINDINSIGHT_VERSION=$(echo "${PACKAGE_ORIG}" | awk -F"-" '{print $2}')
PYTHON_VERSION_NUM=$(echo "${VERSION}" | awk -F"." '{print $1$2}')
PYTHON_VERSION_TAG="cp${PYTHON_VERSION_NUM}"
PYTHON_ABI_TAG="cp${PYTHON_VERSION_NUM}m"
OS_NAME=$(uname | tr '[:upper:]' '[:lower:]')
MACHINE_TAG="${OS_NAME}_$(uname -i)"
PACKAGE_NEW="mindinsight-${MINDINSIGHT_VERSION}-${PYTHON_VERSION_TAG}-${PYTHON_ABI_TAG}-${MACHINE_TAG}.whl"
mv "${PACKAGE_ORIG}" "${PACKAGE_NEW}"
done
}

build_wheel() {
cd "$PROJECT_BASEDIR" || exit
PROJECT_BASEDIR=$(cd "$(dirname "$SCRIPT_BASEDIR")" || exit; pwd)
cd "${PROJECT_BASEDIR}" || exit

if [ $# -gt 0 ]; then
if [ "$1" = "clean" ]; then
@@ -60,41 +48,43 @@ build_wheel() {
echo "start building mindinsight"
clean_files

if command -v python3 > /dev/null; then
PYTHON=python3
elif command -v python > /dev/null; then
PYTHON=python
else
command python3
PYTHON=$(command -v python3 || command -v python)
if [ -z "${PYTHON}" ]; then
echo "Could not find python3 or python command"
exit 1
fi
if ! "$PYTHON" -c 'import sys; assert sys.version_info >= (3, 7)' > /dev/null; then
echo "Python 3.7 or higher is required. You are running $("$PYTHON" -V)"
PYTHON_VERSION=$(${PYTHON} -c "import platform; print(platform.python_version())" | grep '^3.*')
if [ -z "${PYTHON_VERSION}" ]; then
echo "Could not find Python 3"
exit 1
fi

rm -rf output
rm -f output
mkdir output

"$PYTHON" setup.py bdist_wheel
${PYTHON} setup.py bdist_wheel
if [ ! -x "dist" ]; then
echo "Build failed"
exit 1
fi

mv dist output
mv dist/mindinsight-*-any.whl output/

cd output || exit
rename_wheel "${PYTHON_VERSION}"
cd - >/dev/null 2>&1 || exit

rename_wheel
write_checksum
clean_files

echo "Build success, output directory is: $PROJECT_BASEDIR/output"
echo "Build success, output directory is: ${PROJECT_BASEDIR}/output"
}

clean_files() {
cd "$PROJECT_BASEDIR" || exit
rm -rf third_party/build
rm -rf build/lib
rm -rf build/bdist.*
rm -rf mindinsight.egg-info
rm -rf dist
}

show_usage() {
@@ -103,12 +93,13 @@ show_usage() {
echo "usage: build.sh [-h] [clean]"
echo ""
echo "options:"
echo " -h show this help message and exit"
echo " -h show usage info"
echo " clean clean build files"
}

check_opts() {
while getopts ':h' OPT; do
while getopts ':h' OPT
do
case "$OPT" in
h)
show_usage
@@ -124,4 +115,5 @@ check_opts() {

check_opts "$@"

cd "${SCRIPT_BASEDIR}" || exit
build_wheel "$@"

+0 -70 build/scripts/build_vulkan_vision_linux.sh

@@ -1,70 +0,0 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

start_dir=$(pwd)
cd "$(dirname "$0")"/../../ecosystem_tools/VulkanVision

if [[ ! -d "SPIRV-Tools" ]]
then
echo "Cloning SPIRV-Tools"
git clone https://github.com/KhronosGroup/SPIRV-Tools --branch v2020.6
cp st-patches/*.patch SPIRV-Tools
cd SPIRV-Tools
# These are the current stable changes and can be updated with new releases
git apply 0001-spirv-opt-Add-auto-inst-passes.patch
rm *.patch
git clone https://github.com/KhronosGroup/SPIRV-Headers.git external/spirv-headers --branch 1.5.4.raytracing.fixed
git clone https://github.com/google/effcee.git external/effcee --branch v2019.1
git clone https://github.com/google/re2.git external/re2 --branch 2020-11-01
cd ..
fi

if [[ ! -d "Vulkan-ValidationLayers" ]]
then
echo "Cloning Vulkan-ValidationLayers"
git clone https://github.com/KhronosGroup/Vulkan-ValidationLayers --branch sdk-1.2.162
cp vv-patches/*.patch Vulkan-ValidationLayers
cd Vulkan-ValidationLayers
# These are the current stable changes and can be updated with new releases
git apply 0001-layers-Added-auto-inst-layers.patch
rm *.patch
cd ..
fi

build_dir=$(pwd)

echo "Building SPIRV-Tools"
cd SPIRV-Tools
mkdir build
cd build
mkdir install
cmake -DCMAKE_BUILD_TYPE=release -DCMAKE_INSTALL_PREFIX=install ..
cmake --build . --target install --config Release -- -j 4
cd $build_dir

echo "Building Vulkan-ValidationLayers"
cd Vulkan-ValidationLayers
mkdir build
cd build
mkdir install
python ../scripts/update_deps.py --config release
cmake -DCMAKE_BUILD_TYPE=release -DCMAKE_INSTALL_PREFIX=install -DSPIRV_TOOLS_INSTALL_DIR=$build_dir/SPIRV-Tools/build/install -C helper.cmake ..
cmake --build . --target install --config Release -- -j 4

echo "Build completed at $build_dir"!

cd $start_dir

+0 -67 build/scripts/build_vulkan_vision_windows.bat

@@ -1,67 +0,0 @@
REM Copyright 2021 Huawei Technologies Co., Ltd.
REM
REM Licensed under the Apache License, Version 2.0 (the "License");
REM you may not use this file except in compliance with the License.
REM You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.

@echo off

set start_dir=%cd%
cd %~dp0..\..\ecosystem_tools\VulkanVision

IF NOT EXIST SPIRV-Tools (
echo Cloning SPIRV-Tools
git clone https://github.com/KhronosGroup/SPIRV-Tools --branch v2020.6
copy st-patches\*.patch SPIRV-Tools
cd SPIRV-Tools
REM These are the current stable changes and can be updated with new releases
git apply 0001-spirv-opt-Add-auto-inst-passes.patch
del *.patch
git clone https://github.com/KhronosGroup/SPIRV-Headers.git external\spirv-headers --branch 1.5.4.raytracing.fixed
git clone https://github.com/google/effcee.git external\effcee --branch v2019.1
git clone https://github.com/google/re2.git external\re2 --branch 2020-11-01
cd ..
)

IF NOT EXIST Vulkan-ValidationLayers (
echo Cloning Vulkan-ValidationLayers
git clone https://github.com/KhronosGroup/Vulkan-ValidationLayers --branch sdk-1.2.162
copy vv-patches\*.patch Vulkan-ValidationLayers
cd Vulkan-ValidationLayers
REM These are the current stable changes and can be updated with new releases
git apply 0001-layers-Added-auto-inst-layers.patch
del *.patch
cd ..
)

set build_dir=%cd%

echo Building SPIRV-Tools
cd SPIRV-Tools
mkdir build
cd build
mkdir install
cmake -DCMAKE_BUILD_TYPE=release -DCMAKE_INSTALL_PREFIX=install ..
cmake --build . --target install --config Release
cd %build_dir%

echo Building Vulkan-ValidationLayers
cd Vulkan-ValidationLayers
mkdir build
cd build
mkdir install
python ../scripts/update_deps.py --config release
cmake -DCMAKE_BUILD_TYPE=release -DCMAKE_INSTALL_PREFIX=install -DSPIRV_TOOLS_INSTALL_DIR=%cd%/../../SPIRV-Tools/build/install -C helper.cmake ..
cmake --build . --target install --config Release

echo Build completed at %build_dir%!

cd %start_dir%

+52 -43 build/scripts/crc32.sh

@@ -13,66 +13,75 @@
# See the License for the specific language governing permissions and
# limitations under the License.

set -e
SCRIPT_BASEDIR=$(cd "$(dirname "$0")" || exit; pwd)

SCRIPT_BASEDIR=$(realpath "$(dirname "$0")")
THIRD_PARTY_DIR=$(realpath "${SCRIPT_BASEDIR}/../../third_party")
SECUREC_SOURCE_DIR="${THIRD_PARTY_DIR}/securec"


build_crc32() {
if ! command -v cmake > /dev/null; then
command cmake
build_securec() {
CMAKE=$(command -v cmake)
if [ -z "${CMAKE}" ]; then
echo "Could not find cmake command"
exit 1
fi

if ! command -v c++ > /dev/null; then
command c++
fi
cd "${SECUREC_SOURCE_DIR}" || exit
rm -rf build
mkdir build
cd build || exit
${CMAKE} ..
make
cd - >/dev/null 2>&1 || exit
}

if command -v python3 > /dev/null; then
PYTHON=python3
elif command -v python > /dev/null; then
PYTHON=python
else
command python3
build_crc32() {
CPP=$(command -v c++)
if [ -z "${CPP}" ]; then
echo "Could not find c++ command"
exit 1
fi

if ! "$PYTHON" -c 'import sys; assert sys.version_info >= (3, 7)' > /dev/null; then
echo "Python 3.7 or higher is required. You are running $("$PYTHON" -V)"
PYTHON=$(command -v python3 || command -v python)
if [ -z "${PYTHON}" ]; then
echo "Could not find python3 or python command"
exit 1
fi

PYBIND11_INCLUDES="$($PYTHON -m pybind11 --includes)"
if [ ! -n "$PYBIND11_INCLUDES" ]; then
echo "pybind11 is required"
PYTHON_VERSION=$(${PYTHON} -c "import platform; print(platform.python_version())" | grep '^3.*')
if [ -z "${PYTHON_VERSION}" ]; then
echo "Could not find Python 3"
exit 1
fi

BUILDDIR="$(dirname "$SCRIPT_BASEDIR")/build_securec"
[ -d "$BUILDDIR" ] && rm -rf "$BUILDDIR"
mkdir "$BUILDDIR"
cd "$BUILDDIR" || exit

cmake ../..
cmake --build .

MINDINSIGHT_DIR=$(realpath "$SCRIPT_BASEDIR/../../mindinsight")
THIRD_PARTY_DIR=$(realpath "$SCRIPT_BASEDIR/../../third_party")

cd "$MINDINSIGHT_DIR/datavisual/utils" || exit
DATAVISUAL_DIR=$(realpath "${SCRIPT_BASEDIR}/../../mindinsight/datavisual")
CRC32_SOURCE_DIR="${DATAVISUAL_DIR}/utils/crc32"
CRC32_OUTPUT_DIR="${DATAVISUAL_DIR}/utils"
CRC32_SO_FILE="crc32$(python3-config --extension-suffix)"
rm -f "$CRC32_SO_FILE"

SECUREC_LIB_FILE="$BUILDDIR/libsecurec.a"
c++ -O2 -O3 -shared -std=c++11 -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 \
-Wno-maybe-uninitialized -Wno-unused-parameter -Wall -Wl,-z,relro,-z,now,-z,noexecstack \
-I"$MINDINSIGHT_DIR" -I"$THIRD_PARTY_DIR" $PYBIND11_INCLUDES \
-o "$CRC32_SO_FILE" crc32/crc32.cc "$SECUREC_LIB_FILE"

if [ ! -f "$CRC32_SO_FILE" ]; then
echo "$CRC32_SO_FILE file does not exist, build failed"
rm -f "${CRC32_SOURCE_DIR}/${CRC32_SO_FILE}"
rm -f "${CRC32_OUTPUT_DIR}/${CRC32_SO_FILE}"
cd "${CRC32_SOURCE_DIR}" || exit
PYBIND11_INCLUDES=$(${PYTHON} -m pybind11 --includes)
if [ -z "${PYBIND11_INCLUDES}" ]; then
echo "Could not find pybind11 module"
exit 1
fi

[ -d "$BUILDDIR" ] && rm -rf "$BUILDDIR"
PYTHON_INCLUDE=$(echo "${PYBIND11_INCLUDES}" | awk '{print $1}' | sed "s/^-I//g")
PYTHON_HEADERS=$(echo "${PYBIND11_INCLUDES}" | awk '{print $2}' | sed "s/^-I//g")
${CPP} -O2 -O3 -shared -std=c++11 -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 \
-Wno-maybe-uninitialized -Wno-unused-parameter -Wall -Wl,-z,relro,-z,now,-z,noexecstack \
-I"${THIRD_PARTY_DIR}" -I"${DATAVISUAL_DIR}/utils" -I"${PYTHON_INCLUDE}" -I"${PYTHON_HEADERS}" \
-o "${CRC32_SO_FILE}" crc32.cc "${SECUREC_SOURCE_DIR}/build/src/libsecurec.a"

if [ ! -f "${CRC32_SO_FILE}" ]; then
echo "crc so file does not exist, build failed"
exit 1
fi
mv "${CRC32_SO_FILE}" "${CRC32_OUTPUT_DIR}"
}

cd "${SCRIPT_BASEDIR}" || exit
build_securec

cd "${SCRIPT_BASEDIR}" || exit
build_crc32
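
Note: both versions of crc32.sh shell out to `python -m pybind11 --includes` to locate the Python and pybind11 headers before invoking `c++`. A standalone sketch (assuming pybind11 is installed) of reading those flags from Python instead of awk/sed:

```python
# Illustrative sketch only: fetch the -I flags that crc32.sh feeds to c++.
import subprocess
import sys

def pybind11_include_flags():
    result = subprocess.run(
        [sys.executable, "-m", "pybind11", "--includes"],
        capture_output=True, text=True, check=True,
    )
    # Typically two flags: the Python headers and the pybind11 headers.
    return result.stdout.split()

if __name__ == "__main__":
    print(pybind11_include_flags())
```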

+15 -13 build/scripts/ui.sh

@@ -13,32 +13,34 @@
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

SCRIPT_BASEDIR=$(realpath "$(dirname "$0")")
SCRIPT_BASEDIR=$(cd "$(dirname "$0")" || exit; pwd)

build_ui() {
cd "$(realpath "$SCRIPT_BASEDIR/../../mindinsight/ui")" || exit
if ! command -v npm > /dev/null; then
command npm
NPM=$(command -v npm)
if [ -z "${NPM}" ]; then
echo "Could not find npm command"
exit 1
fi

UI_SOURCE_DIR=$(realpath "${SCRIPT_BASEDIR}/../../mindinsight/ui")

cd "${UI_SOURCE_DIR}" || exit
rm -rf dist

npm config set strict-ssl false
npm config set unsafe-perm true
npm config set user 0
${NPM} config set strict-ssl false
${NPM} config set unsafe-perm true
${NPM} config set user 0

npm install --loglevel=error
npm run build
${NPM} install
${NPM} run build

if [ ! -f "dist/index.html" ]; then
echo "dist does not have file index.html, build failed"
exit 1
fi

cp node_modules/@hpcc-js/wasm/dist/graphvizlib.wasm dist/static/js
rm -rf node_modules
}

cd "${SCRIPT_BASEDIR}" || exit
build_ui

+0 -25 ecosystem_tools/VulkanVision/README.md

@@ -1,25 +0,0 @@
# Vulkan Vision

If Vulkan Vision is useful to you, please cite "Vulkan Vision: Ray Tracing Workload Characterization using Automatic Graphics Instrumentation".

Vulkan Vision is released as patches on the Khronos Group [Vulkan-ValidationLayers](https://github.com/KhronosGroup/Vulkan-ValidationLayers) and [SPIRV-Tools](https://github.com/KhronosGroup/SPIRV-Tools) repositories.

To generate a vvision build:

## Windows

```bat
..\..\build\scripts\build_vulkan_vision_windows.bat
```

## Linux

```bash
../../build/scripts/build_vulkan_vision_linux.sh
```

The completed build will be at `mindinsight/ecosystem_tools/VulkanVision/Vulkan-ValidationLayers/build/install`

V-Vision Documentation will be at `mindinsight/ecosystem_tools/VulkanVision/Vulkan-ValidationLayers/docs/auto_instrument.md`

Documentation for enabling and using Vulkan Validation layers can be found [here](https://vulkan.lunarg.com/doc/sdk/1.2.162.0/windows/layer_configuration.html)

+0 -3187 ecosystem_tools/VulkanVision/st-patches/0001-spirv-opt-Add-auto-inst-passes.patch
File diff suppressed because it is too large


+0 -1 ecosystem_tools/VulkanVision/st-patches/vvision-st.diff

@@ -1 +0,0 @@
0001-spirv-opt-Add-auto-inst-passes.patch

+0 -3600 ecosystem_tools/VulkanVision/vv-patches/0001-layers-Added-auto-inst-layers.patch
File diff suppressed because it is too large


+0 -1 ecosystem_tools/VulkanVision/vv-patches/vvision-vv.diff

@@ -1 +0,0 @@
0001-layers-Added-auto-inst-layers.patch

+1 -1 mindinsight/_version.py

@@ -14,4 +14,4 @@
# ============================================================================
"""Mindinsight version module."""

VERSION = '1.2.0'
VERSION = '0.1.0'

+0 -1 mindinsight/backend/__init__.py

@@ -12,4 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""This module defines the methods associated with the web app."""

+5 -15 mindinsight/backend/application.py

@@ -1,4 +1,4 @@
# Copyright 2019-2021 Huawei Technologies Co., Ltd
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -31,7 +31,6 @@ from mindinsight.datavisual.common import error_handler
from mindinsight.datavisual.utils.tools import find_app_package
from mindinsight.datavisual.utils.tools import get_img_mimetype
from mindinsight.utils.exceptions import MindInsightException
from mindinsight.utils.log import setup_logger


def get_security_headers():
@@ -46,7 +45,6 @@ def get_security_headers():
'frame-src': ["'self'"] + domain_white_list,
'frame-ancestors': ["'self'"] + domain_white_list,
'default-src': ["'self'"],
'script-src': ["'self'", "'unsafe-eval'"]
}

headers = {
@@ -72,14 +70,13 @@ class CustomResponse(Response):
"""Define custom response."""
def __init__(self, response=None, **kwargs):
headers = kwargs.get("headers")
security_headers = list(SECURITY_HEADERS)
if isinstance(response, bytes):
mimetype = get_img_mimetype(response)
security_headers.append(('Content-Type', mimetype))
SECURITY_HEADERS.append(('Content-Type', mimetype))
if headers is None:
headers = Headers(security_headers)
headers = Headers(SECURITY_HEADERS)
else:
for header in security_headers:
for header in SECURITY_HEADERS:
headers.add(*header)
kwargs['headers'] = headers
super(CustomResponse, self).__init__(response, **kwargs)
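
Note: in the master branch, CustomResponse copies SECURITY_HEADERS into a per-response list before appending the image Content-Type, whereas r0.1 appends directly to the module-level list. A simplified, standalone sketch (not the actual Flask code) of why the per-response copy matters:

```python
# Simplified sketch: appending to a shared module-level list makes it grow on
# every binary response, while a per-response copy keeps the global intact.
SECURITY_HEADERS = [("X-Content-Type-Options", "nosniff")]

def headers_shared(mimetype):
    SECURITY_HEADERS.append(("Content-Type", mimetype))   # r0.1 style: mutates the global
    return list(SECURITY_HEADERS)

def headers_copied(mimetype):
    headers = list(SECURITY_HEADERS)                       # master style: copy first
    headers.append(("Content-Type", mimetype))
    return headers

for _ in range(3):
    headers_shared("image/png")
print(len(SECURITY_HEADERS))   # 4 -- the shared list has accumulated duplicate entries
```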
@@ -93,13 +90,10 @@ def _init_app_module(app):
app (Flask): An instance of Flask.
"""
packages = find_app_package()
gunicorn_logger = setup_logger("gunicorn", "error")
for package in packages:
try:
app_module = import_module(package)
gunicorn_logger.info("[%s].init_module starts.", package)
app_module.init_module(app)
gunicorn_logger.info("[%s].init_module ends.", package)
except AttributeError:
logger.debug('[%s].init_module not exists.', package)

@@ -112,13 +106,10 @@ def before_request():

def create_app():
"""Set flask APP config, and start the data manager."""
gunicorn_logger = setup_logger("gunicorn", "error")
gunicorn_logger.info("create_app starts.")
static_url_path = settings.URL_PATH_PREFIX + "/static"
static_url_path = "/static"
static_folder_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.pardir, 'ui', 'dist', 'static'))

app = Flask(__name__, static_url_path=static_url_path, static_folder=static_folder_path)
app.config['JSON_SORT_KEYS'] = False

if settings.ENABLE_CORS:
CORS(app, supports_credentials=True)
@@ -132,7 +123,6 @@ def create_app():
app.response_class = CustomResponse

_init_app_module(app)
gunicorn_logger.info("create_app ends.")

return app



+5 -75 mindinsight/backend/config/gunicorn_conf.py

@@ -1,4 +1,4 @@
# Copyright 2019-2021 Huawei Technologies Co., Ltd
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -15,19 +15,11 @@
"""Config file for gunicorn."""

import os
import time
import signal
import sys
import multiprocessing
import threading
from importlib import import_module

import psutil
import gunicorn

from mindinsight.utils.computing_resource_mgr import terminate
from mindinsight.debugger.session_manager import SessionManager


gunicorn.SERVER_SOFTWARE = 'unknown'

@@ -38,12 +30,12 @@ worker_connections = 1000

timeout = 30
graceful_timeout = 30
daemon = False
daemon = True

captureoutput = False
captureoutput = True

# write gunicorn default log to devnull, and using mindinsight logger write gunicorn log to file.
accesslog = os.devnull
# write gunicorn default log to stream, and using mindinsight logger write gunicorn log to file.
accesslog = '-'


def on_starting(server):
@@ -51,65 +43,3 @@ def on_starting(server):
hook_module = import_module('mindinsight.utils.hook')
for hook in hook_module.HookUtils.instance().hooks():
threading.Thread(target=hook.on_startup, args=(server.log,)).start()


# This global variable is to manage the listen process so that we can close the
# process when gunicorn is exiting.
LISTEN_PROCESS = None


def post_worker_init(worker):
"""
Launch a process to listen worker after gunicorn worker is initialized.

Children processes of gunicorn worker should be killed when worker has been killed
because gunicorn master murders this worker for some reasons such as worker timeout.

Args:
worker (ThreadWorker): worker instance.
"""
def murder_worker_children_processes():
signal.signal(
signal.SIGTERM,
lambda signal_num, handler: sys.exit(0))
processes_to_kill = []
# sleep 3 seconds so that all worker children processes have been launched.
time.sleep(3)
process = psutil.Process(worker.pid)
for child in process.children(recursive=True):
if child.pid != os.getpid():
processes_to_kill.append(child)
while True:
if os.getppid() != worker.pid:
# Kill the remaining sub-processed after the worker process died
_, alive = psutil.wait_procs(processes_to_kill, 0.1)
current_worker_pid = os.getppid()
for proc in alive:
worker.log.info("Original worker pid: %d, current worker pid: %d, stop process %d",
worker.pid, current_worker_pid, proc.pid)
try:
proc.send_signal(signal.SIGKILL)
except psutil.NoSuchProcess:
continue
except psutil.Error as ex:
worker.log.error("Stop process %d failed. Detail: %s.", proc.pid, str(ex))
worker.log.info("%d processes have been terminated by listener.", len(alive))
break
time.sleep(1)

listen_process = multiprocessing.Process(target=murder_worker_children_processes,
name="murder_worker_children_processes")
listen_process.start()
global LISTEN_PROCESS
LISTEN_PROCESS = listen_process
worker.log.info("Server pid: %d, start to listening.", worker.ppid)


def worker_int(worker):
"""Terminate child processes when worker is interrupted."""
terminate()
global LISTEN_PROCESS
if LISTEN_PROCESS is not None:
LISTEN_PROCESS.terminate()
SessionManager.get_instance().exit()
worker.log.info("Worker int processed.")

+0 -45 mindinsight/backend/data_manager/__init__.py

@@ -1,45 +0,0 @@
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Trigger data manager load."""
import time

from mindinsight.conf import settings
from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER
from mindinsight.lineagemgr.cache_item_updater import LineageCacheItemUpdater
from mindinsight.debugger.debugger_folder_analyzer import DebuggerFolderAnalyzer

ANALYZERS = {
"debugger_folder_analyzer": DebuggerFolderAnalyzer()
}


def init_module(app):
"""
Interface to init module.

Args:
app (Flask): An instance of Flask.

"""
# Just to suppress pylint warning about unused arg.
logger.debug("App: %s", type(app))
for analyzer in ANALYZERS.values():
DATA_MANAGER.register_folder_analyzer(analyzer)
DATA_MANAGER.register_brief_cache_item_updater(LineageCacheItemUpdater())
# Let gunicorn load other modules first.
time.sleep(1)

DATA_MANAGER.start_load_data(reload_interval=settings.RELOAD_INTERVAL)

+6 -0 mindinsight/backend/datavisual/__init__.py

@@ -18,6 +18,9 @@ from mindinsight.backend.datavisual.static_resource_api import init_module as st
from mindinsight.backend.datavisual.task_manager_api import init_module as task_init_module
from mindinsight.backend.datavisual.train_visual_api import init_module as train_init_module

from mindinsight.conf import settings
from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER


def init_module(app):
"""
@@ -30,3 +33,6 @@ def init_module(app):
static_init_module(app)
task_init_module(app)
train_init_module(app)

DATA_MANAGER.start_load_data(reload_interval=int(settings.RELOAD_INTERVAL),
max_threads_count=int(settings.MAX_THREADS_COUNT))

+8 -21 mindinsight/backend/datavisual/static_resource_api.py

@@ -17,36 +17,23 @@ import os
import sys

from flask import current_app
from flask import make_response
from flask import redirect
from flask import send_from_directory
from flask import Blueprint

from mindinsight.conf import settings

BLUEPRINT = Blueprint("static_resource", __name__, url_prefix=settings.URL_PATH_PREFIX)
APP_PATH = os.path.realpath(os.path.dirname(sys.argv[0]))
BLUEPRINT = Blueprint("static_resource", __name__)


@BLUEPRINT.route("/", methods=["GET"])
def index():
"""Interface to return index.html."""
app_path = os.path.realpath(os.path.dirname(sys.argv[0]))
index_html_file = os.path.realpath(os.path.join(app_path, current_app.static_folder, os.pardir, 'index.html'))
with open(index_html_file, 'r') as file_pointer:
html_content = file_pointer.read()
return make_response(html_content)

"""Interface to return static index.html."""
return send_from_directory(get_index_resource_dir(), "index.html")

@BLUEPRINT.route("/graphvizlib.wasm", methods=["GET"])
def return_wasm_file():
"""
Interface to redirect graphvizlib.wasm

When accessing the graphvizlib.wasm file in front module via Firefox browser, the file path will change to
"/graphvizlib.wasm" which makes the computed diagram inaccessible. Redirecting the path to correct address can
ensure the computed graph accessible properly.
"""
return redirect(location="static/js/graphvizlib.wasm")
def get_index_resource_dir():
"""Interface to return index.html resource directory."""
return os.path.realpath(os.path.join(APP_PATH, current_app.static_folder, os.pardir))


def init_module(app):


+11 -26 mindinsight/backend/datavisual/task_manager_api.py

@@ -25,16 +25,14 @@ from flask import request
from flask import jsonify

from mindinsight.conf import settings
from mindinsight.utils.exceptions import ParamMissError
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
from mindinsight.datavisual.utils.tools import str_to_bool
from mindinsight.datavisual.utils.tools import get_train_id
from mindinsight.datavisual.processors.train_task_manager import TrainTaskManager
from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER


BLUEPRINT = Blueprint("task_manager", __name__, url_prefix=settings.URL_PATH_PREFIX+settings.API_PREFIX)
BLUEPRINT = Blueprint("task_manager", __name__, url_prefix=settings.URL_PREFIX)


@BLUEPRINT.route("/datavisual/single-job", methods=["GET"])
@@ -66,13 +64,17 @@ def query_train_jobs():
"""Query train jobs."""
offset = request.args.get("offset", default=0)
limit = request.args.get("limit", default=10)
train_id = get_train_id(request)

offset = Validation.check_offset(offset=offset)
limit = Validation.check_limit(limit, min_value=1, max_value=SummaryWatcher.MAX_SUMMARY_DIR_COUNT)
summary_watcher = SummaryWatcher()
total, directories = summary_watcher.list_summary_directories_by_pagination(
settings.SUMMARY_BASE_DIR, offset, limit)

processor = TrainTaskManager(DATA_MANAGER)
total, train_jobs = processor.query_train_jobs(offset, limit, train_id)
train_jobs = [{
'train_id': directory['relative_path'],
'relative_path': directory['relative_path'],
'create_time': directory['create_time'].strftime('%Y-%m-%d %H:%M:%S'),
'update_time': directory['update_time'].strftime('%Y-%m-%d %H:%M:%S'),
} for directory in directories]

return jsonify({
'name': os.path.basename(os.path.realpath(settings.SUMMARY_BASE_DIR)),
@@ -81,23 +83,6 @@ def query_train_jobs():
})


@BLUEPRINT.route("/datavisual/train-job-caches", methods=["POST"])
def cache_train_jobs():
""" Cache train jobs."""
data = request.get_json(silent=True)
if data is None:
raise ParamMissError('train_ids')

train_ids = data.get('train_ids')
if train_ids is None:
raise ParamMissError('train_ids')

processor = TrainTaskManager(DATA_MANAGER)
cache_result = processor.cache_train_jobs(train_ids)

return jsonify({'cache_result': cache_result})
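As a usage illustration of the paginated listing above, a client call might look like this sketch; the host, port, URL prefix and route path are assumptions based on the docstring examples elsewhere in this diff.

# Sketch only: paginated train-job listing; base URL and route are placeholders.
import requests

BASE = "http://127.0.0.1:8080/v1/mindinsight"
resp = requests.get(f"{BASE}/datavisual/train-jobs", params={"offset": 0, "limit": 10})
resp.raise_for_status()
payload = resp.json()
print(payload["name"], payload["total"])          # keys follow the jsonify call in query_train_jobs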


def init_module(app):
"""
Init module entry.


+ 4
- 51
mindinsight/backend/datavisual/train_visual_api.py View File

@@ -24,15 +24,13 @@ from flask import jsonify
from mindinsight.conf import settings
from mindinsight.datavisual.utils.tools import get_train_id
from mindinsight.datavisual.utils.tools import if_nan_inf_to_none
from mindinsight.datavisual.processors.histogram_processor import HistogramProcessor
from mindinsight.datavisual.processors.tensor_processor import TensorProcessor
from mindinsight.datavisual.processors.images_processor import ImageProcessor
from mindinsight.datavisual.processors.scalars_processor import ScalarsProcessor
from mindinsight.datavisual.processors.graph_processor import GraphProcessor
from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER


BLUEPRINT = Blueprint("train_visual", __name__, url_prefix=settings.URL_PATH_PREFIX+settings.API_PREFIX)
BLUEPRINT = Blueprint("train_visual", __name__, url_prefix=settings.URL_PREFIX)


@BLUEPRINT.route("/datavisual/image/metadata", methods=["GET"])
@@ -80,7 +78,7 @@ def scalar_metadata():
one of which is an object containing items' wall_time, step and value.
"""
tag = request.args.get("tag")
train_id = get_train_id(request)
train_id = request.args.get("train_id")

processor = ScalarsProcessor(DATA_MANAGER)
response = processor.get_metadata_list(train_id, tag)
@@ -103,11 +101,12 @@ def graph_nodes():

"""
name = request.args.get('name', default=None)
node_type = request.args.get('type', default='name_scope')
tag = request.args.get("tag", default=None)
train_id = get_train_id(request)

graph_process = GraphProcessor(train_id, DATA_MANAGER, tag)
response = graph_process.list_nodes(scope=name)
response = graph_process.get_nodes(name=name, node_type=node_type)
return jsonify(response)


@@ -147,52 +146,6 @@ def graph_search_single_node():
return jsonify(resp)


@BLUEPRINT.route("/datavisual/histograms", methods=["GET"])
def histogram():
"""
Interface to obtain histogram data.

Returns:
Response, which contains a JSON object.
"""
tag = request.args.get("tag", default=None)
train_id = get_train_id(request)

processor = HistogramProcessor(DATA_MANAGER)
response = processor.get_histograms(train_id, tag)
return jsonify(response)


@BLUEPRINT.route("/datavisual/scalars", methods=["GET"])
def get_scalars():
"""Get scalar data for given train_ids and tags."""
train_ids = request.args.getlist('train_id')
tags = request.args.getlist('tag')

processor = ScalarsProcessor(DATA_MANAGER)
scalars = processor.get_scalars(train_ids, tags)
return jsonify({'scalars': scalars})


@BLUEPRINT.route("/datavisual/tensors", methods=["GET"])
def get_tensors():
"""
Interface to obtain tensor data.

Returns:
Response, which contains a JSON object.
"""
train_ids = request.args.getlist('train_id')
tags = request.args.getlist('tag')
step = request.args.get("step", default=None)
dims = request.args.get("dims", default=None)
detail = request.args.get("detail", default=None)

processor = TensorProcessor(DATA_MANAGER)
response = processor.get_tensors(train_ids, tags, step, dims, detail)
return jsonify(response)


def init_module(app):
"""
Init module entry.


+ 0
- 26
mindinsight/backend/debugger/__init__.py View File

@@ -1,26 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Module init file."""
from mindinsight.backend.debugger.debugger_api import init_module as init_query_module


def init_module(app):
"""
Init module entry.

Args:
app (Flask): A Flask instance.
"""
init_query_module(app)

+ 0
- 426
mindinsight/backend/debugger/debugger_api.py View File

@@ -1,426 +0,0 @@
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Debugger restful api."""
import json
from urllib.parse import unquote

from flask import Blueprint, jsonify, request

from mindinsight.conf import settings
from mindinsight.debugger.session_manager import SessionManager
from mindinsight.utils.exceptions import ParamMissError, ParamValueError

BLUEPRINT = Blueprint("debugger", __name__,
url_prefix=settings.URL_PATH_PREFIX + settings.API_PREFIX)


def _unquote_param(param):
"""
Decode parameter value.

Args:
param (str): Encoded param value.

Returns:
str, decoded param value.
"""
if isinstance(param, str):
try:
param = unquote(param, errors='strict')
except UnicodeDecodeError:
raise ParamValueError('Unquote error with strict mode.')
return param


def _read_post_request(post_request):
"""
Extract the body of post request.

Args:
post_request (object): The post request.

Returns:
dict, the deserialized body of request.
"""
body = post_request.stream.read()
try:
body = json.loads(body if body else "{}")
except Exception:
raise ParamValueError("Json data parse failed.")
return body


def _wrap_reply(func, *args, **kwargs):
"""Serialize reply."""
reply = func(*args, **kwargs)
return jsonify(reply)


@BLUEPRINT.route("/debugger/sessions/<session_id>/poll-data", methods=["GET"])
def poll_data(session_id):
"""
Wait for data to be updated on UI.

Get data from server and display the change on UI.

Returns:
str, the updated data.

Examples:
        >>> GET http://xxxx/v1/mindinsight/debugger/sessions/xxxx/poll-data?pos=xx
"""
pos = request.args.get('pos')

reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).poll_data, pos)

return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/search", methods=["GET"])
def search(session_id):
"""
Search nodes in specified watchpoint.

Returns:
str, the required data.

Examples:
        >>> GET http://xxxx/v1/mindinsight/debugger/sessions/xxxx/search?name=mock_name&watch_point_id=1
"""
name = request.args.get('name')
graph_name = request.args.get('graph_name')
watch_point_id = int(request.args.get('watch_point_id', 0))
node_category = request.args.get('node_category')
rank_id = int(request.args.get('rank_id', 0))
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).search,
{'name': name,
'graph_name': graph_name,
'watch_point_id': watch_point_id,
'node_category': node_category,
'rank_id': rank_id})

return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/tensor-comparisons", methods=["GET"])
def tensor_comparisons(session_id):
"""
Get tensor comparisons.

Returns:
str, the required data.

Examples:
        >>> GET http://xxxx/v1/mindinsight/debugger/sessions/xxxx/tensor-comparisons
"""
name = request.args.get('name')
detail = request.args.get('detail', 'data')
shape = _unquote_param(request.args.get('shape'))
tolerance = request.args.get('tolerance', '0')
rank_id = int(request.args.get('rank_id', 0))
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).tensor_comparisons, name, shape, detail,
tolerance, rank_id)

return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/retrieve", methods=["POST"])
def retrieve(session_id):
"""
Retrieve data according to mode and params.

Returns:
str, the required data.

Examples:
>>> POST http://xxxx/v1/mindinsight/debugger/sessions/xxxx/retrieve
"""
body = _read_post_request(request)
mode = body.get('mode')
params = body.get('params')
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).retrieve, mode, params)
return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/tensor-history", methods=["POST"])
def retrieve_tensor_history(session_id):
"""
    Retrieve tensor history according to node name and graph name.

Returns:
str, the required data.

Examples:
>>> POST http://xxxx/v1/mindinsight/debugger/sessions/xxxx/tensor-history
"""
body = _read_post_request(request)
name = body.get('name')
graph_name = body.get('graph_name')
rank_id = body.get('rank_id')
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).retrieve_tensor_history, name, graph_name,
rank_id)
return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/tensors", methods=["GET"])
def retrieve_tensor_value(session_id):
"""
Retrieve tensor value according to name and shape.

Returns:
str, the required data.

Examples:
>>> GET http://xxxx/v1/mindinsight/debugger/sessions/xxxx/tensors?name=tensor_name&detail=data&shape=[1,1,:,:]
"""
name = request.args.get('name')
detail = request.args.get('detail')
shape = _unquote_param(request.args.get('shape'))
graph_name = request.args.get('graph_name')
prev = bool(request.args.get('prev') == 'true')
rank_id = int(request.args.get('rank_id', 0))

reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).retrieve_tensor_value, name, detail,
shape, graph_name, prev, rank_id)
return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/create-watchpoint", methods=["POST"])
def create_watchpoint(session_id):
"""
Create watchpoint.

Returns:
str, watchpoint id.

Raises:
MindInsightException: If method fails to be called.

Examples:
>>> POST http://xxxx/v1/mindinsight/debugger/sessions/xxxx/create-watchpoint
"""
params = _read_post_request(request)
params['watch_condition'] = params.pop('condition', None)
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).create_watchpoint, params)
return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/update-watchpoint", methods=["POST"])
def update_watchpoint(session_id):
"""
Update watchpoint.

Returns:
str, reply message.

Raises:
MindInsightException: If method fails to be called.

Examples:
>>> POST http://xxxx/v1/mindinsight/debugger/sessions/xxxx/update-watchpoint
"""
params = _read_post_request(request)
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).update_watchpoint, params)
return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/delete-watchpoint", methods=["POST"])
def delete_watchpoint(session_id):
"""
Delete watchpoint.

Returns:
str, reply message.

Raises:
MindInsightException: If method fails to be called.

Examples:
>>> POST http://xxxx/v1/mindinsight/debugger/sessions/xxxx/delete-watchpoint
"""
body = _read_post_request(request)

watch_point_id = body.get('watch_point_id')

reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).delete_watchpoint, watch_point_id)

return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/control", methods=["POST"])
def control(session_id):
"""
Control request.

Returns:
str, reply message.

Raises:
MindInsightException: If method fails to be called.

Examples:
>>> POST http://xxxx/v1/mindinsight/debugger/sessions/xxxx/control
"""
params = _read_post_request(request)
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).control, params)

return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/recheck", methods=["POST"])
def recheck(session_id):
"""
Recheck request.

Returns:
str, reply message.

Raises:
MindInsightException: If method fails to be called.

Examples:
>>> POST http://xxxx/v1/mindinsight/debugger/sessions/xxxx/recheck
"""
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).recheck)

return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/tensor-graphs", methods=["GET"])
def retrieve_tensor_graph(session_id):
"""
    Retrieve tensor graph according to tensor name and graph name.

Returns:
str, the required data.

Examples:
>>> GET http://xxxx/v1/mindinsight/debugger/sessions/xxxx/tensor-graphs?tensor_name=xxx&graph_name=xxx
"""
tensor_name = request.args.get('tensor_name')
graph_name = request.args.get('graph_name')
rank_id = int(request.args.get('rank_id', 0))
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).retrieve_tensor_graph, tensor_name,
graph_name, rank_id)
return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/tensor-hits", methods=["GET"])
def retrieve_tensor_hits(session_id):
"""
    Retrieve tensor hits according to tensor name and graph name.

Returns:
str, the required data.

Examples:
>>> GET http://xxxx/v1/mindinsight/debugger/sessions/xxxx/tensor-hits?tensor_name=xxx&graph_name=xxx
"""
tensor_name = request.args.get('tensor_name')
graph_name = request.args.get('graph_name')
rank_id = int(request.args.get('rank_id', 0))
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).retrieve_tensor_hits, tensor_name,
graph_name, rank_id)
return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/search-watchpoint-hits", methods=["POST"])
def search_watchpoint_hits(session_id):
"""
Search watchpoint hits by group condition.

Returns:
str, the required data.

Examples:
>>> POST http://xxxx/v1/mindinsight/debugger/sessions/xxxx/search-watchpoint-hits
"""
body = _read_post_request(request)
group_condition = body.get('group_condition')
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).search_watchpoint_hits, group_condition)
return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/condition-collections", methods=["GET"])
def get_condition_collections(session_id):
"""Get condition collections."""
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).get_condition_collections)
return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/set-recommended-watch-points", methods=["POST"])
def set_recommended_watch_points(session_id):
"""Set recommended watch points."""
body = _read_post_request(request)
request_body = body.get('requestBody')
if request_body is None:
raise ParamMissError('requestBody')

set_recommended = request_body.get('set_recommended')
reply = _wrap_reply(SessionManager.get_instance().get_session(session_id).set_recommended_watch_points,
set_recommended)
return reply


@BLUEPRINT.route("/debugger/sessions", methods=["POST"])
def create_session():
"""
Get session id if session exist, else create a session.

Returns:
str, session id.

Examples:
>>> POST http://xxxx/v1/mindinsight/debugger/sessions
"""
body = _read_post_request(request)
summary_dir = body.get('dump_dir')
session_type = body.get('session_type')
reply = _wrap_reply(SessionManager.get_instance().create_session, session_type, summary_dir)
return reply


@BLUEPRINT.route("/debugger/sessions", methods=["GET"])
def get_train_jobs():
"""
Check the current active sessions.

Examples:
        >>> GET http://xxxx/v1/mindinsight/debugger/sessions
"""
reply = _wrap_reply(SessionManager.get_instance().get_train_jobs)
return reply


@BLUEPRINT.route("/debugger/sessions/<session_id>/delete", methods=["POST"])
def delete_session(session_id):
"""
Delete session by session id.

Examples:
        >>> POST http://xxxx/v1/mindinsight/debugger/sessions/xxxx/delete
"""
reply = _wrap_reply(SessionManager.get_instance().delete_session, session_id)
return reply


def init_module(app):
"""
Init module entry.

Args:
app (Flask): The application obj.
"""
app.register_blueprint(BLUEPRINT)
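To make the removed debugger API easier to follow end to end, a hedged sketch of the session lifecycle it exposed; the host, dump directory, and the session_type value are placeholders, while routes and field names follow the handlers above.

# Sketch only: debugger session lifecycle against the removed REST API.
import requests

BASE = "http://127.0.0.1:8080/v1/mindinsight/debugger"

# 1. Create (or reuse) a session for a dump directory; create_session returns the session id.
session_id = requests.post(f"{BASE}/sessions",
                           json={"session_type": "OFFLINE",     # value is a placeholder
                                 "dump_dir": "/path/to/dump"}).json()

# 2. Poll for updated data and search nodes in a watchpoint.
requests.get(f"{BASE}/sessions/{session_id}/poll-data", params={"pos": 0})
requests.get(f"{BASE}/sessions/{session_id}/search",
             params={"name": "Conv2D", "watch_point_id": 1})

# 3. Tear the session down when finished.
requests.post(f"{BASE}/sessions/{session_id}/delete")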

+ 0
- 32
mindinsight/backend/explainer/__init__.py View File

@@ -1,32 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Module init file."""
from mindinsight.conf import settings
from mindinsight.backend.explainer.explainer_api import init_module as init_query_module
from mindinsight.explainer.manager.explain_manager import EXPLAIN_MANAGER


def init_module(app):
"""
Init module entry.

Args:
app (Flask): A Flask instance.

Returns:

"""
init_query_module(app)
EXPLAIN_MANAGER.start_load_data(reload_interval=settings.RELOAD_INTERVAL)

+ 0
- 338
mindinsight/backend/explainer/explainer_api.py View File

@@ -1,338 +0,0 @@
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Explainer restful api."""

import json
import os
import urllib.parse

from flask import Blueprint
from flask import jsonify
from flask import request

from mindinsight.conf import settings
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
from mindinsight.datavisual.utils.tools import get_train_id
from mindinsight.explainer.encapsulator.datafile_encap import DatafileEncap
from mindinsight.explainer.encapsulator.evaluation_encap import EvaluationEncap
from mindinsight.explainer.encapsulator.explain_job_encap import ExplainJobEncap
from mindinsight.explainer.encapsulator.hierarchical_occlusion_encap import HierarchicalOcclusionEncap
from mindinsight.explainer.encapsulator.saliency_encap import SaliencyEncap
from mindinsight.explainer.manager.explain_manager import EXPLAIN_MANAGER
from mindinsight.utils.exceptions import ParamMissError
from mindinsight.utils.exceptions import ParamTypeError
from mindinsight.utils.exceptions import ParamValueError

URL_PREFIX = settings.URL_PATH_PREFIX + settings.API_PREFIX
BLUEPRINT = Blueprint("explainer", __name__, url_prefix=URL_PREFIX)


def _validate_type(param, name, expected_types):
"""
Common function to validate type.

Args:
param (object): Parameter to be validated.
name (str): Name of the parameter.
expected_types (type, tuple[type]): Expected type(s) of param.

Raises:
ParamTypeError: When param is not an instance of expected_types.
"""

if not isinstance(param, expected_types):
raise ParamTypeError(name, expected_types)


def _validate_value(param, name, expected_values):
"""
Common function to validate values of param.

Args:
param (object): Parameter to be validated.
name (str): Name of the parameter.
expected_values (tuple) : Expected values of param.

Raises:
ParamValueError: When param is not in expected_values.
"""

if param not in expected_values:
raise ParamValueError(f"Valid options for {name} are {expected_values}, but got {param}.")


def _image_url_formatter(train_id, image_path, image_type):
"""
Returns image url.

Args:
train_id (str): Id that specifies explain job.
image_path (str): Local path or unique string that specifies the image for query.
image_type (str): Image query type.

Returns:
str, url string for image query.
"""
data = {
"train_id": train_id,
"path": image_path,
"type": image_type
}
return f"{URL_PREFIX}/explainer/image?{urllib.parse.urlencode(data)}"


def _read_post_request(post_request):
"""
Extract the body of post request.

Args:
post_request (object): The post request.

Returns:
dict, the deserialized body of request.
"""
body = post_request.stream.read()
try:
body = json.loads(body if body else "{}")
except json.decoder.JSONDecodeError:
raise ParamValueError("Json data parse failed.")
return body


def _get_query_sample_parameters(data):
"""
Get parameter for query.

Args:
data (dict): Dict that contains request info.

Returns:
dict, key-value pairs to call backend query functions.

Raises:
ParamMissError: If train_id info is not in the request.
ParamTypeError: If certain key is not in the expected type in the request.
ParamValueError: If certain key does not have the expected value in the request.
"""

train_id = data.get("train_id")
if train_id is None:
raise ParamMissError('train_id')

labels = data.get("labels")
if labels is not None:
_validate_type(labels, "labels", list)
if labels:
for item in labels:
_validate_type(item, "element of labels", str)

limit = data.get("limit", 10)
limit = Validation.check_limit(limit, min_value=1, max_value=100)
offset = data.get("offset", 0)
offset = Validation.check_offset(offset=offset)
sorted_name = data.get("sorted_name", "")
_validate_value(sorted_name, "sorted_name", ('', 'confidence', 'uncertainty'))

sorted_type = data.get("sorted_type", "descending")
_validate_value(sorted_type, "sorted_type", ("ascending", "descending"))

prediction_types = data.get("prediction_types")
if prediction_types is not None:
_validate_type(prediction_types, "element of labels", list)
if prediction_types:
for item in prediction_types:
_validate_value(item, "element of prediction_types", ('TP', 'FN', 'FP'))

query_kwarg = {"train_id": train_id,
"labels": labels,
"limit": limit,
"offset": offset,
"sorted_name": sorted_name,
"sorted_type": sorted_type,
"prediction_types": prediction_types}
return query_kwarg


@BLUEPRINT.route("/explainer/explain-jobs", methods=["GET"])
def query_explain_jobs():
"""
Query explain jobs.

Returns:
Response, contains dict that stores base directory, total number of jobs and their detailed job metadata.

Raises:
ParamMissError: If train_id info is not in the request.
ParamTypeError: If one of (offset, limit) is not integer in the request.
ParamValueError: If one of (offset, limit) does not have the expected value in the request.
"""
offset = request.args.get("offset", default=0)
limit = request.args.get("limit", default=10)
offset = Validation.check_offset(offset=offset)
limit = Validation.check_limit(limit, min_value=1, max_value=SummaryWatcher.MAX_SUMMARY_DIR_COUNT)

encapsulator = ExplainJobEncap(EXPLAIN_MANAGER)
total, jobs = encapsulator.query_explain_jobs(offset, limit)

return jsonify({
'name': os.path.basename(os.path.realpath(settings.SUMMARY_BASE_DIR)),
'total': total,
'explain_jobs': jobs,
})


@BLUEPRINT.route("/explainer/explain-job", methods=["GET"])
def query_explain_job():
"""
Query explain job meta-data.

Returns:
Response, contains dict that stores metadata of the requested job.

Raises:
ParamMissError: If train_id info is not in the request.
"""
train_id = get_train_id(request)
if train_id is None:
raise ParamMissError("train_id")
encapsulator = ExplainJobEncap(EXPLAIN_MANAGER)
metadata = encapsulator.query_meta(train_id)
return jsonify(metadata)


@BLUEPRINT.route("/explainer/saliency", methods=["POST"])
def query_saliency():
"""
Query saliency map related results.

Returns:
Response, contains dict that stores number of samples and the detailed sample info.

Raises:
ParamTypeError: If certain key is not in the expected type in the request.
ParamValueError: If certain key does not have the expected value in the request.
"""
data = _read_post_request(request)
query_kwarg = _get_query_sample_parameters(data)
explainers = data.get("explainers")
if explainers is not None and not isinstance(explainers, list):
raise ParamTypeError("explainers", (list, None))
if explainers:
for item in explainers:
if not isinstance(item, str):
raise ParamTypeError("element of explainers", str)

query_kwarg["explainers"] = explainers

encapsulator = SaliencyEncap(
_image_url_formatter,
EXPLAIN_MANAGER)
count, samples = encapsulator.query_saliency_maps(**query_kwarg)

return jsonify({
"count": count,
"samples": samples
})


@BLUEPRINT.route("/explainer/hoc", methods=["POST"])
def query_hoc():
"""
Query hierarchical occlusion related results.

Returns:
Response, contains dict that stores number of samples and the detailed sample info.

Raises:
ParamTypeError: If certain key is not in the expected type in the request.
ParamValueError: If certain key does not have the expected value in the request.
"""
data = _read_post_request(request)

query_kwargs = _get_query_sample_parameters(data)

filter_empty = data.get("drop_empty", True)
if not isinstance(filter_empty, bool):
raise ParamTypeError("drop_empty", bool)

query_kwargs["drop_empty"] = filter_empty

encapsulator = HierarchicalOcclusionEncap(
_image_url_formatter,
EXPLAIN_MANAGER)
count, samples = encapsulator.query_hierarchical_occlusion(**query_kwargs)

return jsonify({
"count": count,
"samples": samples
})


@BLUEPRINT.route("/explainer/evaluation", methods=["GET"])
def query_evaluation():
"""
Query saliency explainer evaluation scores.

Returns:
Response, contains dict that stores evaluation scores.

Raises:
ParamMissError: If train_id info is not in the request.
"""
train_id = get_train_id(request)
if train_id is None:
raise ParamMissError("train_id")
encapsulator = EvaluationEncap(EXPLAIN_MANAGER)
scores = encapsulator.query_explainer_scores(train_id)
return jsonify({
"explainer_scores": scores,
})


@BLUEPRINT.route("/explainer/image", methods=["GET"])
def query_image():
"""
Query image.

Returns:
bytes, image binary content for UI to demonstrate.
"""
train_id = get_train_id(request)
if train_id is None:
raise ParamMissError("train_id")
image_path = request.args.get("path")
if image_path is None:
raise ParamMissError("path")
image_type = request.args.get("type")
if image_type is None:
raise ParamMissError("type")
if image_type not in ("original", "overlay", "outcome"):
raise ParamValueError(f"type:{image_type}, valid options: 'original' 'overlay' 'outcome'")

encapsulator = DatafileEncap(EXPLAIN_MANAGER)
image = encapsulator.query_image_binary(train_id, image_path, image_type)

return image


def init_module(app):
"""
Init module entry.

Args:
app (flask.app): The application obj.
"""
app.register_blueprint(BLUEPRINT)
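As a reading aid for the removed explainer endpoints, a client-side sketch of a saliency query; the request keys mirror _get_query_sample_parameters and query_saliency above, while the host and values are placeholders.

# Sketch only: query saliency maps from the removed explainer API.
import requests

BASE = "http://127.0.0.1:8080/v1/mindinsight"
body = {
    "train_id": "./explain_job_1",        # placeholder train id
    "labels": ["cat", "dog"],             # placeholder labels
    "limit": 10,
    "offset": 0,
    "sorted_name": "confidence",
    "sorted_type": "descending",
    "explainers": ["Gradient"],           # placeholder explainer name
}
resp = requests.post(f"{BASE}/explainer/saliency", json=body)
data = resp.json()
print(data["count"], len(data["samples"]))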

+ 94
- 53
mindinsight/backend/lineagemgr/lineage_api.py View File

@@ -14,34 +14,36 @@
# ============================================================================
"""Lineage restful api."""
import json
import os

from flask import Blueprint, jsonify, request

from mindinsight.conf import settings
from mindinsight.datavisual.utils.tools import get_train_id
from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER
from mindinsight.lineagemgr.cache_item_updater import update_lineage_object
from mindinsight.lineagemgr.common.validator.validate import validate_train_id
from mindinsight.lineagemgr.model import filter_summary_lineage
from mindinsight.lineagemgr import filter_summary_lineage, get_summary_lineage
from mindinsight.lineagemgr.common.validator.validate import validate_path
from mindinsight.utils.exceptions import MindInsightException, ParamValueError

BLUEPRINT = Blueprint("lineage", __name__, url_prefix=settings.URL_PATH_PREFIX+settings.API_PREFIX)
BLUEPRINT = Blueprint("lineage", __name__, url_prefix=settings.URL_PREFIX.rstrip("/"))


@BLUEPRINT.route("/lineagemgr/lineages", methods=["POST"])
def get_lineage():
@BLUEPRINT.route("/models/model_lineage", methods=["POST"])
def search_model():
"""
Get lineage.
Get model lineage info.

    Query model info under the summary base dir and return a list of dicts, each containing
    the model's parameters and the count of summary logs.

Returns:
str, the lineage information.
str, the model lineage information.

Raises:
MindInsightException: If method fails to be called.
ParamValueError: If parsing json data search_condition fails.

Examples:
>>> POST http://xxxx/v1/mindinsight/lineagemgr/lineages
>>> POST http://xxxx/v1/mindinsight/models/model_lineage
"""
search_condition = request.stream.read()
try:
@@ -49,55 +51,84 @@ def get_lineage():
except Exception:
raise ParamValueError("Json data parse failed.")

lineage_info = _get_lineage_info(search_condition=search_condition)
model_lineage_info = _get_lineage_info(
lineage_type="model",
search_condition=search_condition
)

return jsonify(lineage_info)
return jsonify(model_lineage_info)


def _get_lineage_info(search_condition):
@BLUEPRINT.route("/datasets/dataset_lineage", methods=["POST"])
def get_datasets_lineage():
"""
Get lineage info for dataset or model.

Args:
search_condition (dict): Search condition.
Get dataset lineage.

Returns:
dict, lineage info.
str, the dataset lineage information.

Raises:
MindInsightException: If method fails to be called.
ParamValueError: If parsing json data search_condition fails.

Examples:
        >>> POST http://xxxx/v1/mindinsight/datasets/dataset_lineage
"""
search_condition = request.stream.read()
try:
lineage_info = filter_summary_lineage(data_manager=DATA_MANAGER, search_condition=search_condition)
search_condition = json.loads(search_condition if search_condition else "{}")
except Exception:
raise ParamValueError("Json data parse failed.")

except MindInsightException as exception:
raise MindInsightException(exception.error, exception.message, http_code=400)
dataset_lineage_info = _get_lineage_info(
lineage_type="dataset",
search_condition=search_condition
)

return lineage_info
return jsonify(dataset_lineage_info)


@BLUEPRINT.route("/lineagemgr/lineages", methods=["PUT"])
def update_lineage():
def _get_lineage_info(lineage_type, search_condition):
"""
Get lineage.
Get lineage info for dataset or model.

Args:
lineage_type (str): Lineage type, 'dataset' or 'model'.
search_condition (dict): Search condition.

Returns:
str, update the lineage information about cache and tag.
dict, lineage info.

Raises:
MindInsightException: If method fails to be called.

Examples:
>>> PUT http://xxxx/v1/mindinsight/lineagemgr/lineages?train_id=./run1
"""
train_id = get_train_id(request)
added_info = request.json
if not isinstance(added_info, dict):
raise ParamValueError("The request body should be a dict.")
if 'lineage_type' in search_condition:
raise ParamValueError("Lineage type does not need to be assigned in a specific interface.")
if lineage_type == 'dataset':
search_condition.update({'lineage_type': 'dataset'})
summary_base_dir = str(settings.SUMMARY_BASE_DIR)
try:
lineage_info = filter_summary_lineage(
summary_base_dir, search_condition)

update_lineage_object(DATA_MANAGER, train_id, added_info)
lineages = lineage_info['object']

return jsonify({"status": "success"})
summary_base_dir = os.path.realpath(summary_base_dir)
length = len(summary_base_dir)

for lineage in lineages:
summary_dir = lineage['summary_dir']
summary_dir = os.path.realpath(summary_dir)
if summary_base_dir == summary_dir:
relative_dir = './'
else:
relative_dir = os.path.join(os.curdir, summary_dir[length+1:])
lineage['summary_dir'] = relative_dir

except MindInsightException as exception:
raise MindInsightException(exception.error, exception.message, http_code=400)

return lineage_info
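The loop above rewrites each absolute summary_dir into a path relative to the summary base dir; isolated, the rule is roughly the sketch below (not the module's own helper).

# Sketch only: how summary_dir values are relativized against the summary base dir.
import os

def to_relative(summary_base_dir, summary_dir):
    base = os.path.realpath(summary_base_dir)
    target = os.path.realpath(summary_dir)
    if base == target:
        return './'
    # Keep everything after "<base>/" and re-anchor it under "./".
    return os.path.join(os.curdir, target[len(base) + 1:])

print(to_relative('/data/summaries', '/data/summaries'))        # ./
print(to_relative('/data/summaries', '/data/summaries/run1'))   # ./run1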


@BLUEPRINT.route("/datasets/dataset_graph", methods=["GET"])
@@ -116,28 +147,38 @@ def get_dataset_graph():
>>> GET http://xxxx/v1/mindinsight/datasets/dataset_graph?train_id=xxx
"""

train_id = get_train_id(request)
validate_train_id(train_id)
search_condition = {
'summary_dir': {
'in': [train_id]
}
}
result = {}
summary_base_dir = str(settings.SUMMARY_BASE_DIR)
summary_dir = get_train_id(request)
if summary_dir.startswith('/'):
validate_path(summary_dir)
elif summary_dir.startswith('./'):
summary_dir = os.path.join(summary_base_dir, summary_dir[2:])
summary_dir = validate_path(summary_dir)
else:
raise ParamValueError(
"Summary dir should be absolute path or "
"relative path that relate to summary base dir."
)
try:
objects = filter_summary_lineage(data_manager=DATA_MANAGER, search_condition=search_condition).get('object')
dataset_graph = get_summary_lineage(
summary_dir=summary_dir,
keys=['dataset_graph']
)
except MindInsightException as exception:
raise MindInsightException(exception.error, exception.message, http_code=400)

if objects:
lineage_obj = objects[0]
dataset_graph = lineage_obj.get('dataset_graph')

if dataset_graph:
result.update({'dataset_graph': dataset_graph})
result.update({'summary_dir': lineage_obj.get('summary_dir')})

return jsonify(result)
if dataset_graph:
summary_dir_result = dataset_graph.get('summary_dir')
base_dir_len = len(summary_base_dir)
if summary_base_dir == summary_dir_result:
relative_dir = './'
else:
relative_dir = os.path.join(
os.curdir, summary_dir[base_dir_len + 1:]
)
dataset_graph['summary_dir'] = relative_dir

return jsonify(dataset_graph)


def init_module(app):


+ 0
- 26
mindinsight/backend/optimizer/__init__.py View File

@@ -1,26 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Optimizer API module."""
from mindinsight.backend.optimizer.optimizer_api import init_module as init_optimizer_model


def init_module(app):
"""
Init module entry.

Args:
app: Flask. A Flask instance.
"""
init_optimizer_model(app)

+ 0
- 106
mindinsight/backend/optimizer/optimizer_api.py View File

@@ -1,106 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Optimizer API module."""
import json
from flask import Blueprint, jsonify, request

from mindinsight.conf import settings
from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER
from mindinsight.lineagemgr.model import get_lineage_table
from mindinsight.optimizer.common.enums import ReasonCode
from mindinsight.optimizer.common.exceptions import SamplesNotEnoughError, CorrelationNanError
from mindinsight.optimizer.utils.importances import calc_hyper_param_importance
from mindinsight.optimizer.utils.utils import calc_histogram
from mindinsight.utils.exceptions import ParamValueError

BLUEPRINT = Blueprint("optimizer", __name__, url_prefix=settings.URL_PATH_PREFIX+settings.API_PREFIX)


@BLUEPRINT.route("/optimizer/targets/search", methods=["POST"])
def get_optimize_targets():
"""Get optimize targets."""
search_condition = request.stream.read()
try:
search_condition = json.loads(search_condition if search_condition else "{}")
except Exception:
raise ParamValueError("Json data parse failed.")

response = _get_optimize_targets(DATA_MANAGER, search_condition)
return jsonify(response)


def _get_optimize_targets(data_manager, search_condition=None):
"""Get optimize targets."""
table = get_lineage_table(data_manager=data_manager, search_condition=search_condition)

target_summaries = []
for target in table.target_names:
hyper_parameters = []
for hyper_param in table.hyper_param_names:
param_info = {"name": hyper_param}
try:
importance = calc_hyper_param_importance(table.dataframe_data, hyper_param, target)
param_info.update({"importance": importance})
except SamplesNotEnoughError:
param_info.update({"importance": 0})
param_info.update({"reason_code": ReasonCode.SAMPLES_NOT_ENOUGH.value})
except CorrelationNanError:
param_info.update({"importance": 0})
param_info.update({"reason_code": ReasonCode.CORRELATION_NAN.value})
hyper_parameters.append(param_info)

# Sort `hyper_parameters` in descending order of `importance` and ascending order of `name`.
# If the automatically collected parameters and user-defined parameters have the same importance,
# the user-defined parameters will be ranked behind.
hyper_parameters.sort(key=lambda hyper_param: (-hyper_param.get("importance"),
hyper_param.get("name").startswith('['),
hyper_param.get("name")))

target_summary = {
"name": target,
"buckets": calc_histogram(table.get_column(target)),
"hyper_parameters": hyper_parameters,
"data": table.get_column_values(target)
}
target_summaries.append(target_summary)

target_summaries.sort(key=lambda summary: summary.get("name"))

hyper_params_metadata = [{
"name": hyper_param,
"data": table.get_column_values(hyper_param)
} for hyper_param in table.hyper_param_names]

result = {
"metadata": {
"train_ids": table.train_ids,
"possible_hyper_parameters": hyper_params_metadata,
"unrecognized_params": table.drop_column_info
},
"targets": target_summaries
}

return result
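The three-part sort key above orders entries by importance descending, places user-defined names (those starting with '[') behind collected ones at equal importance, and finally sorts by name; the sketch below shows it on made-up data.

# Sketch only: the hyper-parameter ordering used by _get_optimize_targets, on made-up data.
hyper_parameters = [
    {"name": "momentum", "importance": 0.2},
    {"name": "[user_defined]batch", "importance": 0.7},
    {"name": "learning_rate", "importance": 0.7},
]
hyper_parameters.sort(key=lambda p: (-p["importance"],
                                     p["name"].startswith('['),
                                     p["name"]))
print([p["name"] for p in hyper_parameters])
# ['learning_rate', '[user_defined]batch', 'momentum']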


def init_module(app):
"""
Init module entry.

Args:
app: the application obj.

"""
app.register_blueprint(BLUEPRINT)

+ 0
- 31
mindinsight/backend/profiler/__init__.py View File

@@ -1,31 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Module init file.
"""
from mindinsight.backend.profiler.profile_api import init_module as init_profiler_module


def init_module(app):
    """
    Init module entry.

    Args:
        app: Flask. A Flask instance.

    Returns:
    """
    init_profiler_module(app)

+ 0
- 713
mindinsight/backend/profiler/profile_api.py View File

@@ -1,713 +0,0 @@
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Profile api.

This module provides the interfaces to profile functions.
"""
import json
import os

from flask import Blueprint
from flask import jsonify
from flask import request
from marshmallow import ValidationError

from mindinsight.conf import settings
from mindinsight.datavisual.utils.tools import get_train_id, get_profiler_dir, to_int, get_device_id
from mindinsight.datavisual.utils.tools import unquote_args
from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory
from mindinsight.profiler.analyser.minddata_analyser import MinddataAnalyser
from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException
from mindinsight.profiler.common.util import analyse_device_list_from_profiler_dir, \
check_train_job_and_profiler_dir
from mindinsight.profiler.common.validator.validate import validate_condition, validate_ui_proc
from mindinsight.profiler.common.validator.validate import validate_minddata_pipeline_condition
from mindinsight.profiler.common.validator.validate_path import \
validate_and_normalize_path
from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_profiler_path
from mindinsight.profiler.proposer.compose_proposer import ComposeProposal
from mindinsight.profiler.common.log import logger
from mindinsight.utils.exceptions import ParamValueError
from mindinsight.backend.application import CustomResponse

BLUEPRINT = Blueprint("profile", __name__, url_prefix=settings.URL_PATH_PREFIX+settings.API_PREFIX)


@BLUEPRINT.route("/profile/ops/search", methods=["POST"])
def get_profile_op_info():
"""
Get operation profiling info.

Returns:
str, the operation profiling information.

Raises:
ParamValueError: If the search condition contains some errors.

Examples:
>>> POST http://xxxx/v1/mindinsight/profile/ops/search
"""
profiler_dir = get_profiler_dir(request)
train_id = get_train_id(request)
if not profiler_dir or not train_id:
raise ParamValueError("No profiler_dir or train_id.")

search_condition = request.stream.read()
try:
search_condition = json.loads(search_condition if search_condition else "{}")
except (json.JSONDecodeError, ValueError):
raise ParamValueError("Json data parse failed.")
validate_condition(search_condition)

device_id = search_condition.get("device_id", "0")
to_int(device_id, 'device_id')
profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
try:
profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
except ValidationError:
raise ParamValueError("Invalid profiler dir")

check_train_job_and_profiler_dir(profiler_dir_abs)

op_type = search_condition.get("op_type")

analyser = AnalyserFactory.instance().get_analyser(
op_type, profiler_dir_abs, device_id
)

op_info = analyser.query(search_condition)
return jsonify(op_info)
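A client-side sketch of the operator search call above; the body keys follow the handler, while the host, the query-argument names read by get_profiler_dir/get_train_id, and the op_type value are assumptions.

# Sketch only: operator profiling search against the removed profiler API.
import requests

BASE = "http://127.0.0.1:8080/v1/mindinsight"
params = {"train_id": "./run1", "profile": "profiler"}    # query-arg names are assumptions
body = {"op_type": "aicore_type", "device_id": "0"}        # op_type value is a placeholder
resp = requests.post(f"{BASE}/profile/ops/search", params=params, json=body)
print(resp.json())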


@BLUEPRINT.route("/profile/devices", methods=["GET"])
def get_profile_device_list():
"""
Get profile device list.

Returns:
list, the available device list.

Raises:
ParamValueError: If the search condition contains some errors.

Examples:
>>> POST http://xxxx/v1/mindinsight/profile/devices
"""
profiler_dir = get_profiler_dir(request)
train_id = get_train_id(request)
if not profiler_dir or not train_id:
raise ParamValueError("No profiler_dir or train_id.")

profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
try:
profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
except ValidationError:
raise ParamValueError("Invalid profiler dir")

check_train_job_and_profiler_dir(profiler_dir_abs)

device_list, _ = analyse_device_list_from_profiler_dir(profiler_dir_abs)
return jsonify(device_list)


@BLUEPRINT.route("/profile/training-trace/graph", methods=["GET"])
def get_training_trace_graph():
"""
Get training trace info of one step.

Returns:
Response, the training trace info of one step.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/training-trace/graph
"""
summary_dir = request.args.get("dir")
profiler_dir_abs = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
check_train_job_and_profiler_dir(profiler_dir_abs)

graph_type = request.args.get("type", default='0')
graph_type = to_int(graph_type, 'graph_type')
device_id = request.args.get("device_id", default='0')
_ = to_int(device_id, 'device_id')
graph_info = {}
try:
analyser = AnalyserFactory.instance().get_analyser(
'step_trace', profiler_dir_abs, device_id)
except ProfilerFileNotFoundException:
return jsonify(graph_info)

graph_info = analyser.query({
'filter_condition': {
'mode': 'step',
'step_id': graph_type
}})
graph_info['summary'] = analyser.summary
graph_info['point_info'] = analyser.point_info
graph_info['is_heterogeneous'] = False

# In heterogeneous training scene, do not display step trace data.
cpu_op_type_file_name = f"cpu_op_type_info_{device_id}.csv"
if cpu_op_type_file_name in os.listdir(profiler_dir_abs):
graph_info = {'is_heterogeneous': True}

return jsonify(graph_info)


@BLUEPRINT.route("/profile/training-trace/target-time-info", methods=["GET"])
def get_target_time_info():
"""
Get all the time information of the specified column.

Returns:
Response, all the time information of the specified column.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/training-trace/target-time-info
"""
summary_dir = request.args.get("dir")
profiler_dir_abs = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
check_train_job_and_profiler_dir(profiler_dir_abs)

proc_name = request.args.get("type")
validate_ui_proc(proc_name)
device_id = request.args.get("device_id", default='0')
_ = to_int(device_id, 'device_id')

analyser = AnalyserFactory.instance().get_analyser(
'step_trace', profiler_dir_abs, device_id)
target_time_info = analyser.query({
'filter_condition': {
'mode': 'proc',
'proc_name': proc_name
}})
target_time_info['summary'] = analyser.summary
return jsonify(target_time_info)


@BLUEPRINT.route("/profile/queue_info", methods=["GET"])
def get_queue_info():
"""
Get each type queue info.

Returns:
Response, the queue info.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/queue_info
"""
profiler_dir_abs = get_profiler_abs_dir(request)
check_train_job_and_profiler_dir(profiler_dir_abs)

device_id = unquote_args(request, "device_id")
to_int(device_id, 'device_id')
queue_type = unquote_args(request, "type")
queue_info = {}

minddata_analyser = AnalyserFactory.instance().get_analyser(
'minddata', profiler_dir_abs, device_id)
if queue_type == "get_next":
queue_info, _ = minddata_analyser.analyse_get_next_info(info_type="queue")
elif queue_type == "device_queue":
queue_info, _ = minddata_analyser.analyse_device_queue_info(info_type="queue")

return jsonify(queue_info)


@BLUEPRINT.route("/profile/minddata_op", methods=["GET"])
def get_time_info():
"""
Get minddata operation info.

Returns:
Response, the minddata operation info.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/minddata_op
"""
profiler_dir_abs = get_profiler_abs_dir(request)
check_train_job_and_profiler_dir(profiler_dir_abs)

device_id = unquote_args(request, "device_id")
to_int(device_id, 'device_id')
op_type = unquote_args(request, "type")

time_info = {
'size': 0,
'info': [],
"summary": {"time_summary": {}},
"advise": {}
}
minddata_analyser = AnalyserFactory.instance().get_analyser(
'minddata', profiler_dir_abs, device_id)
if op_type == "get_next":
_, time_info = minddata_analyser.analyse_get_next_info(info_type="time")
elif op_type == "device_queue":
_, time_info = minddata_analyser.analyse_device_queue_info(info_type="time")

return jsonify(time_info)


@BLUEPRINT.route("/profile/process_summary", methods=["GET"])
def get_process_summary():
"""
Get interval process summary.

Returns:
Response, the process summary.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/process_summary
"""
profiler_dir_abs = get_profiler_abs_dir(request)
check_train_job_and_profiler_dir(profiler_dir_abs)

device_id = unquote_args(request, "device_id")
to_int(device_id, 'device_id')

minddata_analyser = AnalyserFactory.instance().get_analyser(
'minddata', profiler_dir_abs, device_id)
get_next_queue_info, _ = minddata_analyser.analyse_get_next_info(info_type="queue")
device_queue_info, _ = minddata_analyser.analyse_device_queue_info(info_type="queue")

result = MinddataAnalyser.analyse_queue_summary(get_next_queue_info, device_queue_info)

return jsonify(result)


def get_profiler_abs_dir(requests):
"""
    Get the validated absolute profiler directory.

Args:
requests (LocalProxy): The requests.

Returns:
str, the profiler abs dir.
"""
profiler_dir = get_profiler_dir(requests)
train_id = get_train_id(requests)
if not profiler_dir or not train_id:
raise ParamValueError("No profiler_dir or train_id.")

profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
try:
profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
except ValidationError:
raise ParamValueError("Invalid profiler dir")

return profiler_dir_abs


@BLUEPRINT.route("/profile/summary/propose", methods=["GET"])
def get_profile_summary_proposal():
"""
Get summary profiling proposal.

Returns:
str, the summary profiling proposal.

Raises:
ParamValueError: If the parameters contain some errors.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/summary/propose
"""
profiler_dir = get_profiler_dir(request)
train_id = get_train_id(request)
device_id = get_device_id(request)
if not profiler_dir or not train_id:
raise ParamValueError("No profiler_dir or train_id.")
to_int(device_id, 'device_id')

profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
try:
profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
except ValidationError:
raise ParamValueError("Invalid profiler dir")

check_train_job_and_profiler_dir(profiler_dir_abs)

step_trace_condition = {"filter_condition": {"mode": "proc",
"proc_name": "iteration_interval",
"step_id": 0}}
options = {'step_trace': {"iter_interval": step_trace_condition}}

proposal_type_list = ['step_trace', 'minddata', 'minddata_pipeline', 'common']
proposal_obj = ComposeProposal(profiler_dir_abs, device_id, proposal_type_list)
proposal_info = proposal_obj.get_proposal(options)
# Use json.dumps for orderly return
return CustomResponse(json.dumps(proposal_info), mimetype='application/json')


@BLUEPRINT.route("/profile/minddata-pipeline/op-queue", methods=["POST"])
def get_minddata_pipeline_op_queue_info():
"""
Get minddata pipeline operator info and queue info.

Returns:
str, the operation information and queue information.

Raises:
ParamValueError: If the search condition contains some errors.

Examples:
>>> POST http://xxxx/v1/mindinsight/profile/minddata-pipeline/op-queue
"""
profiler_dir = get_profiler_dir(request)
train_id = get_train_id(request)
if not profiler_dir or not train_id:
raise ParamValueError("No profiler_dir or train_id.")

profiler_dir_abs = os.path.join(
settings.SUMMARY_BASE_DIR, train_id, profiler_dir
)
try:
profiler_dir_abs = validate_and_normalize_path(
profiler_dir_abs, "profiler"
)
except ValidationError:
raise ParamValueError("Invalid profiler dir.")

check_train_job_and_profiler_dir(profiler_dir_abs)
condition = request.stream.read()
try:
condition = json.loads(condition) if condition else {}
except Exception:
raise ParamValueError("Json data parse failed.")
validate_minddata_pipeline_condition(condition)

device_id = condition.get("device_id", "0")
to_int(device_id, 'device_id')
analyser = AnalyserFactory.instance().get_analyser(
'minddata_pipeline', profiler_dir_abs, device_id
)
op_info = analyser.query(condition)
return jsonify(op_info)


@BLUEPRINT.route("/profile/minddata-pipeline/queue", methods=["GET"])
def get_minddata_pipeline_queue_info():
"""
Get the special minddata pipeline queue info.

Returns:
str, the queue information.

Raises:
ParamValueError: If the search condition contains some errors.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/minddata-pipeline/queue
"""
profiler_dir = get_profiler_dir(request)
train_id = get_train_id(request)
if not profiler_dir or not train_id:
raise ParamValueError("No profiler_dir or train_id.")

profiler_dir_abs = os.path.join(
settings.SUMMARY_BASE_DIR, train_id, profiler_dir
)
try:
profiler_dir_abs = validate_and_normalize_path(
profiler_dir_abs, "profiler"
)
except ValidationError:
raise ParamValueError("Invalid profiler dir.")

check_train_job_and_profiler_dir(profiler_dir_abs)

device_id = request.args.get('device_id', default='0')
to_int(device_id, 'device_id')
op_id = request.args.get('op_id', type=int)
if op_id is None:
raise ParamValueError("Invalid operator id or operator id does not exist.")

analyser = AnalyserFactory.instance().get_analyser(
'minddata_pipeline', profiler_dir_abs, device_id
)
op_queue_info = analyser.get_op_and_parent_op_info(op_id)
return jsonify(op_queue_info)


@BLUEPRINT.route("/profile/timeline-summary", methods=["GET"])
def get_timeline_summary():
"""
Get timeline summary info.

Returns:
Response, the timeline summary info.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/timeline-summary
"""
summary_dir = request.args.get("dir")
profiler_dir_abs = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
check_train_job_and_profiler_dir(profiler_dir_abs)

device_id = request.args.get("device_id", default='0')
_ = to_int(device_id, 'device_id')
device_type = request.args.get("device_type", default='ascend')
if device_type not in ['gpu', 'ascend']:
logger.info("Invalid device_type, device_type should be gpu or ascend.")
raise ParamValueError("Invalid device_type.")

analyser = AnalyserFactory.instance().get_analyser(
'timeline', profiler_dir_abs, device_id)
summary = analyser.get_timeline_summary(device_type)

return summary


@BLUEPRINT.route("/profile/timeline", methods=["GET"])
def get_timeline_detail():
"""
Get timeline detail.

Returns:
Response, the detail information of timeline.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/timeline
"""
summary_dir = request.args.get("dir")
profiler_dir_abs = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
check_train_job_and_profiler_dir(profiler_dir_abs)

device_id = request.args.get("device_id", default='0')
_ = to_int(device_id, 'device_id')
device_type = request.args.get("device_type", default='ascend')
scope_name_num = request.args.get("scope_name_num", default='0')
if device_type not in ['gpu', 'ascend']:
logger.info("Invalid device_type, device_type should be gpu or ascend.")
raise ParamValueError("Invalid device_type.")

analyser = AnalyserFactory.instance().get_analyser(
'timeline', profiler_dir_abs, device_id)
timeline = analyser.get_display_timeline(device_type, scope_name_num)

return jsonify(timeline)


@BLUEPRINT.route("/profile/memory-summary", methods=["GET"])
def get_memory_usage_summary():
"""
Get memory usage summary info.

Returns:
Response, the memory usage summary info.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/memory-summary
"""
summary_dir = request.args.get("dir")
profiler_dir_abs = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
check_train_job_and_profiler_dir(profiler_dir_abs)

device_id = request.args.get("device_id", default='0')
to_int(device_id, 'device_id')
device_type = request.args.get("device_type", default='ascend')
if device_type not in ['ascend']:
logger.info("Invalid device_type, Memory Usage only supports Ascend for now.")
raise ParamValueError("Invalid device_type.")

analyser = AnalyserFactory.instance().get_analyser(
'memory_usage', profiler_dir_abs, device_id)
summary = analyser.get_memory_usage_summary(device_type)

return summary


@BLUEPRINT.route("/profile/memory-graphics", methods=["GET"])
def get_memory_usage_graphics():
"""
Get graphic representation of memory usage.

Returns:
Response, the graphic representation of memory usage.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/memory-graphics
"""
summary_dir = request.args.get("dir")
profiler_dir_abs = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
check_train_job_and_profiler_dir(profiler_dir_abs)

device_id = request.args.get("device_id", default='0')
to_int(device_id, 'device_id')
device_type = request.args.get("device_type", default='ascend')
if device_type not in ['ascend']:
logger.info("Invalid device_type, Memory Usage only supports Ascend for now.")
raise ParamValueError("Invalid device_type.")

analyser = AnalyserFactory.instance().get_analyser(
'memory_usage', profiler_dir_abs, device_id)
graphics = analyser.get_memory_usage_graphics(device_type)

return graphics


@BLUEPRINT.route("/profile/memory-breakdowns", methods=["GET"])
def get_memory_usage_breakdowns():
"""
Get memory breakdowns of each node.

Returns:
Response, the memory breakdowns for each node.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/memory-breakdowns
"""
summary_dir = request.args.get("dir")
profiler_dir_abs = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
check_train_job_and_profiler_dir(profiler_dir_abs)

device_id = request.args.get("device_id", default='0')
to_int(device_id, 'device_id')
device_type = request.args.get("device_type", default='ascend')
graph_id = request.args.get("graph_id", default='0')
node_id = request.args.get("node_id", default='0')
node_id = to_int(node_id, 'node_id')
if device_type not in ['ascend']:
logger.error("Invalid device_type, Memory Usage only supports Ascend for now.")
raise ParamValueError("Invalid device_type.")

analyser = AnalyserFactory.instance().get_analyser(
'memory_usage', profiler_dir_abs, device_id)
breakdowns = analyser.get_memory_usage_breakdowns(device_type, graph_id, node_id)

return breakdowns
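
# Illustrative usage (not part of the source file): the memory-breakdowns endpoint
# additionally takes graph_id and node_id; all values below are placeholders.
import requests

resp = requests.get(
    "http://127.0.0.1:8080/v1/mindinsight/profile/memory-breakdowns",
    params={"dir": "./train_job/profiler", "device_id": "0",
            "device_type": "ascend", "graph_id": "0", "node_id": "0"},
)
print(resp.json())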


@BLUEPRINT.route("/profile/minddata-cpu-utilization-summary", methods=["POST"])
def get_minddata_cpu_utilization_info():
"""
Get minddata cpu utilization info.

Returns:
str, the minddata cpu utilization info.

Raises:
ParamValueError: If the search condition contains some errors.

Examples:
        >>> POST http://xxx/v1/mindinsight/profile/minddata-cpu-utilization-summary
"""
profiler_dir = get_profiler_dir(request)
train_id = get_train_id(request)
if not profiler_dir or not train_id:
raise ParamValueError("No profiler_dir or train_id.")

profiler_dir_abs = os.path.join(
settings.SUMMARY_BASE_DIR, train_id, profiler_dir
)

try:
profiler_dir_abs = validate_and_normalize_path(
profiler_dir_abs, "profiler"
)
except ValidationError:
raise ParamValueError("Invalid profiler dir.")

check_train_job_and_profiler_dir(profiler_dir_abs)
condition = request.stream.read()
try:
condition = json.loads(condition) if condition else {}
except (json.JSONDecodeError, ValueError):
raise ParamValueError("Json data parse failed.")

device_id = condition.get("device_id", "0")
to_int(device_id, 'device_id')
analyser = AnalyserFactory.instance().get_analyser(
'minddata_cpu_utilization', profiler_dir_abs, device_id
)
cpu_utilization = analyser.query(condition)
return jsonify(cpu_utilization)
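
# Illustrative usage (not part of the source file): this endpoint reads a JSON body.
# Only device_id is consumed directly above; the remaining condition fields depend on
# the minddata_cpu_utilization analyser. The query parameter names handled by
# get_train_id/get_profiler_dir are assumed here to be train_id and profile.
import requests

resp = requests.post(
    "http://127.0.0.1:8080/v1/mindinsight/profile/minddata-cpu-utilization-summary",
    params={"train_id": "./train_job", "profile": "profiler"},
    json={"device_id": "0"},
)
print(resp.json())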


@BLUEPRINT.route("/profile/cluster-step-trace-summary", methods=["POST"])
def get_cluster_step_trace_info():
"""
Get cluster step trace info.

Returns:
str, the cluster step trace info.

Raises:
ParamValueError: If the search condition contains some errors.

Examples:
        >>> POST http://xxx/v1/mindinsight/profile/cluster-step-trace-summary
"""
train_id = get_train_id(request)
cluster_profiler_dir = os.path.join(settings.SUMMARY_BASE_DIR, train_id)
try:
cluster_profiler_dir = validate_and_normalize_path(cluster_profiler_dir, 'cluster_profiler')
except ValidationError:
raise ParamValueError('Invalid cluster_profiler dir')

condition = request.stream.read()
try:
condition = json.loads(condition) if condition else {}
except (json.JSONDecodeError, ValueError):
raise ParamValueError("Json data parse failed.")

device_id = condition.get("device_id", "0")
to_int(device_id, 'device_id')

analyser = AnalyserFactory.instance().get_analyser(
'cluster_step_trace', cluster_profiler_dir, device_id
)
step_trace_info = analyser.query(condition)
return jsonify(step_trace_info)


@BLUEPRINT.route("/profile/cluster-peak-memory", methods=["GET"])
def get_cluster_peak_memory():
"""
Get cluster peak memory.

Returns:
str, the cluster peak memory.

Raises:
ParamValueError: If the cluster profiler dir is invalid.

Examples:
        >>> GET http://xxx/v1/mindinsight/profile/cluster-peak-memory
"""
train_id = get_train_id(request)
if not train_id:
raise ParamValueError('No train id.')
cluster_profiler_dir = os.path.join(settings.SUMMARY_BASE_DIR, train_id)
cluster_profiler_dir = validate_and_normalize_path(cluster_profiler_dir, 'cluster_profiler')
check_train_job_and_profiler_dir(cluster_profiler_dir)

analyser = AnalyserFactory.instance().get_analyser(
'cluster_memory', cluster_profiler_dir
)
peak_mem = analyser.get_peak_memory()
return jsonify(peak_mem)


def init_module(app):
"""
Init module entry.

Args:
        app (Flask): The application obj.

"""
app.register_blueprint(BLUEPRINT)
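
# Illustrative usage (not part of the source file): wiring the module entry into a
# Flask application registers all of the /profile/* routes defined above.
from flask import Flask

app = Flask(__name__)
init_module(app)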

+ 57
- 92
mindinsight/backend/run.py

@@ -14,19 +14,20 @@
# ============================================================================
"""Web service entrance."""
import os
import re
import shlex
import stat
import re
import subprocess
import sys
import time
from enum import Enum, unique
import shlex

from gunicorn.glogging import Logger

from mindinsight.backend.config import gunicorn_conf
from mindinsight.backend.config import WEB_CONFIG_DIR
from mindinsight.conf import settings
from mindinsight.utils.log import setup_logger


MINDBOARD_APP_MODULE = "mindinsight.backend.application:APP"
GUNICORN_LOGGER = "mindinsight.backend.run.GunicornLogger"

@@ -34,16 +35,6 @@ _MIN_PORT = 1
_MAX_PORT = 65535


@unique
class ServerStateEnum(Enum):
"""
    The service startup status values are as follows: "unknown", "failed" and "success".
"""
UNKNOWN = "unknown"
FAILED = "failed"
SUCCESS = "success"


def _get_file_size(file_path):
"""
Get the file size.
@@ -78,23 +69,19 @@ def _is_match_one(sub_string_list, src_string):
return False


def _check_stat_from_log(pid, log_info):
def _check_stat_from_log(log_info):
"""
Determine the service startup status based on the log information.

Args:
pid (int): The gunicorn process ID.
log_info (str): The output log of service startup.

Returns:
        str, the state value, which is one of the following: "unknown", "failed" and "success".
"""
server_state = ServerStateEnum.UNKNOWN.value

    # should stay in sync with the startup log written in the gunicorn post_worker_init hook
# refer to mindinsight/backend/config/gunicorn_conf.py
match_success_info = "Server pid: %d, start to listening." % pid

server_state = "unknown"
match_success_info = "Listening at: http://%s:%d" % \
(settings.HOST, int(settings.PORT))
common_failed_info_list = [
"[ERROR] Retrying in 1 second",
"[INFO] Reason: App failed to load",
@@ -106,10 +93,10 @@ def _check_stat_from_log(pid, log_info):
# matched failed output log by fuzzy match
if re.search(re_pattern, log_info) or \
_is_match_one(common_failed_info_list, log_info):
server_state = ServerStateEnum.FAILED.value
server_state = "failed"

if match_success_info in log_info:
server_state = ServerStateEnum.SUCCESS.value
server_state = "success"

return server_state
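
# Illustrative sketch (not part of the diff) of the matching logic above: a line is
# marked "failed" when it matches the worker-exit pattern (the exact re_pattern is not
# shown in this hunk, so a plausible one is assumed) or contains a known failure
# substring, and "success" overrides it when the success message is present.
import re

def classify_line(line):
    state = "unknown"
    failure_substrings = ["[ERROR] Retrying in 1 second",
                          "[INFO] Reason: App failed to load"]
    if re.search(r"Worker \(pid:\d+\) exited", line) or \
            any(sub in line for sub in failure_substrings):
        state = "failed"
    if "start to listening" in line:
        state = "success"
    return state

print(classify_line("[ERROR] Retrying in 1 second"))  # -> "failed"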

@@ -122,24 +109,23 @@ def _get_error_log_path():
str, the path of error log.
"""

path = os.path.join(settings.WORKSPACE, 'log/gunicorn/error.{}.log'.format(settings.PORT))
path = os.path.join(settings.WORKSPACE, 'log/gunicorn/error.log')
errorlog_abspath = os.path.realpath(path)
return errorlog_abspath


def _get_access_log_path():
"""Get gunicorn access log path."""
access_log_path = os.path.join(settings.WORKSPACE, 'log/gunicorn/access.{}.log'.format(settings.PORT))
access_log_path = os.path.join(settings.WORKSPACE, 'log/gunicorn/access.log')
access_log_path = os.path.realpath(access_log_path)
return access_log_path


def _check_state_from_log(pid, log_abspath, start_pos=0):
def _check_state_from_log(log_abspath, start_pos=0):
"""
Check the service startup status based on the log file.

Args:
pid (int): The gunicorn process ID.
log_abspath (str): Absolute path of the log file.
start_pos (int): Offset position of the log file.

@@ -149,24 +135,29 @@ def _check_state_from_log(pid, log_abspath, start_pos=0):
The value of the "prompt_message" key is a list of prompt messages.

"""
state_result = {"state": ServerStateEnum.UNKNOWN.value, "prompt_message": []}
server_is_start = False
state_result = {"state": "unknown", "prompt_message": []}
prompt_messages = []
match_start_log = "Starting gunicorn"
with open(log_abspath) as f_log:
f_log.seek(start_pos)
for line in f_log.readlines():
log_result = _check_stat_from_log(pid, line)
# ignore "unknown" result
if log_result != ServerStateEnum.UNKNOWN.value:
state_result["state"] = log_result

if log_result == ServerStateEnum.FAILED.value:
prompt_messages.append(line.strip())
prompt_messages.append(
"more failed details in log: %s" % log_abspath)
break
if log_result == ServerStateEnum.UNKNOWN.value:
prompt_messages.append(
"more details in log: %s" % log_abspath)
if match_start_log in line:
if server_is_start:
break
server_is_start = True
continue
if server_is_start:
log_result = _check_stat_from_log(line)
# ignore "unknown" result
if log_result != "unknown":
state_result["state"] = log_result

if log_result == "failed":
prompt_messages.append(line.strip())
prompt_messages.append(
"more failed details in log: %s" % log_abspath)
break
state_result["prompt_message"].append(
"service start state: %s" % state_result["state"])
for prompt_message in prompt_messages:
@@ -175,42 +166,35 @@ def _check_state_from_log(pid, log_abspath, start_pos=0):
return state_result


def _check_server_start_stat(pid, log_abspath, log_pos):
def _check_server_start_stat(log_abspath, start_pos=None):
"""
    Check the server startup status.

Args:
pid (int): The gunicorn process ID.
log_abspath (str): The log file path.
start_pos (int): The log file start position.

Returns:
        dict, a dict object that contains the state and prompt_message fields.
The state values are as follows: "unknown", "failed" and "success".

"""
state_result = {"state": ServerStateEnum.UNKNOWN.value, "prompt_message": []}
state_result = {"state": "unknown", "prompt_message": []}
    # return unknown when the gunicorn error log file is not configured
if not log_abspath:
return state_result

# sleep 1 second for gunicorn master to be ready
time.sleep(1)

log_pos = _get_file_size(log_abspath) if start_pos is None else start_pos
try_cnt = 0
try_cnt_max = 2

while try_cnt < try_cnt_max:
time.sleep(1)
try_cnt += 1
file_size = _get_file_size(log_abspath)
if file_size > log_pos:
state_result.update(_check_state_from_log(pid, log_abspath, log_pos))
time.sleep(1)
if _get_file_size(log_abspath) > log_pos:
state_result.update(_check_state_from_log(log_abspath, log_pos))
break

if not state_result['prompt_message']:
state_result["prompt_message"].append(
"service start state: %s" % state_result["state"])

return state_result


@@ -218,25 +202,19 @@ class GunicornLogger(Logger):
"""Rewrite gunicorn default logger."""

def __init__(self, cfg):
self.cfg = cfg
self.access_log = setup_logger('gunicorn', 'access', formatter='%(message)s')
self.error_log = setup_logger('gunicorn', 'error', formatter=self.error_fmt)
self.access_log = setup_logger('gunicorn', 'access')
self.error_log = setup_logger('gunicorn', 'error')
super(GunicornLogger, self).__init__(cfg)
access_log_path = _get_access_log_path()
error_log_path = _get_error_log_path()
os.chmod(access_log_path, stat.S_IREAD | stat.S_IWRITE)
os.chmod(error_log_path, stat.S_IREAD | stat.S_IWRITE)
super(GunicornLogger, self).__init__(cfg)

def now(self):
"""Get log format."""
return time.strftime('[%Y-%m-%d-%H:%M:%S %z]')

def setup(self, cfg):
"""Rewrite the setup method of Logger, and we don't need to do anything"""


def start():
"""Start web service."""
errorlog_abspath = _get_error_log_path()

gunicorn_conf_file = os.path.join(WEB_CONFIG_DIR, "gunicorn_conf.py")
cmd = "gunicorn " \
"-b {host}:{port} {app_module} " \
@@ -251,40 +229,27 @@ def start():
log_format=settings.GUNICORN_ACCESS_FORMAT
)

error_log_abspath = _get_error_log_path()
log_size = _get_file_size(errorlog_abspath)

# Init the logger file
setup_logger('gunicorn', 'error')
log_handler = open(error_log_abspath, 'a+')
pre_log_pos = _get_file_size(error_log_abspath)
# start server
process = subprocess.Popen(
shlex.split(cmd),
shell=False,
        # Redirect stdin to DEVNULL to prevent broken pipe errors when creating new processes.
stdin=subprocess.DEVNULL,
stdout=log_handler,
stderr=subprocess.STDOUT
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
_, stderr = process.communicate()
if stderr:
print(stderr.decode())

# sleep 1 second for gunicorn application to load modules
time.sleep(1)

# check if gunicorn application is running
console = setup_logger('mindinsight', 'console', console=True, logfile=False, formatter='%(message)s')
if process.poll() is not None:
console.error("Start MindInsight failed. See log for details, log path: %s.", error_log_abspath)
sys.exit(1)
else:
state_result = _check_server_start_stat(process.pid, error_log_abspath, pre_log_pos)
    # wait for the command to finish when gunicorn is running as a daemon.
if gunicorn_conf.daemon and process.wait() == 0:
state_result = _check_server_start_stat(errorlog_abspath, log_size)
# print gunicorn start state to stdout
label = 'Web address:'
format_args = label, settings.HOST, str(settings.PORT), settings.URL_PATH_PREFIX
console.info('%s http://%s:%s%s', *format_args)
print('Web address: http://{}:{}'.format(settings.HOST, settings.PORT))
for line in state_result["prompt_message"]:
console.info(line)
if state_result["state"] == ServerStateEnum.FAILED.value:
sys.exit(1)
print(line)


if __name__ == '__main__':


+ 0
- 26
mindinsight/backend/ui_config/__init__.py

@@ -1,26 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""UI config module init file."""
from mindinsight.backend.ui_config.ui_config_api import init_module as init_config_module


def init_module(app):
"""
Init module entry.

Args:
app (Flask): A Flask instance.
"""
init_config_module(app)

+ 0
- 40
mindinsight/backend/ui_config/ui_config_api.py

@@ -1,40 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""UI config restful api."""
from flask import Blueprint, jsonify

from mindinsight.conf import settings

BLUEPRINT = Blueprint("ui_config", __name__,
url_prefix=settings.URL_PATH_PREFIX + settings.API_PREFIX)


@BLUEPRINT.route("/ui-config", methods=["GET"])
def get_config():
"""Get config of UI."""
reply = {}
enable_debugger = settings.ENABLE_DEBUGGER if hasattr(settings, 'ENABLE_DEBUGGER') else False
reply["enable_debugger"] = enable_debugger
return jsonify(reply)
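
# Illustrative usage (not part of the source file): with ENABLE_DEBUGGER unset or
# False, this endpoint simply returns {"enable_debugger": false}. Host and port are
# assumptions for the example.
import requests

resp = requests.get("http://127.0.0.1:8080/v1/mindinsight/ui-config")
print(resp.json())  # e.g. {'enable_debugger': False} when the debugger is disabled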


def init_module(app):
"""
Init module entry.

Args:
app (Flask): The application obj.
"""
app.register_blueprint(BLUEPRINT)

+ 0
- 1
mindinsight/common/__init__.py

@@ -12,4 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Common function for mindinsight."""

+ 0
- 1
mindinsight/common/hook/__init__.py

@@ -12,4 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Hook module used to define relative hook."""

+ 2
- 2
mindinsight/common/hook/datavisual.py

@@ -35,8 +35,8 @@ class ReloadIntervalAction(argparse.Action):
option_string (str): Option string for specific argument name.
"""
reload_interval = values
if reload_interval < 0 or reload_interval > settings.MAX_RELOAD_INTERVAL:
            parser.error(f'{option_string} should be greater than or equal to 0 and less than or equal to 300')
if reload_interval < 0:
parser.error(f'{option_string} should be greater than or equal to 0')
setattr(namespace, self.dest, reload_interval)


+ 0
- 99
mindinsight/common/hook/debugger.py

@@ -1,99 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Debugger hook."""
import argparse
from mindinsight.conf import settings
from mindinsight.utils.hook import BaseHook
def enable_debugger_string(string):
"""Convert str to bool"""
if string.lower() in ('false', '0'):
return False
if string.lower() in ('true', '1'):
return True
raise ValueError
class EnableDebuggerAction(argparse.Action):
"""Enable debugger action class definition."""
def __call__(self, parser, namespace, values, option_string=None):
"""
Inherited __call__ method from argparse.Action.
Args:
parser (ArgumentParser): Passed-in argument parser.
namespace (Namespace): Namespace object to hold arguments.
values (object): Argument values with type depending on argument definition.
option_string (str): Optional string for specific argument name. Default: None.
"""
enable_debugger = values
setattr(namespace, self.dest, enable_debugger)
class PortAction(argparse.Action):
"""Port action class definition."""
MIN_PORT = 1
MAX_PORT = 65535
OPEN_PORT_LIMIT = 1024
def __call__(self, parser, namespace, values, option_string=None):
"""
Inherited __call__ method from argparse.Action.
Args:
parser (ArgumentParser): Passed-in argument parser.
namespace (Namespace): Namespace object to hold arguments.
values (object): Argument values with type depending on argument definition.
option_string (str): Optional string for specific argument name. Default: None.
"""
port = values
if not self.MIN_PORT <= port <= self.MAX_PORT:
parser.error(f'{option_string} should be chosen from {self.MIN_PORT} to {self.MAX_PORT}')
setattr(namespace, self.dest, port)
class Hook(BaseHook):
"""Hook class definition."""
def register_startup_arguments(self, parser):
"""
Hook function to register startup arguments.
Args:
parser (ArgumentParser): Specify parser to which arguments are added.
"""
parser.add_argument(
'--enable-debugger',
type=enable_debugger_string,
action=EnableDebuggerAction,
default=False,
help="""
Enable debugger or not. The value can be True/False/1/0 (case insensitive).
Default is False.""")
parser.add_argument(
'--debugger-port',
type=int,
action=PortAction,
help="""
Debugger port ranging from %s to %s. Default value is %s.
""" % (PortAction.MIN_PORT, PortAction.MAX_PORT, settings.DEBUGGER_PORT))

+ 10
- 0
mindinsight/conf/__init__.py

@@ -110,6 +110,16 @@ class Settings:
setattr(self, setting, value)
self._explicit_settings.add(setting)

def config_workspace(self, workspace):
"""
Config workspace value.

Args:
workspace (str): Path of workspace.
"""
setattr(self, 'WORKSPACE', workspace)
self._explicit_settings.add('WORKSPACE')

def is_overridden(self, setting_name):
"""
Check if specified setting is overridden.


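# Illustrative usage (not part of the diff): the new config_workspace method lets a
# caller override the workspace directory on the shared settings instance before the
# web service starts; the path below is a placeholder.
from mindinsight.conf import settings

settings.config_workspace('/tmp/mindinsight_workspace')
assert settings.is_overridden('WORKSPACE')
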
+ 4
- 13
mindinsight/conf/constants.py

@@ -14,8 +14,6 @@
# ============================================================================
"""Constants module for mindinsight settings."""
import logging
import multiprocessing


####################################
# Global default settings.
@@ -23,7 +21,7 @@ import multiprocessing
LOG_FORMAT = '[%(levelname)s] MI(%(process)d:%(thread)d,%(processName)s):%(asctime)s ' \
'[%(filepath)s:%(lineno)d][%(sub_module)s] %(message)s'

GUNICORN_ACCESS_FORMAT = "'%(t)s %(h)s <%(r)s> %(s)s %(b)s <%(f)s> <%(a)s> %(L)s '"
GUNICORN_ACCESS_FORMAT = "'%(h)s <%(r)s> %(s)s %(b)s <%(f)s> <%(a)s> %(D)s'"

LOG_LEVEL = logging.INFO
# rotating max bytes, default is 50M
@@ -43,25 +41,18 @@ ENABLE_CORS = False

SUPPORT_REQUEST_METHODS = {'POST', 'GET', 'PUT', 'DELETE'}

# api prefix should not end with slash, correct format is /v1/url
API_PREFIX = '/v1/mindinsight'
# url prefix should not end with slash, correct format is /v1/url
URL_PREFIX = '/v1/mindinsight'

####################################
# Datavisual default settings.
####################################
MAX_PROCESSES_COUNT = max(min(int(multiprocessing.cpu_count() * 0.75), 45), 1)
MAX_THREADS_COUNT = 15

MAX_TAG_SIZE_PER_EVENTS_DATA = 300
DEFAULT_STEP_SIZES_PER_TAG = 500

MAX_GRAPH_TAG_SIZE = 10
MAX_TENSOR_TAG_SIZE = 6
MAX_IMAGE_STEP_SIZE_PER_TAG = 10
MAX_RELOAD_INTERVAL = 300
MAX_SCALAR_STEP_SIZE_PER_TAG = 1000
MAX_GRAPH_STEP_SIZE_PER_TAG = 1
MAX_HISTOGRAM_STEP_SIZE_PER_TAG = 50
MAX_TENSOR_STEP_SIZE_PER_TAG = 20
MAX_TENSOR_RESPONSE_DATA_SIZE = 100000

ENABLE_RECOMMENDED_WATCHPOINTS = True

+ 1
- 8
mindinsight/conf/defaults.py

@@ -24,16 +24,9 @@ WORKSPACE = os.path.join(os.environ['HOME'], 'mindinsight')
# Web default settings.
####################################
PORT = 8080
URL_PATH_PREFIX = ''

####################################
# Debugger default settings.
####################################
DEBUGGER_PORT = 50051
ENABLE_DEBUGGER = False

####################################
# Datavisual default settings.
####################################
RELOAD_INTERVAL = 3 # Seconds
RELOAD_INTERVAL = 3 # Seconds
SUMMARY_BASE_DIR = os.getcwd()

+ 0
- 1
mindinsight/datavisual/__init__.py

@@ -12,4 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Provide data visualization function."""

+ 0
- 1
mindinsight/datavisual/common/__init__.py

@@ -12,4 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""This module defines the public methods that are relevant to the business."""

+ 2
- 12
mindinsight/datavisual/common/enums.py

@@ -14,9 +14,9 @@
# ============================================================================
"""Enums."""

import enum
from enum import Enum

class BaseEnum(enum.Enum):
class BaseEnum(Enum):

@classmethod
def list_members(cls):
@@ -37,13 +37,3 @@ class PluginNameEnum(BaseEnum):
IMAGE = 'image'
SCALAR = 'scalar'
GRAPH = 'graph'
HISTOGRAM = 'histogram'
TENSOR = 'tensor'


@enum.unique
class CacheStatus(enum.Enum):
"""Train job cache status."""
NOT_IN_CACHE = "NOT_IN_CACHE"
CACHING = "CACHING"
CACHED = "CACHED"

+ 4
- 136
mindinsight/datavisual/common/exceptions.py

@@ -57,22 +57,13 @@ class SummaryLogPathInvalid(MindInsightException):

class CRCFailedError(MindInsightException):
"""CRC fail, record corrupted."""
def __init__(self, error_detail):
error_msg = 'CRC Failed. Detail: %s' % error_detail
def __init__(self):
error_msg = 'CRC Failed.'
super(CRCFailedError, self).__init__(DataVisualErrors.CRC_FAILED,
error_msg,
http_code=400)


class CRCLengthFailedError(MindInsightException):
"""CRC length fail, record corrupted."""
def __init__(self, error_detail):
error_msg = 'CRC Length Failed. Detail: %s' % error_detail
super(CRCLengthFailedError, self).__init__(DataVisualErrors.CRC_LENGTH_FAILED,
error_msg,
http_code=400)


class SummaryLogIsLoading(MindInsightException):
"""Data is loading."""

@@ -85,131 +76,8 @@ class SummaryLogIsLoading(MindInsightException):

class NodeNotInGraphError(MindInsightException):
"""Can not find node in graph error."""
def __init__(self, node_name, node_type=None):
if node_type is not None:
error_msg = f"Can not find node in graph by the given node name. node name: {node_name}, type: {node_type}."
else:
error_msg = f"Can not find node in graph by the given node name. node name: {node_name}."
super(NodeNotInGraphError, self).__init__(DataVisualErrors.NODE_NOT_IN_GRAPH_ERROR,
error_msg,
http_code=400)


class MaxCountExceededError(MindInsightException):
"""Count is out of limit."""
def __init__(self):
error_msg = "Count is out of limit."
super(MaxCountExceededError, self).__init__(DataVisualErrors.MAX_COUNT_EXCEEDED_ERROR,
error_msg,
http_code=400)


class TrainJobNotExistError(MindInsightException):
"""Can not find the given train job."""
def __init__(self, error_detail=None):
if error_detail is None:
error_msg = f"Train job is not exist."
else:
error_msg = f"Train job is not exist. Detail: {error_detail}"
super(TrainJobNotExistError, self).__init__(DataVisualErrors.TRAIN_JOB_NOT_EXIST,
error_msg,
http_code=400)


class QueryStringContainsNullByteError(MindInsightException):
"""Query string contains null byte error."""
def __init__(self, error_detail):
error_msg = f"Query string contains null byte error. Detail: {error_detail}"
super(QueryStringContainsNullByteError, self).__init__(DataVisualErrors.QUERY_STRING_CONTAINS_NULL_BYTE,
error_msg,
http_code=400)


class PluginNotAvailableError(MindInsightException):
"""The given plugin is not available."""
def __init__(self, error_detail):
error_msg = f"Plugin is not available. Detail: {error_detail}"
super(PluginNotAvailableError, self).__init__(DataVisualErrors.PLUGIN_NOT_AVAILABLE,
error_msg,
http_code=400)


class GraphNotExistError(MindInsightException):
"""Can not found the given graph."""
def __init__(self, error_detail=None):
error_msg = 'Graph is not exist.' if error_detail is None else f'Graph is not exist. Detail: {error_detail}'
super(GraphNotExistError, self).__init__(DataVisualErrors.GRAPH_NOT_EXIST,
error_msg,
http_code=400)


class ImageNotExistError(MindInsightException):
"""Unable to get a image based on a given condition."""
def __init__(self, error_detail):
error_msg = f'Image is not exist. Detail: {error_detail}'
super(ImageNotExistError, self).__init__(DataVisualErrors.IMAGE_NOT_EXIST,
error_msg,
http_code=400)


class ScalarNotExistError(MindInsightException):
"""Unable to get scalar values based on a given condition."""
def __init__(self, error_detail):
        error_msg = f'Scalar value does not exist. Detail: {error_detail}'
super(ScalarNotExistError, self).__init__(DataVisualErrors.SCALAR_NOT_EXIST,
error_msg,
http_code=400)


class HistogramNotExistError(MindInsightException):
"""Unable to get histogram values based on a given condition."""
def __init__(self, error_detail):
        error_msg = f'Histogram value does not exist. Detail: {error_detail}'
super(HistogramNotExistError, self).__init__(DataVisualErrors.HISTOGRAM_NOT_EXIST,
error_msg,
http_code=400)


class TensorNotExistError(MindInsightException):
"""Unable to get tensor values based on a given condition."""
def __init__(self, error_detail):
        error_msg = f'Tensor value does not exist. Detail: {error_detail}'
super(TensorNotExistError, self).__init__(DataVisualErrors.TENSOR_NOT_EXIST,
error_msg,
http_code=400)


class StepTensorDataNotInCacheError(MindInsightException):
"""Tensor data with specific step does not in cache."""
def __init__(self, error_detail):
error_msg = f'Tensor data not in cache. Detail: {error_detail}'
super(StepTensorDataNotInCacheError, self).__init__(DataVisualErrors.STEP_TENSOR_DATA_NOT_IN_CACHE,
error_msg,
http_code=400)


class ResponseDataExceedMaxValueError(MindInsightException):
"""Response data exceed max value based on a given condition."""
def __init__(self, error_detail):
error_msg = f'Response data exceed max value. Detail: {error_detail}'
super(ResponseDataExceedMaxValueError, self).__init__(DataVisualErrors.MAX_RESPONSE_DATA_EXCEEDED_ERROR,
error_msg,
http_code=400)


class TrainJobDetailNotInCacheError(MindInsightException):
"""Detail info of given train job is not in cache."""
def __init__(self, error_detail="no detail provided."):
error_msg = f'Detail info of the given train job is not in cache. Detail: {error_detail}'
super().__init__(DataVisualErrors.TRAIN_JOB_DETAIL_NOT_IN_CACHE,
error_msg,
http_code=400)


class TensorTooLargeError(MindInsightException):
"""The given tensor is too large to shown on UI."""
def __init__(self, error_detail):
error_msg = f'Tensor is too large to show on UI. Detail: {error_detail}'
super(TensorTooLargeError, self).__init__(DataVisualErrors.TENSOR_TOO_LARGE,
error_msg = "Can not find node in graph by given node name."
super(NodeNotInGraphError, self).__init__(DataVisualErrors.NODE_NOT_IN_GRAPH_ERROR,
error_msg,
http_code=400)

+ 0
- 2
mindinsight/datavisual/common/log.py

@@ -17,5 +17,3 @@ from mindinsight.utils.log import setup_logger

logger = setup_logger("datavisual", "datavisual")
restful_logger = setup_logger("restful_api", "restful_api")
parse_summary_logger = setup_logger("parse_summary", "parse_summary", console=True,
formatter='[%(levelname)s]%(message)s')

+ 3
- 3
mindinsight/datavisual/common/validation.py

@@ -16,7 +16,6 @@
from numbers import Number
from mindinsight.utils.exceptions import ParamValueError
from mindinsight.utils.exceptions import ParamMissError
from mindinsight.datavisual.common.exceptions import PluginNotAvailableError
from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.utils.tools import to_int

@@ -95,8 +94,9 @@ class Validation:
plugin_name (str): The plugin name.

Raises:
PluginNotAvailableError: When plugin name is not valid.
ParamValueError: When plugin name is not valid.
"""
plugin_name_list = PluginNameEnum.list_members()
if plugin_name not in plugin_name_list:
raise PluginNotAvailableError(f"'plugin_name' only can be one of {plugin_name_list}")
raise ParamValueError("'plugin_name' only can be one of {}"
"".format(plugin_name_list))

+ 0
- 1
mindinsight/datavisual/data_access/__init__.py

@@ -12,4 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Data access module used to define file operation."""

+ 0
- 1
mindinsight/datavisual/data_transform/__init__.py

@@ -12,4 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Data transform module used to load data and define relative container and so on."""

+ 4
- 12
mindinsight/datavisual/data_transform/data_loader.py

@@ -34,16 +34,8 @@ class DataLoader:
self._summary_dir = summary_dir
self._loader = None

def load(self, executor=None):
"""Load the data when loader is exist.

Args:
executor (Optional[Executor]): The executor instance.

Returns:
bool, True if the loader is finished loading.
"""

def load(self):
"""Load the data when loader is exist."""
if self._loader is None:
ms_dataloader = MSDataLoader(self._summary_dir)
loaders = [ms_dataloader]
@@ -56,14 +48,14 @@ class DataLoader:
logger.warning("No valid files can be loaded, summary_dir: %s.", self._summary_dir)
raise exceptions.SummaryLogPathInvalid()

return self._loader.load(executor)
self._loader.load()

def get_events_data(self):
"""
Get events data from log file.

Returns:
EventsData, indicates events data.
Optional[EventsData], None or events data.
"""
return self._loader.get_events_data()



+ 232
- 775
mindinsight/datavisual/data_transform/data_manager.py
File diff suppressed because it is too large


+ 11
- 29
mindinsight/datavisual/data_transform/events_data.py

@@ -17,33 +17,30 @@
import collections
import threading

from mindinsight.conf import settings
from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.data_transform import reservoir
from mindinsight.conf import settings


# Type of the tensor event from external component
_Tensor = collections.namedtuple('_Tensor', ['wall_time', 'step', 'value', 'filename'])
_Tensor = collections.namedtuple('_Tensor', ['wall_time', 'step', 'value'])
TensorEvent = collections.namedtuple(
'TensorEvent', ['wall_time', 'step', 'tag', 'plugin_name', 'value', 'filename'])
'TensorEvent', ['wall_time', 'step', 'tag', 'plugin_name', 'value'])

# config for `EventsData`
_DEFAULT_STEP_SIZES_PER_TAG = settings.DEFAULT_STEP_SIZES_PER_TAG
_MAX_DELETED_TAGS_SIZE = settings.MAX_TAG_SIZE_PER_EVENTS_DATA * 100
CONFIG = {
'max_total_tag_sizes': settings.MAX_TAG_SIZE_PER_EVENTS_DATA,
'max_tag_sizes_per_plugin':
{
PluginNameEnum.GRAPH.value: settings.MAX_GRAPH_TAG_SIZE,
PluginNameEnum.TENSOR.value: settings.MAX_TENSOR_TAG_SIZE
},
'max_step_sizes_per_tag':
{
PluginNameEnum.SCALAR.value: settings.MAX_SCALAR_STEP_SIZE_PER_TAG,
PluginNameEnum.IMAGE.value: settings.MAX_IMAGE_STEP_SIZE_PER_TAG,
PluginNameEnum.GRAPH.value: settings.MAX_GRAPH_STEP_SIZE_PER_TAG,
PluginNameEnum.HISTOGRAM.value: settings.MAX_HISTOGRAM_STEP_SIZE_PER_TAG,
PluginNameEnum.TENSOR.value: settings.MAX_TENSOR_STEP_SIZE_PER_TAG
}
}

@@ -62,7 +59,6 @@ class EventsData:
self._max_step_sizes_per_tag = self._config['max_step_sizes_per_tag']

self._tags = list()
self._deleted_tags = set()
self._reservoir_by_tag = {}
self._reservoir_mutex_lock = threading.Lock()

@@ -85,9 +81,6 @@ class EventsData:
if tag not in set(self._tags):
deleted_tag = self._check_tag_out_of_spec(plugin_name)
if deleted_tag is not None:
if tag in self._deleted_tags:
logger.debug("Tag is in deleted tags: %s.", tag)
return
self.delete_tensor_event(deleted_tag)

self._tags.append(tag)
@@ -99,17 +92,14 @@ class EventsData:
with self._reservoir_mutex_lock:
if tag not in self._reservoir_by_tag:
reservoir_size = self._get_reservoir_size(tensor_event.plugin_name)
self._reservoir_by_tag[tag] = reservoir.ReservoirFactory().create_reservoir(
plugin_name, reservoir_size
)
self._reservoir_by_tag[tag] = reservoir.Reservoir(reservoir_size)

tensor = _Tensor(wall_time=tensor_event.wall_time,
step=tensor_event.step,
value=tensor_event.value,
filename=tensor_event.filename)
value=tensor_event.value)

if self._is_out_of_order_step(tensor_event.step, tensor_event.tag):
self.purge_reservoir_data(tensor_event.filename, tensor_event.step, self._reservoir_by_tag[tag])
self.purge_reservoir_data(tensor_event.step, self._reservoir_by_tag[tag])

self._reservoir_by_tag[tag].add_sample(tensor)

@@ -120,13 +110,6 @@ class EventsData:
Args:
tag (str): The tag name.
"""
if len(self._deleted_tags) < _MAX_DELETED_TAGS_SIZE:
self._deleted_tags.add(tag)
else:
logger.warning(
'Too many deleted tags, %d upper limit reached, tags updating may not function hereafter',
_MAX_DELETED_TAGS_SIZE)
logger.info('%r and all related samples are going to be deleted', tag)
self._tags.remove(tag)
for plugin_name, lock in self._tags_by_plugin_mutex_lock.items():
with lock:
@@ -155,7 +138,7 @@ class EventsData:
raise KeyError('Plugin %r could not be found.' % plugin_name)
with self._tags_by_plugin_mutex_lock[plugin_name]:
# Return a snapshot to avoid concurrent mutation and iteration issues.
return sorted(list(self._tags_by_plugin[plugin_name]))
return list(self._tags_by_plugin[plugin_name])

def tensors(self, tag):
"""
@@ -190,7 +173,7 @@ class EventsData:
return False

@staticmethod
def purge_reservoir_data(filename, start_step, tensor_reservoir):
def purge_reservoir_data(start_step, tensor_reservoir):
"""
        Purge all tensor events whose steps are out of order after the given start step.

@@ -202,8 +185,7 @@ class EventsData:
Returns:
int, the number of items removed.
"""
cnt_out_of_order = tensor_reservoir.remove_sample(
lambda x: x.step < start_step or (x.step > start_step and x.filename == filename))
cnt_out_of_order = tensor_reservoir.remove_sample(lambda x: x.step < start_step)

return cnt_out_of_order



+ 333
- 463
mindinsight/datavisual/data_transform/graph/graph.py

@@ -15,138 +15,125 @@
"""
This file is used to define the basic graph.
"""
import copy
import time

from enum import Enum
from collections import defaultdict

from mindinsight.datavisual.common.exceptions import NodeNotInGraphError
from mindinsight.datavisual.common.log import logger
from mindinsight.utils.exceptions import ParamMissError
from mindinsight.utils.exceptions import ParamValueError
from mindinsight.datavisual.common import exceptions
from .node import NodeTypeEnum
from .node import Node


def check_invalid_character(string):
"""Check for invalid characters. These characters will cause frontend crash."""
invalid_char = {'>', '<', '"'}
result = set(string).intersection(invalid_char)
if result:
raise ParamValueError(f"There are some invalid characters in graph node, invalid string: {string}, "
f"unexpected characters: {result}")
class EdgeTypeEnum:
"""Node edge type enum."""
control = 'control'
data = 'data'


class EdgeTypeEnum(Enum):
"""Node edge type enum."""
CONTROL = 'control'
DATA = 'data'
class DataTypeEnum:
"""Data type enum."""
DT_TENSOR = 13


class Graph:
"""The `Graph` object is used to describe a graph file."""
# Limit the size of a single attribute value per node to avoid storing too much data
MAX_NODE_ATTRIBUTE_VALUE_BYTES = 1024

# In the same scope, the number of children of the same type exceeds this threshold, and we will combine them.
MIN_GROUP_NODE_COUNT = 5
MIN_POLYMERIC_NODE_COUNT = 5

def __init__(self):
# Used to cache all nodes, and the key is node name, value is `Node` object.
self._normal_node_map = {}
self._node_id_map_name = {}
# Store nodes contain leaf nodes, name scope node, except polymeric nodes
self._normal_nodes = {}

# The additional caching of Const and Parameter is to handle the Const
# and Parameter nodes separately later.
self._const_node_temp_cache = {}
self._parameter_node_temp_cache = {}
# Store polymeric nodes.
self._polymeric_nodes = {}

# Store all nodes resolved from the file.
self._leaf_nodes = {}
self._full_name_map_name = {}

def build_graph(self, proto_data):
"""This method is used to build the graph."""
logger.info("Start to build graph")
start_time = time.time()

# Notice:
# The following methods are interdependent and cannot be switched at will.
self._parse_data(proto_data)
self._add_variable_nodes(NodeTypeEnum.PARAMETER.value)
self._build_aggregation_scope_nodes()
self._process_independent_layout()
self._build_name_scope_nodes()

# Since const nodes are not aggregated, adding them at the end can save a lot of computation.
self._add_variable_nodes(NodeTypeEnum.CONST.value)
self._calc_subnode_count()
self._leaf_nodes = self._get_leaf_nodes()
self._full_name_map_name = self._get_leaf_node_full_name_map()

precision = 6
time_consuming = round(time.time() - start_time, precision)
logger.info("Build graph end, all node count: %s, const count: %s, parameter count: %s, time-consuming: %s s.",
self.normal_node_count, len(self._const_node_temp_cache),
len(self._parameter_node_temp_cache), time_consuming)
# The format of node groups is {'group_name': {'node_name': <Node>}}
self._node_groups = {}

def _get_leaf_nodes(self):
"""
Get all leaf nodes, including normal leaf nodes, const nodes and param nodes.
def exist_node(self, name):
"""
leaf_nodes = {}
for node_name, node in self._normal_node_map.items():
# update full name
if not node.full_name:
node.full_name = node.name
if not node.type or node.type.endswith('_scope'):
continue
leaf_nodes[node_name] = node
Check node exist in graph.

return leaf_nodes
Args:
name (str): The node name.

def _get_leaf_node_full_name_map(self):
"""Get node by debugger name."""
full_name_map = {}
for name, node in self._leaf_nodes.items():
if not node.full_name:
logger.warning("Node %s does not have full name.", name)
continue
full_name_map[node.full_name] = name
Returns:
            bool, True if the node exists.

return full_name_map
"""
if self._normal_nodes.get(name) is None:
return False
return True

def exist_node(self, name):
def get_normal_nodes(self, namescope=None):
"""
Check node exist in graph.
Get nodes by namescope.

Args:
name (str): The node name.
namescope (str): A namescope of nodes.

Returns:
            bool, True if the node exists.
            list[dict], a list object containing `Node` objects.

"""
if name is None:
return False
return self._is_node_exist(node_name=name)
nodes = []
if namescope is None:
for name, node in self._normal_nodes.items():
if '/' not in name:
# Get first layer nodes
nodes.append(node.to_dict())
return nodes

namescope = namescope + '/'
for name, node in self._normal_nodes.items():
if name.startswith(namescope) and '/' not in name.split(namescope)[1]:
nodes.append(node.to_dict())

return nodes

def list_node_by_scope(self, scope=None):
def get_polymeric_nodes(self, polymeric_scope):
"""
List nodes by the scope of nodes. The scope of a node is the same as its parent node name.
Get polymeric nodes by polymeric scope.

Args:
scope (str): A scope of nodes.
polymeric_scope (str): The polymeric scope name of nodes.

Returns:
            list[dict], a list object containing `Node` objects.
"""
scope = "" if scope is None else scope
nodes = []
for node in self._normal_node_map.values():
if node.scope == scope:
for node in self._polymeric_nodes.values():
if node.polymeric_scope_name == polymeric_scope:
nodes.append(node.to_dict())
return nodes

def search_node_names(self, content, offset, limit):
"""
Search node names by content.

Args:
content (Union[str, None]): This content can be the key content of the node to search,
if None, will get all node names.
            offset (int): Page offset, e.g. offset 0 means the current page is page 1.
            limit (int): The maximum number of node names returned per page.

Returns:
list[str], a list of node names.
"""
all_names = []
all_names.extend(list(self._normal_nodes.keys()))
all_names.extend(list(self._polymeric_nodes.keys()))
if content is not None:
content = content.lower()
catch_names = [name for name in all_names if content in name.lower()]
else:
catch_names = all_names
catch_names = sorted(catch_names)
real_offset = offset * limit
return catch_names[real_offset:real_offset+limit]
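
# Illustrative sketch (not part of the diff) of the paging rule above: offset counts
# pages rather than items, so offset=1 with limit=10 returns the 11th to 20th names.
names = sorted("Default/conv{}".format(i) for i in range(25))
offset, limit = 1, 10
page = names[offset * limit:(offset + 1) * limit]  # equivalent to names[10:20]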

def search_single_node(self, node_name):
"""
Search node, and return every layer nodes until this node.
@@ -160,426 +147,309 @@ class Graph:
'scope_name': '<Node scope>',
'children': {<item_object>}}
"""
if node_name and not self.exist_node(name=node_name):
raise NodeNotInGraphError(node_name=node_name)
if node_name and self._polymeric_nodes.get(node_name) is None \
and self._normal_nodes.get(node_name) is None:
raise exceptions.NodeNotInGraphError()

response = {}
nodes = self.list_node_by_scope()
nodes = self.get_normal_nodes()
response.update({
'nodes': nodes,
'scope_name': '',
'children': {}
})

names = node_name.split('/')
children = response['children']

index = node_name.find('/')
while index != -1:
scope = node_name[:index]
nodes = self.list_node_by_scope(scope)
for i in range(1, len(names)+1):
if i == len(names):
polymeric_node = self._polymeric_nodes.get(node_name)
if polymeric_node:
polymeric_scope = polymeric_node.polymeric_scope_name
nodes = self.get_polymeric_nodes(polymeric_scope)
children.update({'nodes': nodes,
'scope_name': polymeric_scope,
'children': {}})
break

name_scope = '/'.join(names[:i])
nodes = self.get_normal_nodes(name_scope)
children.update({
'nodes': nodes,
'scope_name': scope,
'scope_name': name_scope,
'children': {}
})
children = children['children']

index = node_name.find('/', index+1)

return response

def _parse_data(self, proto_data):
"""
This method will parse the data and create basic nodes to store in the cache.

The graph is then built based on the cache.
"""
raise NotImplementedError("Before you can build a graph, you need to parse the data.")

def _build_name_scope_nodes(self):
"""
Build name scope node by every node name.
def _build_polymeric_nodes(self):
"""Build polymeric node."""
logger.debug("Start to build polymeric nodes")

self._find_polymeric_nodes()

group_count_map = {}
for group_name, group in self._node_groups.items():
name = group_name.split('/')[-1]
count = group_count_map.get(name, 0)
count += 1
group_count_map[name] = count
polymeric_node_name = group_name + '_{}_[{}]'.format(count, len(group))
polymeric_node = Node(polymeric_node_name, node_id=polymeric_node_name)
polymeric_node.node_type = NodeTypeEnum.POLYMERIC_SCOPE.value
polymeric_node.name_scope = '/'.join(group_name.split('/')[:-1])
polymeric_node.subnode_count = len(group)

for name_tmp, node_tmp in group.items():
node_tmp.polymeric_scope_name = polymeric_node_name
self._polymeric_nodes.update({name_tmp: node_tmp})
polymeric_node.update_input(node_tmp.input)
polymeric_node.update_output(node_tmp.output)

self._normal_nodes.update({polymeric_node_name: polymeric_node})

self._update_input_output()

def _find_polymeric_nodes(self):
"""Find polymeric nodes from node groups."""
node_groups = copy.deepcopy(self._node_groups)
for group_name, group in node_groups.items():
if len(group) < self.MIN_POLYMERIC_NODE_COUNT:
self._normal_nodes.update(group)
self._node_groups.pop(group_name)
continue

We create the name scope node by the slash('/') in the node name.
For example, if a node name is "Default/add", we generate a scope named 'Default' based on slash('/') and
create a name scope node named 'Default'.
"""
logger.info("Start to build name scope nodes.")
scope_node_map = {}
for name, node in self._normal_node_map.items():
index = name.find('/')
pre_index = None
while index > 0:
scope = name[:index]
scope_node = scope_node_map.get(scope)
if scope_node is None:
if self._is_node_exist(node_name=scope):
exist_node = self._get_normal_node(node_name=scope)
if exist_node.type == NodeTypeEnum.AGGREGATION_SCOPE.value:
# This scope is aggregation scope, so we don't have to do anything.
pre_index = index
index = name.find('/', pre_index + 1)
move_node_names = []
is_move_group = False
for node_name, group_node in group.items():
node_list = []
is_in_group = False
for dst_name in group_node.output:
node_tmp = self._leaf_nodes[dst_name]
node_list.append(node_tmp)

start = time.time()
run_count = 0
visit_nodes = {}
while node_list:
# Iterate to find if the output of the node in the group causes a loop
# example: there is a group A, and node_a is a Node in group.
# if there is a loop in node_a, like A/node_a -> B/node_b -> A/node_b
# we will remove the node_a from group A.
node_tmp = node_list[0]
node_list = node_list[1:]
visit_nodes.update({node_tmp.name: True})
if node_tmp in group.values():
is_in_group = True
break
for dst_name_tmp in node_tmp.output:
run_count += 1
node_tmp = self._leaf_nodes[dst_name_tmp]
if visit_nodes.get(dst_name_tmp):
continue
node_list.append(node_tmp)
logger.debug("Find group %s node end, is_in_group: %s, use time: %s, "
"run count: %s.", group_name, is_in_group,
time.time() - start, run_count)

# We find a node name that conflicts with the current scope and rename the node
self._update_conflict_node(conflict_name=scope)

# We create a node for current scope.
scope_node = Node(scope, node_id=scope)
scope_node.type = NodeTypeEnum.NAME_SCOPE.value
scope_node.scope = '' if pre_index is None else name[:pre_index]
scope_node_map.update({scope_node.name: scope_node})

# Inherit input and output from sub nodes.
self._inherit_input_output_from_subnode(scope_node, subnode_list=[node])

pre_index = index
index = name.find('/', pre_index+1)

# Cache all the scope node to normal node dict
for node in scope_node_map.values():
self._cache_node(node)

def _update_conflict_node(self, conflict_name):
conflict_node = self._get_normal_node(node_name=conflict_name)
base_name = conflict_name.split('/')[-1]
new_name = Node.create_node_name(scope=conflict_node.scope, base_name=f'({base_name})')
self._update_node_name_of_cache(conflict_node, new_name, update_parent=True)

def _inherit_input_output_from_subnode(self, parent_node, subnode_list, filtered_type=None):
"""
Adds the input and output of all direct child nodes to the current node.

Args:
parent_node (Node): The nodes that inherit the input and output of the child nodes.
subnode_list (list[Node]): A list of child nodes that are inherited from the input and output.
filtered_type (set(str)): Filter some input and output that do not require inheritance
based on the node type. Default is filter const node.

Note:
- Only the inputs and outputs of the external scope are inherited.
- Before add_const_node method, if the input is a const,
the scope of the const node is not startswith the name of parent node.
So in this scenario, we need to filter the const nodes.
"""
filtered_type = {NodeTypeEnum.CONST.value} if filtered_type is None else filtered_type
for method in ['inputs', 'outputs', 'proxy_inputs', 'proxy_outputs']:
for node in subnode_list:
for item_name, item_attr in getattr(node, method).items():
target_node = self._get_normal_node(node_name=item_name)
if item_name.startswith(f'{parent_node.name}/'):
# Own scope, ignore
continue

if target_node.type in filtered_type:
continue

getattr(parent_node, f'add_{method}')(item_name, item_attr)
if is_in_group:
move_node_names.append(node_name)

def _build_aggregation_scope_nodes(self):
"""
        Under the same scope, nodes of the same type are aggregated once their number exceeds the set threshold.
if (len(group) - len(move_node_names)) < self.MIN_POLYMERIC_NODE_COUNT:
is_move_group = True
break

Note:
The threshold value refers to the `MIN_GROUP_NODE_COUNT`.
"""
logger.info("Start to build aggregation scope nodes.")
group_node_map, filtered_group_names = self._find_group_nodes()

# create merge scope nodes
aggregation_scope_node_map = {}
for i, group_name in enumerate(filtered_group_names):
slash_index = group_name.rfind('/')
if slash_index != -1:
scope, op_type = group_name[:slash_index], group_name[slash_index+1:]
if is_move_group:
self._normal_nodes.update(group)
self._node_groups.pop(group_name)
else:
scope, op_type = '', group_name

count = len(group_node_map.get(group_name))
aggregation_node_name = Node.create_node_name(scope=scope, base_name=f'{op_type}[{count}]_{i}')
aggregation_scope_node = Node(name=aggregation_node_name, node_id=aggregation_node_name)
aggregation_scope_node.subnode_count = count
aggregation_scope_node.scope = scope
aggregation_scope_node.type = NodeTypeEnum.AGGREGATION_SCOPE.value

# Update the name and scope of all children nodes
for node in group_node_map[group_name]:
base_name = node.name.split('/')[-1]
new_name = Node.create_node_name(scope=aggregation_node_name, base_name=base_name)
node.scope = aggregation_node_name

# Since the name scope has not been created, there is no need to update the parent node.
self._update_node_name_of_cache(node, new_name, update_parent=False)

# Cache this node
self._cache_node(aggregation_scope_node)
aggregation_scope_node_map.update({group_name: aggregation_scope_node})

# Adds the input and output of all direct child nodes to the current node.
for group_name, node in aggregation_scope_node_map.items():
self._inherit_input_output_from_subnode(node, group_node_map[group_name])

def _find_group_nodes(self):
"""
Find nodes that can be grouped into a group.

For direct child nodes in a scope, we divide them into multiple groups by node type.
However, we will exclude several types of child nodes,
because these types of nodes are not operational nodes.
"""
exclude_types = {
NodeTypeEnum.CONST.value,
NodeTypeEnum.NAME_SCOPE.value,
}

group_node_map = defaultdict(list)
for node in self._normal_node_map.values():
if node.type in exclude_types:
continue
group_name = Node.create_node_name(scope=node.scope, base_name=node.type)
group_node_map[group_name].append(node)

# filter can group scope.
filtered_group_names = []
for name, nodes in group_node_map.items():
if len(nodes) < self.MIN_GROUP_NODE_COUNT:
continue
filtered_group_names.append(name)

return group_node_map, filtered_group_names

def _add_variable_nodes(self, node_type):
"""
We create the Const nodes or Parameter nodes in this method.

Args:
node_type (str): Decide which type of node to add.
Optional is `NodeTypeEnum.CONST.value` and `NodeTypeEnum.PARAMETER.value`.

Note:
This method relies on the presence of data in the const cache or parameter cache.
"""
logger.info("Start to add %s nodes to each scope in graph.", node_type)
node_map = {}
for node in self._normal_node_map.values():
for src_name, input_attr in dict(node.inputs).items():

if node_type == NodeTypeEnum.CONST.value and not self._const_node_temp_cache.get(src_name):
for name_tmp in move_node_names:
node_tmp = self._node_groups[group_name].pop(name_tmp)
self._normal_nodes.update({name_tmp: node_tmp})

def _update_input_output(self):
"""We need to update input and output attribute after build polymeric node."""
for node in self._normal_nodes.values():
for src_name, input_attr in node.input.items():
if self._polymeric_nodes.get(src_name):
input_attr['scope'] = NodeTypeEnum.POLYMERIC_SCOPE.value
node.update_input({src_name: input_attr})

for dst_name, output_attr in node.output.items():
if self._polymeric_nodes.get(dst_name):
output_attr['scope'] = NodeTypeEnum.POLYMERIC_SCOPE.value
node.update_output({dst_name: output_attr})

for node in self._polymeric_nodes.values():
for src_name, input_attr in node.input.items():
if self._polymeric_nodes.get(src_name):
input_attr['scope'] = NodeTypeEnum.POLYMERIC_SCOPE.value
node.update_input({src_name: input_attr})

for dst_name, output_attr in node.output.items():
if self._polymeric_nodes.get(dst_name):
output_attr['scope'] = NodeTypeEnum.POLYMERIC_SCOPE.value
node.update_output({dst_name: output_attr})

def _calc_polymeric_input_output(self):
"""Calc polymeric input and output after build polymeric node."""
for name, node in self._normal_nodes.items():
polymeric_input = {}
for src_name in node.input:
src_node = self._polymeric_nodes.get(src_name)
if node.node_type == NodeTypeEnum.POLYMERIC_SCOPE.value:
src_name = src_name if not src_node else src_node.polymeric_scope_name
output_name = self._calc_dummy_node_name(name, src_name)
polymeric_input.update({output_name: {'edge_type': EdgeTypeEnum.data}})
continue

if node_type == NodeTypeEnum.PARAMETER.value and not self._parameter_node_temp_cache.get(src_name):
if not src_node:
continue

variable_name = Node.create_node_name(scope=node.scope, base_name=src_name)
if node_map.get(variable_name):
# There is no need to create the node repeatedly
variable_node = node_map.get(variable_name)
else:
cache_node = self._get_normal_node(node_name=src_name)
variable_node = Node(name=variable_name, node_id=variable_name)
Node.copy_node_without_input_output(cache_node, variable_node)
variable_node.scope = node.scope

variable_node.add_outputs(dst_name=node.name, output_attr=input_attr)
node_map.update({variable_name: variable_node})

node.delete_inputs(src_name)
node.add_inputs(variable_name, input_attr)

for node in node_map.values():
self._cache_node(node)

# Remove nodes that are not used in the cache.
if node_type == NodeTypeEnum.CONST.value:
unused_names = set(self._const_node_temp_cache) - set(node_map)
elif node_type == NodeTypeEnum.PARAMETER.value:
unused_names = set(self._parameter_node_temp_cache) - set(node_map)
else:
raise ParamValueError("The node type should be const or parameter.")

self._delete_nodes_of_cache(unused_names)
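# A hedged sketch of the per-scope variable handling described above, with plain dicts standing
# in for the real Node and cache classes (the helper name and data shapes are assumptions for
# illustration): every Const/Parameter source referenced by a consumer is copied into the
# consumer's scope, the consumer's input is rewired to the scoped copy, and sources that were
# never referenced are reported so they can be dropped from the cache.
def clone_variables_into_scopes(consumers, variable_cache):
    """consumers: {name: {'scope': str, 'inputs': {src_name: attr}}};
    variable_cache: {src_name: attr} for Const/Parameter sources."""
    created = {}
    used_sources = set()
    for consumer in consumers.values():
        rewired = {}
        for src, attr in consumer['inputs'].items():
            if src not in variable_cache:
                rewired[src] = attr          # ordinary input, keep as-is
                continue
            scoped_name = f"{consumer['scope']}/{src}" if consumer['scope'] else src
            created.setdefault(scoped_name, dict(variable_cache[src]))
            used_sources.add(src)
            rewired[scoped_name] = attr      # point the consumer at the scoped copy
        consumer['inputs'] = rewired
    unused = set(variable_cache) - used_sources
    return created, unused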

def _calc_subnode_count(self):
"""Calc all the direct sub node count."""
subnode_count_map = defaultdict(int)
for node in self._normal_node_map.values():
if not node.scope:
continue

if not self._is_node_exist(node_name=node.scope):
logger.warning("Can not find a scope node by the given name(%s), "
"the name scope nodes may not have been created.", node.scope)
continue
subnode_count_map[node.scope] = subnode_count_map[node.scope] + 1

for name, count in subnode_count_map.items():
node = self._get_normal_node(node_name=name)
node.subnode_count = count

def _get_normal_node(self, node_id=None, node_name=None):
"""Query node by node id or node name."""
if node_id is not None:
name = self._node_id_map_name.get(node_id)
node = self._normal_node_map.get(name)
return node

if node_name is not None:
return self._normal_node_map.get(node_name)
if not node.name_scope and src_node.name_scope:
# if current node is in first layer, and the src node is not in
# the first layer, the src node will not be the polymeric input of current node.
continue

raise ParamMissError('Method requires an argument that is not None.')
if node.name_scope == src_node.name_scope \
or node.name_scope.startswith(src_node.name_scope):
polymeric_input.update(
{src_node.polymeric_scope_name: {'edge_type': EdgeTypeEnum.data}})

def _is_node_exist(self, node_id=None, node_name=None):
"""Check node is exist."""
if node_id is not None:
return bool(self._node_id_map_name.get(node_id))
node.update_polymeric_input(polymeric_input)

if node_name is not None:
return bool(self._normal_node_map.get(node_name))
polymeric_output = {}
for dst_name in node.output:
dst_node = self._polymeric_nodes.get(dst_name)

raise ParamMissError('Method requires an argument that is not None.')
if node.node_type == NodeTypeEnum.POLYMERIC_SCOPE.value:
dst_name = dst_name if not dst_node else dst_node.polymeric_scope_name
output_name = self._calc_dummy_node_name(name, dst_name)
polymeric_output.update({output_name: {'edge_type': EdgeTypeEnum.data}})
continue

@property
def normal_node_count(self):
"""Get the normal node count."""
return len(self._normal_node_map)
if not dst_node:
continue

def _cache_node(self, node):
"""Store the node in the cache."""
# Notice:
# The additional caching of Const and Parameter is to handle the Const and Parameter nodes separately later.
if node.type == NodeTypeEnum.CONST.value:
self._const_node_temp_cache.update({node.name: node})
if node.type == NodeTypeEnum.PARAMETER.value:
self._parameter_node_temp_cache.update({node.name: node})
if not node.name_scope and dst_node.name_scope:
continue

self._normal_node_map.update({node.name: node})
self._node_id_map_name.update({node.node_id: node.name})
if node.name_scope == dst_node.name_scope \
or node.name_scope.startswith(dst_node.name_scope):
polymeric_output.update(
{dst_node.polymeric_scope_name: {'edge_type': EdgeTypeEnum.data}})

def _delete_nodes_of_cache(self, node_names):
"""Delete node from cache."""
logger.debug("These nodes will be removed from the cache, node names: %s.", str(node_names))
for name in node_names:
node.update_polymeric_output(polymeric_output)

if self._parameter_node_temp_cache.get(name):
self._parameter_node_temp_cache.pop(name)
if self._const_node_temp_cache.get(name):
self._const_node_temp_cache.pop(name)
for name, node in self._polymeric_nodes.items():
polymeric_input = {}
for src_name in node.input:
output_name = self._calc_dummy_node_name(name, src_name)
polymeric_input.update({output_name: {'edge_type': EdgeTypeEnum.data}})
node.update_polymeric_input(polymeric_input)

node = self._get_normal_node(node_name=name)
self._normal_node_map.pop(name)
self._node_id_map_name.pop(node.node_id)
polymeric_output = {}
for dst_name in node.output:
polymeric_output = {}
output_name = self._calc_dummy_node_name(name, dst_name)
polymeric_output.update({output_name: {'edge_type': EdgeTypeEnum.data}})
node.update_polymeric_output(polymeric_output)

def _update_node_name_of_cache(self, node, new_name, update_parent=False):
def _calc_dummy_node_name(self, current_node_name, other_node_name):
"""
Update a node name which is stored in cache.
Calc dummy node name.

Args:
node (Node): The node that will be renamed.
new_name (str): The new name.
update_parent (bool): Determines whether the input and output of the parent node need to be updated.
"""
logger.debug('Update node name of cache, node(%s), new name is %s.', str(node), new_name)
origin_name = node.name
node.name = new_name

        # Find all nodes whose inputs and outputs need to be modified
update_node_map = {}
for method in ['inputs', 'outputs', 'proxy_inputs', 'proxy_outputs']:
for target_name in getattr(node, method):
target_node = self._get_normal_node(node_name=target_name)
if target_node is None:
message = f"Node should not be None, name: {target_name}, {method}: {list(getattr(node, method))}."
logger.error(message)
continue
current_node_name (str): The name of current node.
other_node_name (str): The target dummy node name.

update_node_map.update({target_name: target_node})

if not update_parent:
continue

slash_index = target_name.find('/')
while slash_index != -1:
scope_name = target_name[:slash_index]
slash_index = target_name.find('/', slash_index+1)
Returns:
str, the dummy node name.
"""
name_tmp = other_node_name
if self._polymeric_nodes.get(other_node_name):
name_tmp = self._polymeric_nodes[other_node_name].polymeric_scope_name
name_tmp_list = name_tmp.split('/')
current_name_list = current_node_name.split('/')
index = 0
min_len = min(len(name_tmp_list), len(current_name_list))
for i in range(min_len):
index = i
if name_tmp_list[index] != current_name_list[index]:
break
dummy_node_name = '/'.join(name_tmp_list[:index+1])
return dummy_node_name
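# The dummy-node-name rule above, extracted as a standalone sketch (a hypothetical helper,
# not the class method): the dummy name is the other node's path cut at the first segment
# that differs from the current node's path.
def calc_dummy_node_name(current_node_name, other_node_name):
    other_parts = other_node_name.split('/')
    current_parts = current_node_name.split('/')
    index = 0
    for i in range(min(len(other_parts), len(current_parts))):
        index = i
        if other_parts[i] != current_parts[i]:
            break  # the first differing segment ends the shared prefix
    return '/'.join(other_parts[:index + 1])

# calc_dummy_node_name('Default/net/add1', 'Default/opt/Assign') -> 'Default/opt'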

if update_node_map.get(scope_name):
def _build_name_scope_nodes(self):
"""Build name scope node by every node name."""
normal_nodes = dict(self._normal_nodes)

rename_node_names = {}
for name, node in normal_nodes.items():
name_list = name.split('/')
for i in range(1, len(name_list)):
name_scope = '/'.join(name_list[:i])
name_scope_node = self._normal_nodes.get(name_scope)
if name_scope_node is None:
name_scope_node = Node(name_scope, node_id=name_scope)
name_scope_node.node_type = NodeTypeEnum.NAME_SCOPE.value
name_scope_node.name_scope = '/'.join(name_list[:i-1])
elif name_scope_node.node_type != NodeTypeEnum.NAME_SCOPE.value:
# The name of this node conflicts with namescope, so rename this node
old_name = name_scope_node.name
old_names = name_scope_node.name.split('/')
old_names[-1] = f'({old_names[-1]})'
new_name = '/'.join(old_names)
name_scope_node.name = new_name
self._normal_nodes.pop(old_name)
self._normal_nodes.update({new_name: name_scope_node})
rename_node_names.update({old_name: new_name})

# create new namescope
name_scope_node = Node(name_scope, node_id=name_scope)
name_scope_node.node_type = NodeTypeEnum.NAME_SCOPE.value
name_scope_node.name_scope = '/'.join(name_list[:i-1])

                # Update the inputs and outputs of this node onto the name scope node
name_scope_with_slash = name_scope + '/'
for src_name, input_attr in node.input.items():
if src_name.startswith(name_scope_with_slash):
continue
name_scope_node.update_input({src_name: input_attr})

scope_node = self._get_normal_node(node_name=scope_name)
if scope_node is None:
message = f"Can not find the scope node by scope name({scope_name}), " \
f"may be this scope node has not been built."
logger.debug(message)
for dst_name, output_attr in node.output.items():
if dst_name.startswith(name_scope_with_slash):
continue
name_scope_node.update_output({dst_name: output_attr})

self._normal_nodes.update({name_scope: name_scope_node})

if rename_node_names:
# If existing nodes are renamed, the inputs and outputs of all nodes need to be refreshed
nodes = []
nodes.extend(self._normal_nodes.values())
nodes.extend(self._polymeric_nodes.values())
for node in nodes:
attrs = ['input', 'output', 'polymeric_input', 'polymeric_output']
for item in attrs:
tmp_dict = dict(getattr(node, item))
for name, value in tmp_dict.items():
new_name = rename_node_names.get(name, False)
if new_name:
getattr(node, item).pop(name)
getattr(node, f'update_{item}')({new_name: value})

update_node_map.update({scope_name: scope_node})

# Update the input and output of the nodes
for target_node in update_node_map.values():
for method in ['inputs', 'outputs', 'proxy_inputs', 'proxy_outputs']:
attr_temp = getattr(target_node, method).get(origin_name)
if attr_temp is None:
# This method does not have this node, so it is skipped
continue
self._calc_subnode_count()

                # Delete the old attribute and register the new name on the source or destination node.
getattr(target_node, f'delete_{method}')(origin_name)
getattr(target_node, f'add_{method}')(new_name, attr_temp)

# Delete the origin node in cache.
self._delete_nodes_of_cache(node_names=[origin_name])
self._cache_node(node)

def _process_independent_layout(self):
"""Handle separate layout nodes."""
independent_layout_node_map = {}
for node in self._normal_node_map.values():
base_name = node.name.split('/')[-1]
if node.type == NodeTypeEnum.AGGREGATION_SCOPE.value and NodeTypeEnum.PARAMETER.value in base_name:
independent_layout_node_map[node.name] = node

# Find all sub nodes
subnode_map = defaultdict(list)
for node in self._normal_node_map.values():
if independent_layout_node_map.get(node.scope):
subnode_map[node.scope].append(node)

# Notice:
        # The following processing is only done for parameter nodes; other node types are not processed.
        # If this needs to be extended to other node types later, the code must be adjusted.
for scope_node in independent_layout_node_map.values():
scope_node.independent_layout = True

method = 'outputs'
for target_name, target_attr in dict(getattr(scope_node, method)).items():
proxy_attr = dict(edge_type=target_attr['edge_type'])

target_node = self._get_normal_node(node_name=target_name)
getattr(target_node, 'add_proxy_inputs')(scope_node.name, proxy_attr)

# Note:
# If the source node and the destination node are not in the same scope,
            # the proxy node is presented as a scope in order to simplify the displayed data flow.
# For example, the data flow is parameter[5]_1 -> add[5]_1/add1
# we create a scope proxy node(add[5]_1) for parameter[5]_1,
# so there is a proxy data flow parameter[5]_1 -> add[5]_1 instead of parameter[5]_1 -> add[5]_1/add1.
if target_node.scope == scope_node.scope:
getattr(scope_node, f'add_proxy_{method}')(target_name, proxy_attr)
else:
target_scope_node = self._get_normal_node(node_name=target_node.scope)
getattr(scope_node, f'add_proxy_{method}')(target_node.scope, proxy_attr)
getattr(target_scope_node, 'add_proxy_inputs')(scope_node.name, proxy_attr)

for subnode in subnode_map[scope_node.name]:
subnode.independent_layout = True
for target_name, target_attr in dict(getattr(subnode, method)).items():
proxy_attr = dict(edge_type=target_attr['edge_type'])
target_node = self._get_normal_node(node_name=target_name)
if target_node.scope == scope_node.scope:
getattr(subnode, f'add_proxy_{method}')(target_name, proxy_attr)
else:
getattr(subnode, f'add_proxy_{method}')(target_node.scope, proxy_attr)

input_attr = getattr(target_node, 'inputs')[subnode.name]
input_attr['independent_layout'] = True
target_node.add_inputs(subnode.name, input_attr)
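# A small sketch of the proxy-edge rule from the note above, with nodes reduced to a
# name-to-scope mapping (the helper name is an illustrative assumption): an edge that stays
# inside one scope keeps its real endpoint, while an edge that crosses scopes is redirected
# to the destination's scope node.
def build_proxy_edges(edges, node_scope):
    """edges: iterable of (src, dst) names; node_scope: {node_name: scope_name}."""
    proxy_edges = set()
    for src, dst in edges:
        if node_scope.get(src) == node_scope.get(dst):
            proxy_edges.add((src, dst))                       # same scope: keep the real target
        else:
            proxy_edges.add((src, node_scope.get(dst, dst)))  # cross scope: target the scope node
    return proxy_edges

# Example from the note: ('parameter[5]_1', 'add[5]_1/add1') with
# node_scope = {'add[5]_1/add1': 'add[5]_1'} collapses to ('parameter[5]_1', 'add[5]_1').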
def _calc_subnode_count(self):
"""Calc the sub node count of scope node."""
name_scope_mapping = {}
for node in self._normal_nodes.values():
if node.name_scope:
count = name_scope_mapping.get(node.name_scope, 0)
name_scope_mapping[node.name_scope] = count + 1

for name_scope, count in name_scope_mapping.items():
node = self._normal_nodes[name_scope]
node.subnode_count = count
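# Both `_calc_subnode_count` variants above boil down to the same counting rule, sketched here
# as a standalone helper (name assumed for illustration): each node adds one to the count of
# its immediate name scope only, never to ancestor scopes.
from collections import Counter

def count_direct_subnodes(node_scopes):
    """node_scopes: iterable of name_scope strings, one entry per node ('' for top level)."""
    return Counter(scope for scope in node_scopes if scope)

# count_direct_subnodes(['Default', 'Default', 'Default/conv1', '']) ->
# Counter({'Default': 2, 'Default/conv1': 1})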

+ 190
- 329
mindinsight/datavisual/data_transform/graph/msgraph.py View File

@@ -1,4 +1,4 @@
# Copyright 2020-2021 Huawei Technologies Co., Ltd
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,401 +13,262 @@
# limitations under the License.
# ============================================================================
"""This file is used to define the MindSpore graph."""
import re
import copy

from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.proto_files.mindinsight_anf_ir_pb2 import DataType
from mindinsight.datavisual.common.enums import PluginNameEnum
from .node_tree import NodeTree
from .node import Node
from .node import NodeTypeEnum
from .graph import Graph
from .graph import EdgeTypeEnum
from .graph import check_invalid_character
from .graph import DataTypeEnum


class MSGraph(Graph):
"""The object describes the MindSpore graph, and it is defined in the anf_ir proto file."""
"""The object describes the MindSpore graph, and it is defined in the anf_if proto file."""

def _parse_data(self, proto_data):
def build_graph(self, graph_proto):
"""
The proto data is parsed and all nodes are stored in the specified structure.
Build graph by graph proto which refer to `anf_ir_pb2.GraphProto`, and set status to loading.

Args:
proto_data (anf_ir_pb2.GraphProto): Refer to anf_ir_pb2.GraphProto object.
graph_proto (anf_ir_pb2.GraphProto): Refer to `anf_ir_pb2.GraphProto`.
"""
logger.info("Start to parse graph proto data.")

self._parse_op_nodes(proto_data.node)
self._parse_parameters(proto_data.parameters)
self._parse_consts(proto_data.const_vals)

self._update_input_after_create_node()
self._update_output_after_create_node()
logger.info("Start to build graph.")

logger.info("Parse proto data end, normal node count(only contain op node, "
"parameter, const): %s.", self.normal_node_count)
self._build_leaf_nodes(graph_proto)
self._build_polymeric_nodes()
self._build_name_scope_nodes()
self._calc_polymeric_input_output()
logger.info("Build graph end, normal node count: %s, polymeric node "
"count: %s.", len(self._normal_nodes), len(self._polymeric_nodes))

def _parse_op_nodes(self, node_protos):
def _build_leaf_nodes(self, graph_proto):
"""
Parse `anf_ir_pb2.NodeProto` object, and create a normal node.
Build leaf node from graph proto.

        Leaf nodes contain operation nodes, parameter nodes and const nodes.

Args:
node_protos (list[anf_ir_pb2.NodeProto]): Refer to anf_ir_pb2.NodeProto.
graph_proto (anf_ir_pb2.model_proto.graph): Refer to anf_ir_pb2.model_proto.graph.
"""
logger.debug("Start to parse op nodes from proto.")
for topological_index, node_proto in enumerate(node_protos):
if not node_proto.name:
logger.warning("Finding a node with an empty name will not save it.")
continue
logger.info("Start to build leaf nodes.")
leaf_node_id_map_name = {}
const_nodes_map = {}

if node_proto.op_type == "Load":
                # The Load operator needs to be renamed, as it has the same name as the parameter node
node_name = Node.create_node_name(scope=node_proto.scope,
base_name=f'{node_proto.op_type}-op{node_proto.name}')
node_proto.full_name = node_name
elif not node_proto.full_name or any(
node_proto.full_name.lower().endswith(f'[:{plugin.value.lower()}]') for plugin in PluginNameEnum):
node_name = Node.create_node_name(scope=node_proto.scope,
base_name=f'{node_proto.op_type}{node_proto.name}')
else:
node_name = node_proto.full_name

            # The Graphviz plug-in that the UI uses can't handle these special characters.
check_invalid_character(node_name)
for node_def in graph_proto.node:
node = self._parse_graph_proto_node(node_def)
leaf_node_id_map_name.update({node.node_id: node.name})

node = Node(name=node_name, node_id=node_proto.name, topological_index=topological_index)
node.full_name = node_proto.full_name
node.type = node_proto.op_type
for parameter in graph_proto.parameters:
node = self._parse_graph_proto_parameter(parameter)
const_nodes_map.update({node.name: node})

self._parse_attributes(node_proto.attribute, node)
self._parse_inputs(node_proto.input, node)
for i, const in enumerate(graph_proto.const_vals):
node_id = 'const_{}'.format(i)
node = self._parse_graph_proto_const(const, node_id)
const_nodes_map.update({const.key: node})

node.output_i = node_proto.output_i
node.scope = node_proto.scope
node.output_shape = self._get_shape_by_parse_type_proto(node_proto.output_type)
node.output_nums = len(node.output_shape)
node.output_data_type = self._get_data_type_by_parse_type_proto(node_proto.output_type, node)
self._calc_input(leaf_node_id_map_name, graph_proto, const_nodes_map)
self._calc_output()

self._cache_node(node)

def _parse_parameters(self, parameter_protos):
"""
Parse `anf_ir_pb2.ParameterProto` object, and create a parameter node.
logger.info("Build leaf nodes end, normal nodes count: %s, group count: %s, "
"left node count: %s.", len(self._normal_nodes), len(self._node_groups),
len(self._leaf_nodes))

Args:
parameter_protos (list[anf_ir_pb2.ParameterProto]): Refer to anf_ir_pb2.ParameterProto.
"""
logger.debug("Start to parse parameters from proto.")
for parameter in parameter_protos:
if not parameter.name:
logger.warning("Finding a parameter with an empty name will not save it.")
continue
check_invalid_character(parameter.name)
node = Node(name=parameter.name, node_id=parameter.name)
node.type = NodeTypeEnum.PARAMETER.value
node.output_shape = self._get_shape_by_parse_type_proto(parameter.type)
node.output_nums = len(node.output_shape)
node.output_data_type = self._get_data_type_by_parse_type_proto(parameter.type, node)
attr = dict(
type=self._get_data_type_by_parse_type_proto(parameter.type, node),
shape=str(self._get_shape_by_parse_type_proto(parameter.type))
)
node.add_attr(attr)

self._cache_node(node)
logger.debug("Foreach graph proto parameters, node id: %s, node name: %s, "
"node def name: %s", node.node_id, node.name, parameter.name)

def _parse_consts(self, consts):
def _calc_input(self, leaf_node_id_map_name, graph_proto, const_nodes_map):
"""
Parse `anf_ir_pb2.NameValueProto` object, and create a const node.
Calc input for every leaf node.

Args:
consts (list[anf_ir_pb2.NameValueProto]): Refer to `anf_ir_pb2.NameValueProto` object.
leaf_node_id_map_name (dict[str, str]): Format is {'node_id': 'node_name'}.
graph_proto (anf_ir_pb2.model_proto.graph): See anf_ir_pb2.model_proto.graph.
const_nodes_map (dict[str, Node]): Format is {'node name': <Const node>}.
"""
logger.debug("Start to parse consts from proto.")
for const in consts:
if not const.key:
logger.warning("Finding a const with an empty key will not save it.")
continue
check_invalid_character(const.key)
node = Node(name=const.key, node_id=const.key)
node.type = NodeTypeEnum.CONST.value
if const.value.ByteSize() > self.MAX_NODE_ATTRIBUTE_VALUE_BYTES:
node.add_attr({const.key: 'dtype: ' + DataType.Name(const.value.dtype)})
logger.debug("Start to calc input.")
for node_def in graph_proto.node:
node_name = leaf_node_id_map_name[node_def.name]
node = self._leaf_nodes[node_name]
for input_def in node_def.input:
edge_type = EdgeTypeEnum.data
if input_def.type == "CONTROL_EDGE":
edge_type = EdgeTypeEnum.control

if const_nodes_map.get(input_def.name):
const_node = copy.deepcopy(const_nodes_map[input_def.name])
src_name = '{}/{}'.format(node.name_scope, input_def.name)
if not self._normal_nodes.get(src_name):
const_node.name = src_name
const_node.name_scope = node.name_scope
self._normal_nodes.update({src_name: const_node})
self._leaf_nodes.update({src_name: const_node})
src_node = self._leaf_nodes.get(src_name)
else:
src_name = leaf_node_id_map_name.get(input_def.name)
if not src_name:
logger.warning("The input_def name '%s' in node '%s' is invalid, "
"will be ignore.", input_def.name, node_name)
continue

src_node = self._leaf_nodes.get(src_name)
if src_node is None:
logger.warning("The input '%s' in node '%s' is not in "
"leaf nodes.", src_name, node_name)
continue

input_item = {
src_name: {
"shape": src_node.shape,
"edge_type": edge_type,
"scope": NodeTypeEnum.NAME_SCOPE.value
}
}
node.update_input(input_item)

if self._normal_nodes.get(node_name):
self._normal_nodes[node_name] = node
else:
node.add_attr({const.key: str(const.value)})
group_name = self._create_group_name(node.name_scope, node.node_type, node.name)
self._node_groups[group_name][node.name] = node

if const.value.dtype == DataType.DT_TENSOR:
shape = list(const.value.tensor_val.dims)
node.output_shape.append(shape)
if const.value.tensor_val.HasField('data_type'):
node.elem_types.append(DataType.Name(const.value.tensor_val.data_type))
else:
node.elem_types.append(DataType.Name(const.value.dtype))
# dim is zero
node.output_shape.append([])
def _calc_output(self):
"""Calc output of every node."""
logger.debug("Start to calc output.")

node.output_nums = len(node.output_shape)
for name, node in self._leaf_nodes.items():
if node.node_type == NodeTypeEnum.CONST.value:
continue
for src_name, input_attr in node.input.items():
src_node = self._leaf_nodes[src_name]
if src_node.node_type == NodeTypeEnum.CONST.value:
continue

self._cache_node(node)
if self._normal_nodes.get(src_name):
self._normal_nodes[src_name].update_output({name: input_attr})
else:
group_name = self._create_group_name(src_node.name_scope,
src_node.node_type, src_node.name)
self._node_groups[group_name][src_name].update_output({name: input_attr})

def _get_shape_by_parse_type_proto(self, type_proto):
def _parse_graph_proto_node(self, node_def):
"""
Parse proto's `message TypeProto` to get shape information.
        Parse `anf_ir_pb2.model_proto.graph.node_def`, and create a node.

Args:
type_proto (anf_ir_pb2.TypeProto): Refer to anf_ir_pb2.TypeProto.
node_def (anf_ir_pb2.model_proto.graph.node_def): Refer to anf_ir_pb2.model_proto.graph.node_def.

Returns:
list, a list of shape.
Node, a `Node` object.
"""
shapes = []
if type_proto.HasField('data_type'):
if type_proto.data_type != DataType.DT_TENSOR and \
type_proto.data_type != DataType.DT_TUPLE:
# Append an empty list as a placeholder
# for the convenience of output number calculation.
shapes.append([])
return shapes
if type_proto.HasField('tensor_type'):
tensor_type = type_proto.tensor_type
tensor_shape_proto = tensor_type.shape
shape = [dim.size for dim in tensor_shape_proto.dim]
shapes.append(shape)
if type_proto.HasField('sequence_type'):
for elem_type in type_proto.sequence_type.elem_types:
shapes.extend(self._get_shape_by_parse_type_proto(elem_type))
return shapes
node_name = '/'.join([node_def.scope, node_def.op_type])+node_def.name
node = Node(name=node_name, node_id=node_def.name)
node.node_type = node_def.op_type
logger.debug("Foreach graph proto nodes, node id: %s, node name: %s, node def name: %s, "
"input count: %s", node.node_id, node.name, node_def.name, len(node_def.input))

for attr in node_def.attribute:
node.update_attr({attr.name: str(attr.value)})

node.output_i = node_def.output_i
node.name_scope = node_def.scope

output_type = node_def.output_type
shape = self._parse_type_proto(output_type)
node.shape = shape

self._leaf_nodes.update({node.name: node})
group_name = self._create_group_name(node.name_scope, node.node_type, node.name)
if group_name is not None:
node_dict = self._node_groups.get(group_name, {})
node_dict.update({node.name: node})
self._node_groups.update({group_name: node_dict})
else:
self._normal_nodes.update({node.name: node})

def _get_data_type_by_parse_type_proto(self, type_proto, node):
"""
Get data type by parse type proto object.
return node

The name of the DataType, refer to `anf_ir_pb2.DataType` object.
If data type is tensor or tuple, the data name we return is `data_type[element_type, element_type]`.
def _parse_graph_proto_parameter(self, parameter):
"""
Parse anf_ir_pb2.model_proto.graph.parameter, and create a parameter node.

Args:
type_proto (anf_ir_pb2.TypeProto): Refer to anf_ir_pb2.TypeProto.
parameter (anf_ir_pb2.model_proto.graph.parameter): Refer to anf_ir_pb2.model_proto.graph.parameter.

Returns:
str, the data type.

Node, a `Node` object.
"""
data_type_name = self._get_data_type_name_by_value(type_proto, type_proto.data_type, field_name='data_type')
if type_proto.data_type == DataType.DT_TENSOR:
tensor_type_proto = type_proto.tensor_type
value = type_proto.tensor_type.elem_type
elem_type_name = self._get_data_type_name_by_value(tensor_type_proto, value, field_name='elem_type')
node.elem_types.append(elem_type_name)
return f'{data_type_name}[{elem_type_name}]'

if type_proto.data_type == DataType.DT_TUPLE:
data_types = []
for elem_type in type_proto.sequence_type.elem_types:
data_types.append(self._get_data_type_by_parse_type_proto(elem_type, node))
return f'{data_type_name}{str(data_types)}'

node.elem_types.append(data_type_name)

return data_type_name
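# A hedged sketch of the naming convention described above, with the proto replaced by a plain
# ('kind', payload) tuple so it runs standalone (the helper name and tuple encoding are
# assumptions for illustration): tensor types render as '<kind>[<element type>]' and tuple
# types recurse over their elements.
def render_data_type(type_desc):
    """type_desc: ('DT_TENSOR', 'DT_FLOAT32'), ('DT_TUPLE', [...]) or ('<scalar name>', None)."""
    kind, payload = type_desc
    if kind == 'DT_TENSOR':
        return f'{kind}[{payload}]'                       # payload is the element type name
    if kind == 'DT_TUPLE':
        return kind + str([render_data_type(elem) for elem in payload])
    return kind                                           # scalar types use their own name

# render_data_type(('DT_TUPLE', [('DT_TENSOR', 'DT_FLOAT32'), ('DT_INT32', None)]))
# -> "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_INT32']"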

def get_nodes(self, searched_node_list):
node = Node(name=parameter.name, node_id=parameter.name)
node.node_type = NodeTypeEnum.PARAMETER.value
node.shape = self._parse_type_proto(parameter.type)
logger.debug("Foreach graph proto parameters, node id: %s, node name: %s, "
"node def name: %s", node.node_id, node.name, parameter.name)
return node

def _parse_graph_proto_const(self, const, const_node_id):
"""
Get node tree by a searched_node_list.
Parse anf_ir_pb2.model_proto.graph.const, and create a const node.

Args:
searched_node_list (list[Node]): A list of nodes that
matches the given search pattern.
const (anf_ir_pb2.model_proto.graph.const): Refer to anf_ir_pb2.model_proto.graph.const
const_node_id (str): The id of the new const node, it should be unique in graph.

Returns:
A list of dict including the searched nodes.
[{
"name": "Default",
"type": "name_scope",
"nodes": [{
"name": "Default/Conv2D1",
"type": "name_scope",
"nodes": [{
...
}]
}]
},
{
"name": "Gradients",
"type": "name_scope",
"nodes": [{
"name": "Gradients/Default",
"type": "name_scope",
"nodes": [{
...
}]
}]
Node, a `Node` object.
"""
# save the node in the NodeTree
root = NodeTree()
for node in searched_node_list:
self._build_node_tree(root, node.name, node.type)

# get the searched nodes in the NodeTree and reorganize them
searched_list = []
self._traverse_node_tree(root, searched_list)

return searched_list

def search_leaf_nodes_by_pattern(self, pattern, scope_pattern=False):
node = Node(name=const.key, node_id=const_node_id)
node.node_type = NodeTypeEnum.CONST.value
node.update_attr({const.key: str(const.value)})
if const.value.dtype == DataTypeEnum.DT_TENSOR:
shape = []
for dim in const.value.tensor_val.dims:
shape.append(dim)
node.shape = shape
return node

def _parse_type_proto(self, type_proto):
"""
Search leaf node by a given pattern.
Parse proto's `message TypeProto` to get shape information.

Args:
pattern (Union[str, None]): The pattern of the node to search,
if None, return all node names.
scope_pattern (bool): If true, return the children nodes of the scope. Default: False.
type_proto (anf_ir_pb2.TypeProto): Refer to anf_ir_pb2.TypeProto.

Returns:
list[Node], a list of nodes.
list, a list of shape.
"""
is_match = lambda x, y: x.lower().startswith(y) if scope_pattern else y in x.lower()
if pattern is not None:
pattern = pattern.lower()
searched_nodes = [
node for name, node in self._leaf_nodes.items()
if is_match(name, pattern)
]
else:
searched_nodes = [node for node in self._leaf_nodes.values()]
return searched_nodes
shapes = []
if type_proto.HasField('tensor_type'):
tensor_type = type_proto.tensor_type
tensor_shape_proto = tensor_type.shape
for dim in tensor_shape_proto.dim:
shapes.append(dim.size)
if type_proto.HasField('sequence_type'):
for elem_type in type_proto.sequence_type.elem_types:
shapes.append(self._parse_type_proto(elem_type))
return shapes

def search_nodes_by_pattern(self, pattern):
def _create_group_name(self, name_scope, node_type, node_name):
"""
Search node by a given pattern.
Create group name by node name, name scope, node type.

        Search for nodes whose last name segment contains the pattern. Example: pattern=ops, node1=default/ops,
        node2=default/ops/weight, so node2 will be ignored and only node1 will be returned.
Only nodes that conform to the rules are aggregated.

Args:
pattern (Union[str, None]): The pattern of the node to search.
name_scope (str): The node name scope.
node_type (str): The node type.
node_name (str): The node name.

Returns:
list[Node], a list of nodes.
Optional[str], if match the rules will return a group name, else return None.
"""
searched_nodes = []
if pattern and pattern != '/':
pattern = pattern.lower()
for name, node in self._normal_node_map.items():
name = name.lower()
pattern_index = name.rfind(pattern)
if pattern_index >= 0 and name.find('/', pattern_index + len(pattern)) == -1:
searched_nodes.append(node)
return searched_nodes
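# The matching rule explained in the docstring above, as a standalone sketch (hypothetical
# helper name): a node matches only when the pattern falls inside the last path segment of
# its name, so parent scopes win over their children.
def match_last_segment(names, pattern):
    pattern = pattern.lower()
    matched = []
    for name in names:
        lowered = name.lower()
        idx = lowered.rfind(pattern)
        # No '/' may follow the match, i.e. the pattern sits in the last segment.
        if idx >= 0 and lowered.find('/', idx + len(pattern)) == -1:
            matched.append(name)
    return matched

# match_last_segment(['default/ops', 'default/ops/weight'], 'ops') -> ['default/ops']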

def _build_node_tree(self, root, node_name, node_type):
"""
Build node tree.
group_types = ['Reshape', 'Variable']
pattern_names = r'.*?/Cast-op\d+'

Args:
root (NodeTree): Root node of node tree.
node_name (str): Node name.
node_type (str): Node type.
"""
scope_names = node_name.split('/')
cur_node = root
full_name = ""
for scope_name in scope_names[:-1]:
full_name = '/'.join([full_name, scope_name]) if full_name else scope_name
scope_node = self._get_normal_node(node_name=full_name)
sub_node = cur_node.get(scope_name)
if not sub_node:
sub_node = cur_node.add(scope_name, scope_node.type)
cur_node = sub_node
cur_node.add(scope_names[-1], node_type)

def _traverse_node_tree(self, cur_node, search_node_list):
"""Traverse the node tree and construct the searched nodes list."""
for _, sub_node in cur_node.get_children():
sub_nodes = []
self._traverse_node_tree(sub_node, sub_nodes)
sub_node_dict = {
'name': sub_node.node_name,
'type': sub_node.node_type,
'nodes': sub_nodes
}
search_node_list.append(sub_node_dict)

def _parse_inputs(self, input_protos, node):
"""
Parse `anf_ir_pb2.InputProto` object.

Args:
input_protos (list[anf_ir_pb2.InputProto]): Refer to `anf_ir_pb2.InputProto` object.
node (Node): Refer to `Node` object, it is used to log message and update input.
"""
for input_proto in input_protos:
if not input_proto.name:
logger.warning("The name in input proto of node(%s) is empty, will ignore.", node.name)
continue

edge_type = EdgeTypeEnum.DATA.value if not input_proto.type else EdgeTypeEnum.CONTROL.value

# Notice:
# 1. The name in the input proto is the node id of the Node object.
# 2. In the current step, the shape of source node cannot be obtained,
# so it is set to empty list by default, and the next step will update it.
# 3. Same with scope, set the default value first.
input_attr = {
"shape": [],
"edge_type": edge_type,
"independent_layout": False,
'data_type': ''
}

node.add_inputs(src_name=input_proto.name, input_attr=input_attr)

def _parse_attributes(self, attributes, node):
"""
        Parse `anf_ir_pb2.AttributeProto` object, and filter out large attribute values.

Args:
attributes (list[anf_ir_pb2.AttributeProto]): Refer to `anf_ir_pb2.AttributeProto` object.
node (Node): Refer to `Node` object, it is used to log message and update attr.
"""
for attr in attributes:
if attr.value.ByteSize() > self.MAX_NODE_ATTRIBUTE_VALUE_BYTES:
message = f"The attribute value of node({node.name}) " \
f"is over {self.MAX_NODE_ATTRIBUTE_VALUE_BYTES} Bytes, will ignore."
logger.warning(message)
continue
node.add_attr({attr.name: str(attr.value)})

def _update_input_after_create_node(self):
"""Update the input of node after create node."""
for node in self._normal_node_map.values():
for src_node_id, input_attr in dict(node.inputs).items():
node.delete_inputs(src_node_id)
if not self._is_node_exist(node_id=src_node_id):
message = f"The input node could not be found by node id({src_node_id}) " \
f"while updating the input of the node({node})"
logger.warning(message)

continue

src_node = self._get_normal_node(node_id=src_node_id)
input_attr['shape'] = src_node.output_shape
input_attr['data_type'] = src_node.output_data_type
node.add_inputs(src_name=src_node.name, input_attr=input_attr)

def _update_output_after_create_node(self):
"""Update the output of node after create node."""
        # Const and Parameter nodes are excluded when updating outputs.
filtered_node = {NodeTypeEnum.CONST.value, NodeTypeEnum.PARAMETER.value}
for node in self._normal_node_map.values():
for src_name, input_attr in node.inputs.items():
src_node = self._get_normal_node(node_name=src_name)
if src_node.type in filtered_node:
continue
if node_type in group_types:
group_name = name_scope + '/' + node_type if name_scope else node_type
return group_name

src_node.add_outputs(node.name, input_attr)
if node_type == 'FrameworkOp' and re.search(pattern_names, node_name):
group_name = name_scope + '/' + 'Cast-op' if name_scope else 'Cast-op'
return group_name

@staticmethod
def _get_data_type_name_by_value(data_type, value, field_name='data_type'):
"""Get the data type name by the enum value, data_type refer to `DataType` object."""
return data_type.DESCRIPTOR.fields_by_name[field_name].enum_type.values_by_number[value].name
return None

+ 100
- 118
mindinsight/datavisual/data_transform/graph/node.py View File

@@ -17,11 +17,10 @@ This file is used to define the node of graph and associated base types.
"""
from enum import Enum


class NodeTypeEnum(Enum):
"""Node type enum. The following types are new to our custom."""
NAME_SCOPE = 'name_scope'
AGGREGATION_SCOPE = 'aggregation_scope'
POLYMERIC_SCOPE = 'polymeric_scope'
PARAMETER = 'Parameter'
CONST = 'Const'

@@ -35,40 +34,34 @@ class Node:
node_id (str): The id of this node, and node id is unique in graph.
"""

def __init__(self, name, node_id, topological_index=-1):
def __init__(self, name, node_id):
self._node_id = node_id
self.name = name
self.type = ""
self._name = name
self._type = ""
self._attr = dict()
self._input = dict()
self.output_i = 0
self._output_i = -1
self._output = {}
self._proxy_input = {}
self._proxy_output = {}
self.subnode_count = 0
self.scope = ""
self.independent_layout = False
self.output_shape = []
self.output_data_type = ""
self.output_nums = 0
self.elem_types = []
self.full_name = ""
# This value will be used as the priority field.
self.topological_index = topological_index
self._polymeric_input = {}
self._polymeric_output = {}
self._polymeric_scope_name = ""
self._subnode_count = 0
self._name_scope = ""
self.shape = []

def to_dict(self):
"""Converts the node object to dictionary format."""
return {
'name': self.name,
'type': self.type,
'name': self._name,
'type': self._type,
'attr': self._attr,
'input': self._input,
'output_i': self._output_i,
'output': self._output,
'output_i': self.output_i,
'proxy_input': self._proxy_input,
'proxy_output': self._proxy_output,
'subnode_count': self.subnode_count,
'independent_layout': self.independent_layout
'polymeric_input': self._polymeric_input,
'polymeric_output': self._polymeric_output,
'subnode_count': self._subnode_count,
'polymeric_scope_name': self._polymeric_scope_name
}

@property
@@ -76,154 +69,143 @@ class Node:
"""The id of this node, and id is unique in graph."""
return self._node_id

@staticmethod
def create_node_name(scope, base_name):
"""
        The name of the node consists of the scope and the base name.
@property
def name(self):
"""Get node name."""
return self._name

Args:
scope (str): The scope of node, such as 'Default/Conv2D'
base_name (str): The base name of node, such as 'Add11'.
@name.setter
def name(self, name):
"""Set node name."""
self._name = name

Returns:
str, a node name.
"""
return f'{scope}/{base_name}' if scope else base_name
@property
def node_type(self):
"""Get node type."""
return self._type

@node_type.setter
def node_type(self, node_type):
"""Set node type."""
self._type = node_type

@property
def attr(self):
"""Get node attr."""
return self._attr

def add_attr(self, attr_dict):
def update_attr(self, attr_dict):
"""
Update node attr.

Args:
attr_dict (dict[str, str]): The attr of node.
attr_dict (dict[str, str]): Format is {'<key>': '<value>'}.
"""
self._attr.update(attr_dict)

@property
def inputs(self):
def input(self):
"""
Get all input of current node.

Returns:
dict[str, dict], refer to the input attr.
dict[str, dict], format is {'<src_name>': {'shape': [], 'edge_type', 'scope'}}.
"""
return self._input

def add_inputs(self, src_name, input_attr):
def update_input(self, input_dict):
"""
Update input.

Args:
            src_name (str): The source node name.
input_attr (dict): The attribute of the input.

- shape (list): The shape of input tensor.
- edge_type (str): The type of edge, optional value refer to `EdgeTypeEnum`.
- data_type (str): The data type of the input.
- independent_layout (bool): Indicates whether the source nodes are laid out independently.
input_dict (dict[str, dict]): Format is {'<src_name>': {'shape': [], 'edge_type', 'scope'}}.
"""
self._input.update({src_name: input_attr})
self._input.update(input_dict)

@property
def output_i(self):
"""The memory address of this node when it is in run time."""
return self._output_i

@output_i.setter
def output_i(self, output_i):
"""Set memory address."""
self._output_i = output_i

def delete_inputs(self, src_name):
@property
def polymeric_input(self):
"""
Delete input attribute by the given source name.
The polymeric input is the input of the polymeric nodes.

Args:
src_name (str): The source node name.
Returns:
dict[str, dict], format is {'<src_name>': {'edge_type': '<value>'}}.
"""
self._input.pop(src_name)
return self._polymeric_input

def update_polymeric_input(self, polymeric_input):
"""The polymeric input is the input of the polymeric nodes."""
self._polymeric_input.update(polymeric_input)

@property
def outputs(self):
def output(self):
"""The output node of this node."""
return self._output

def add_outputs(self, dst_name, output_attr):
def update_output(self, output):
"""
        Add an output node to this node.
Update output node.

Args:
dst_name (str): The name of the output node.
            output_attr (dict): Same as the input attribute.
output (dict[str, TypedDict('NodeType', {'type': str})]): Format
is {"<node_name>": {"type": "<node type>"}}.
"""
self._output.update({dst_name: output_attr})

def delete_outputs(self, dst_name):
"""
        Delete an output node.

Args:
dst_name (str): The name of the node to be deleted.
"""
self._output.pop(dst_name)
self._output.update(output)

@property
def proxy_inputs(self):
"""Return proxy input, type is dict."""
return self._proxy_input
def polymeric_output(self):
"""Get polymeric output."""
return self._polymeric_output

def add_proxy_inputs(self, src_name, attr):
def update_polymeric_output(self, polymeric_output):
"""
Add a proxy input to node.
Update polymeric output.

Args:
src_name (str): The name of the input node.
attr (dict): The attr of the input.
            polymeric_output (dict[str, dict]): Format is {dst_node.polymeric_scope_name:
{'edge_type': EdgeTypeEnum.data}}).

- edge_type (str): The edge type, refer to `EdgeTypeEnum`.
"""
self._proxy_input.update({src_name: attr})

def delete_proxy_inputs(self, src_name):
"""Delete a proxy input by the src name."""
self._proxy_input.pop(src_name)
self._polymeric_output.update(polymeric_output)

@property
def proxy_outputs(self):
"""Get proxy output, data type is dict."""
return self._proxy_output
def polymeric_scope_name(self):
"""Get polymeric scope name."""
return self._polymeric_scope_name

def add_proxy_outputs(self, dst_name, attr):
"""
Add a proxy output to node.
@polymeric_scope_name.setter
def polymeric_scope_name(self, name):
"""Set polymeric scope name."""
self._polymeric_scope_name = name

Args:
dst_name (str): The name of the output node.
attr (dict): The attr of the output.

- edge_type (str): The edge type, refer to `EdgeTypeEnum`.
"""
self._proxy_output.update({dst_name: attr})
@property
def subnode_count(self):
"""The sub node count of this node, if this node is a scope node, this count will not be zero."""
return self._subnode_count

def delete_proxy_outputs(self, dst_name):
"""Delete a proxy output by dst name."""
self._proxy_output.pop(dst_name)
@subnode_count.setter
def subnode_count(self, count):
"""Set sub node count."""
self._subnode_count = count

@staticmethod
def copy_node_without_input_output(src_node, dst_node):
"""
        Copy the source node's attributes to a new node, excluding inputs and outputs.
@property
def name_scope(self):
"""Get name scope of this node."""
return self._name_scope

Args:
src_node (Node): The copied node.
dst_node (Node): The destination node.
"""
dst_node.full_name = src_node.full_name
dst_node.type = src_node.type
dst_node.output_i = src_node.output_i
dst_node.subnode_count = src_node.subnode_count
dst_node.scope = src_node.scope
dst_node.independent_layout = src_node.independent_layout
dst_node.output_shape = src_node.output_shape
dst_node.output_data_type = src_node.output_data_type
dst_node.output_nums = src_node.output_nums
dst_node.elem_types = src_node.elem_types
dst_node.add_attr(src_node.attr)
@name_scope.setter
def name_scope(self, name_scope):
"""Set name scope."""
self._name_scope = name_scope

def __str__(self):
return f'<Node, name: {self.name}, type: {self.type}>'
return f'<Node, name: {self._name}, type: {self._type}>'

+ 0
- 61
mindinsight/datavisual/data_transform/graph/node_tree.py View File

@@ -1,61 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
This file is used to define the node of graph and associated base types.
"""
from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError
from mindinsight.debugger.common.log import LOGGER as log


class NodeTree:
"""A class for building a node tree."""
def __init__(self, node_name='', node_type=None):
self.node_name = node_name
self._node_type = node_type
self._children = {}

@property
def node_type(self):
"""The property of node type."""
return self._node_type

@node_type.setter
def node_type(self, value):
"""Set the node type."""
self._node_type = value

def add(self, name, node_type=None):
"""Add sub node."""
sub_name = '/'.join([self.node_name, name]) if self.node_name else name
sub_node = NodeTree(sub_name, node_type)
self._children[name] = sub_node
return sub_node

def get(self, sub_name):
"""Get sub node."""
return self._children.get(sub_name)

def get_children(self):
"""Get all childrens."""
for name_scope, sub_node in self._children.items():
yield name_scope, sub_node

def remove(self, sub_name):
"""Remove sub node."""
try:
self._children.pop(sub_name)
except KeyError as err:
log.error("Failed to find node %s. %s", sub_name, err)
raise DebuggerParamValueError("Failed to find node {}".format(sub_name))
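# A short usage sketch of the NodeTree above (scope and node names are hypothetical, for
# illustration only): scopes are added level by level, and each child carries its full
# '/'-joined path in node_name.
root = NodeTree()
default = root.add('Default', node_type='name_scope')
conv = default.add('Conv2D1', node_type='name_scope')
conv.add('BiasAdd', node_type='BiasAdd')
for _, child in root.get_children():
    print(child.node_name, child.node_type)   # prints: Default name_scope
print(conv.get('BiasAdd').node_name)          # prints: Default/Conv2D1/BiasAdd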

+ 0
- 237
mindinsight/datavisual/data_transform/histogram.py View File

@@ -1,237 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Histogram data."""
import math

from mindinsight.utils.exceptions import ParamValueError
from mindinsight.datavisual.utils.utils import calc_histogram_bins


def mask_invalid_number(num):
"""Mask invalid number to 0."""
if math.isnan(num) or math.isinf(num):
return type(num)(0)

return num
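# Quick check of the masking rule above: NaN and both infinities collapse to a zero of the
# same numeric type, while ordinary values pass through unchanged.
print(mask_invalid_number(float('nan')))    # 0.0
print(mask_invalid_number(float('-inf')))   # 0.0
print(mask_invalid_number(3.5))             # 3.5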


class Bucket:
"""
Bucket data class.

Args:
left (double): Left edge of the histogram bucket.
width (double): Width of the histogram bucket.
count (int): Count of numbers fallen in the histogram bucket.
"""
def __init__(self, left, width, count):
self._left = left
self._width = width
self._count = count

@property
def left(self):
"""Gets left edge of the histogram bucket."""
return self._left

@property
def count(self):
"""Gets count of numbers fallen in the histogram bucket."""
return self._count

@property
def width(self):
"""Gets width of the histogram bucket."""
return self._width

@property
def right(self):
"""Gets right edge of the histogram bucket."""
return self._left + self._width

def as_tuple(self):
"""Gets the bucket as tuple."""
return self._left, self._width, self._count

def __repr__(self):
"""Returns repr(self)."""
return "Bucket(left={}, width={}, count={})".format(self._left, self._width, self._count)


class Histogram:
"""
Histogram data class.

Args:
buckets (tuple[Bucket]): The buckets of histogram data.
max_val (number): The max value of histogram data.
min_val (number): The min value of histogram data.
count (int): The count of histogram data.
"""

# Max quantity of original buckets.
MAX_ORIGINAL_BUCKETS_COUNT = 90

def __init__(self, buckets, max_val, min_val, count):
self._visual_max = max_val
self._visual_min = min_val
self._count = count
self._original_buckets = buckets
# default bin number
self._visual_bins = calc_histogram_bins(count)
# Note that tuple is immutable, so sharing tuple is often safe.
self._re_sampled_buckets = ()

@property
def original_buckets_count(self):
"""Gets original buckets quantity."""
return len(self._original_buckets)

def set_visual_range(self, max_val: float, min_val: float, bins: int) -> None:
"""
Sets visual range for later re-sampling.

It's caller's duty to ensure input is valid.

        Why do we need a visual range for histograms? Aligned buckets between steps help users see the trend of
        tensors, while misaligned buckets between steps might mislead users about the trend of a tensor. For a
        given tensor, thinner buckets mean a lower count in every bucket, whereas thicker buckets mean a higher
        count in every bucket. When such histograms are displayed together, users might think the histogram with
        thicker buckets has more values, which is misleading. So we need to unify buckets across steps, and the
        visual range is the technique for unifying them.

Args:
max_val (float): Max value for visual histogram.
min_val (float): Min value for visual histogram.
bins (int): Bins number for visual histogram.
"""
if max_val < min_val:
raise ParamValueError(
"Invalid input. max_val({}) is less or equal than min_val({}).".format(max_val, min_val))

if bins < 1:
raise ParamValueError("Invalid input bins({}). Must be greater than 0.".format(bins))

self._visual_max = max_val
self._visual_min = min_val
self._visual_bins = bins

# mark _re_sampled_buckets to empty
self._re_sampled_buckets = ()

def _calc_intersection_len(self, max1, min1, max2, min2):
"""Calculates intersection length of [min1, max1] and [min2, max2]."""
if max1 < min1:
raise ParamValueError(
"Invalid input. max1({}) is less than min1({}).".format(max1, min1))

if max2 < min2:
raise ParamValueError(
"Invalid input. max2({}) is less than min2({}).".format(max2, min2))

if min1 <= min2:
if max1 <= min2:
# return value must be calculated by max1.__sub__
return max1 - max1
if max1 <= max2:
return max1 - min2
# max1 > max2
return max2 - min2

# min1 > min2
if max2 <= min1:
return max2 - max2
if max2 <= max1:
return max2 - min1
return max1 - min1
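# Two worked cases for the interval-intersection rule above (values chosen for illustration;
# `hist` stands for any Histogram instance, since the helper does not touch instance state):
# overlapping ranges return the overlap length, disjoint ranges return zero.
hist = Histogram(buckets=(), max_val=1.0, min_val=0.0, count=10)
print(hist._calc_intersection_len(max1=10, min1=0, max2=20, min2=5))  # 5: overlap is [5, 10]
print(hist._calc_intersection_len(max1=2, min1=0, max2=20, min2=5))   # 0: intervals are disjoint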

def _re_sample_buckets(self):
"""Re-samples buckets according to visual_max, visual_min and visual_bins."""
if self._visual_max == self._visual_min:
# Adjust visual range if max equals min.
self._visual_max += 0.5
self._visual_min -= 0.5

width = (self._visual_max - self._visual_min) / self._visual_bins

if not self._count:
self._re_sampled_buckets = tuple(
Bucket(self._visual_min + width * i, width, 0)
for i in range(self._visual_bins))
return

re_sampled = []
original_pos = 0
original_bucket = self._original_buckets[original_pos]
for i in range(self._visual_bins):
cur_left = self._visual_min + width * i
cur_right = cur_left + width
cur_estimated_count = 0.0

# Skip no bucket range.
if cur_right <= original_bucket.left:
re_sampled.append(Bucket(cur_left, width, math.ceil(cur_estimated_count)))
continue

# Skip no intersect range.
while cur_left >= original_bucket.right:
original_pos += 1
if original_pos >= len(self._original_buckets):
break
original_bucket = self._original_buckets[original_pos]

# entering with this condition: cur_right > original_bucket.left and cur_left < original_bucket.right
while True:
if original_pos >= len(self._original_buckets):
break
original_bucket = self._original_buckets[original_pos]

intersection = self._calc_intersection_len(
min1=cur_left, max1=cur_right,
min2=original_bucket.left, max2=original_bucket.right)
if not original_bucket.width:
estimated_count = original_bucket.count
else:
estimated_count = (intersection / original_bucket.width) * original_bucket.count

cur_estimated_count += estimated_count
if cur_right > original_bucket.right:
# Need to sample next original bucket to this visual bucket.
original_pos += 1
else:
# Current visual bucket has taken all intersect buckets into account.
break

re_sampled.append(Bucket(cur_left, width, math.ceil(cur_estimated_count)))

self._re_sampled_buckets = tuple(re_sampled)

def buckets(self, convert_to_tuple=True):
"""
Get visual buckets instead of original buckets.

Args:
convert_to_tuple (bool): Whether convert bucket object to tuple.

Returns:
tuple, contains buckets.
"""
if not self._re_sampled_buckets:
self._re_sample_buckets()

if not convert_to_tuple:
return self._re_sampled_buckets

return tuple(bucket.as_tuple() for bucket in self._re_sampled_buckets)
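# A hedged end-to-end usage sketch of the class above (bucket values are illustrative): build a
# histogram from two original buckets, pin the visual range so buckets stay aligned across
# steps, and read back the re-sampled buckets.
original = (Bucket(left=0.0, width=1.0, count=4), Bucket(left=1.0, width=1.0, count=6))
hist = Histogram(original, max_val=2.0, min_val=0.0, count=10)
hist.set_visual_range(max_val=2.0, min_val=0.0, bins=4)
print(hist.buckets())   # four (left, width, count) tuples covering [0.0, 2.0]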

+ 0
- 59
mindinsight/datavisual/data_transform/histogram_container.py View File

@@ -1,59 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Histogram data container."""
from mindinsight.datavisual.data_transform.histogram import Histogram, Bucket, mask_invalid_number
from mindinsight.datavisual.proto_files.mindinsight_summary_pb2 import Summary


class HistogramContainer:
"""
Histogram data container.

Args:
histogram_message (Summary.Histogram): Histogram message in summary file.
"""

def __init__(self, histogram_message: Summary.Histogram):
original_buckets = [Bucket(bucket.left, bucket.width, bucket.count) for bucket in histogram_message.buckets]
# Ensure buckets are sorted from min to max.
original_buckets.sort(key=lambda bucket: bucket.left)
self._count = sum(bucket.count for bucket in original_buckets)
self._max = mask_invalid_number(histogram_message.max)
self._min = mask_invalid_number(histogram_message.min)
self._histogram = Histogram(tuple(original_buckets), self._max, self._min, self._count)

@property
def max(self):
"""Gets max value of the tensor."""
return self._max

@property
def min(self):
"""Gets min value of the tensor."""
return self._min

@property
def count(self):
"""Gets valid number count of the tensor."""
return self._count

@property
def histogram(self):
"""Gets histogram data"""
return self._histogram

def buckets(self):
"""Gets histogram buckets"""
return self._histogram.buckets()
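# A hedged usage sketch: the container only reads `.buckets` (each with left/width/count),
# `.max` and `.min` from the message, so a stand-in namespace object is enough to exercise it
# here; the real type hint expects a `Summary.Histogram` proto message.
from types import SimpleNamespace

message = SimpleNamespace(
    buckets=[SimpleNamespace(left=0.0, width=1.0, count=3),
             SimpleNamespace(left=1.0, width=1.0, count=7)],
    max=2.0, min=0.0)
container = HistogramContainer(message)
print(container.count)       # 10
print(container.buckets())   # re-sampled (left, width, count) tuples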

+ 0
- 30
mindinsight/datavisual/data_transform/image_container.py View File

@@ -1,30 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Image container."""
from mindinsight.datavisual.proto_files.mindinsight_summary_pb2 import Summary


class ImageContainer:
"""
Container for image to allow pickling.

Args:
image_message (Summary.Image): Image proto buffer message.
"""
def __init__(self, image_message: Summary.Image):
self.height = image_message.height
self.width = image_message.width
self.colorspace = image_message.colorspace
self.encoded_image = image_message.encoded_image

+ 0
- 1
mindinsight/datavisual/data_transform/loader_generators/__init__.py View File

@@ -12,4 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""This module defines the generator for the loaders."""

+ 1
- 6
mindinsight/datavisual/data_transform/loader_generators/data_loader_generator.py View File

@@ -19,7 +19,6 @@ This module generate loaders from summary logs.
"""
import os
from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.common.exceptions import TrainJobNotExistError
from mindinsight.datavisual.data_access.file_handler import FileHandler
from mindinsight.datavisual.data_transform.data_loader import DataLoader
from mindinsight.datavisual.data_transform.loader_generators.loader_generator import MAX_DATA_LOADER_SIZE
@@ -27,7 +26,6 @@ from mindinsight.datavisual.data_transform.loader_generators.loader_struct impor
from mindinsight.datavisual.data_transform.loader_generators.loader_generator import LoaderGenerator
from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
from mindinsight.utils.exceptions import ParamValueError
from mindinsight.utils.exceptions import PathNotExistError


class DataLoaderGenerator(LoaderGenerator):
@@ -243,9 +241,6 @@ class DataLoaderGenerator(LoaderGenerator):

"""
relative_path = self._get_relative_path_from_train_id(train_id)
try:
loader = self._generate_loader_by_relative_path(relative_path)
except PathNotExistError as ex:
raise TrainJobNotExistError(str(ex))
loader = self._generate_loader_by_relative_path(relative_path)

return loader

+ 0
- 12
mindinsight/datavisual/data_transform/loader_generators/loader_struct.py View File

@@ -13,7 +13,6 @@
# limitations under the License.
# ============================================================================
"""Loader struct."""
from mindinsight.datavisual.common.enums import CacheStatus


class LoaderStruct:
@@ -28,7 +27,6 @@ class LoaderStruct:
self._path = path
self._latest_update_time = latest_update_time
self._data_loader = data_loader
self._cache_status = CacheStatus.NOT_IN_CACHE

@property
def loader_id(self):
@@ -50,21 +48,11 @@ class LoaderStruct:
"""Get data loader."""
return self._data_loader

@property
def cache_status(self):
"""Get cache status of loader."""
return self._cache_status

@latest_update_time.setter
def latest_update_time(self, latest_update_time):
"""Set the latest update time of loader."""
self._latest_update_time = latest_update_time

@cache_status.setter
def cache_status(self, cache_status):
"""Set cache status of loader."""
self._cache_status = cache_status

def to_dict(self):
"""Transform LoaderStruct to dict."""
return dict(


+ 198
- 381
mindinsight/datavisual/data_transform/ms_data_loader.py View File

@@ -1,4 +1,4 @@
# Copyright 2019-2021 Huawei Technologies Co., Ltd
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -32,20 +32,13 @@ from mindinsight.datavisual.data_access.file_handler import FileHandler
from mindinsight.datavisual.data_transform.events_data import EventsData
from mindinsight.datavisual.data_transform.events_data import TensorEvent
from mindinsight.datavisual.data_transform.graph import MSGraph
from mindinsight.datavisual.data_transform.histogram import Histogram
from mindinsight.datavisual.data_transform.histogram_container import HistogramContainer
from mindinsight.datavisual.data_transform.image_container import ImageContainer
from mindinsight.datavisual.data_transform.tensor_container import TensorContainer, MAX_TENSOR_COUNT
from mindinsight.datavisual.proto_files import mindinsight_anf_ir_pb2 as anf_ir_pb2
from mindinsight.datavisual.proto_files import mindinsight_summary_pb2 as summary_pb2
from mindinsight.datavisual.proto_files import mindinsight_anf_ir_pb2 as anf_ir_pb2
from mindinsight.datavisual.utils import crc32
from mindinsight.datavisual.utils.tools import exception_no_raise_wrapper
from mindinsight.utils.computing_resource_mgr import ComputingResourceManager, Executor
from mindinsight.utils.exceptions import UnknownError

HEADER_SIZE = 8
CRC_STR_SIZE = 4
MAX_EVENT_STRING = 500000000


class MSDataLoader:
@@ -57,13 +50,16 @@ class MSDataLoader:
"""

def __init__(self, summary_dir):
self._init_instance(summary_dir)

def _init_instance(self, summary_dir):
self._summary_dir = summary_dir
self._valid_filenames = []
self._events_data = EventsData()
self._parser_list = []
self._parser_list.append(_SummaryParser(summary_dir))
self._parser_list.append(_PbParser(summary_dir))
self._latest_summary_filename = ''
self._latest_summary_file_size = 0
self._summary_file_handler = None
self._latest_pb_file_mtime = 0

def get_events_data(self):
"""Return events data read from log file."""
@@ -79,46 +75,17 @@ class MSDataLoader:
"""
deleted_files = set(old_filenames) - set(filenames)
if deleted_files:
logger.info("There are some files has been deleted, "
"we will reload all files in path %s.", self._summary_dir)
self.__init__(self._summary_dir)
logger.warning("There are some files has been deleted, "
"we will reload all files in path %s.", self._summary_dir)
self._init_instance(self._summary_dir)

def load(self, executor=None):
def load(self):
"""
Load all log valid files.

When the file is reloaded, it will continue to load from where it left off.

Args:
executor (Optional[executor]): The Executor instance.

Returns:
bool, True if the train job is finished loading.
"""
logger.debug("Start to load data in ms data loader.")
if isinstance(executor, Executor):
return self._load(executor)

if executor is not None:
raise TypeError("'executor' should be an Executor instance or None.")

with ComputingResourceManager.get_instance().get_executor() as new_executor:
while not self._load(new_executor):
pass
return True

def _load(self, executor):
"""
Load all log valid files.

When the file is reloaded, it will continue to load from where it left off.

Args:
executor (executor): The Executor instance.

Returns:
bool, True if the train job is finished loading.
"""
filenames = self.filter_valid_files()
if not filenames:
logger.warning("No valid files can be loaded, summary_dir: %s.", self._summary_dir)
@@ -127,295 +94,74 @@ class MSDataLoader:
self._valid_filenames = filenames
self._check_files_deleted(filenames, old_filenames)

finished = True
for parser in self._parser_list:
finished = parser.parse_files(executor, filenames, events_data=self._events_data) and finished
return finished
self._load_summary_files(self._valid_filenames)
self._load_pb_files(self._valid_filenames)

def filter_valid_files(self):
"""
Gets a list of valid files from the given file path.

Returns:
list[str], file name list.

"""
filenames = []
for filename in FileHandler.list_dir(self._summary_dir):
if FileHandler.is_file(FileHandler.join(self._summary_dir, filename)):
filenames.append(filename)

valid_filenames = []
for parser in self._parser_list:
valid_filenames.extend(parser.filter_files(filenames))

return list(set(valid_filenames))


class _Parser:
"""Parsed base class."""

def __init__(self, summary_dir):
self._summary_dir = summary_dir
self._latest_filename = ''

def parse_files(self, executor, filenames, events_data):
"""
Load files and parse files content.

Args:
executor (Executor): The executor instance.
filenames (list[str]): File name list.
events_data (EventsData): The container of event data.
"""
raise NotImplementedError

def filter_files(self, filenames):
"""
Gets a list of files that this parsing class can parse.

Args:
filenames (list[str]): File name list, like [filename1, filename2].

Returns:
list[str], filename list.
"""
raise NotImplementedError


class _PbParser(_Parser):
"""This class is used to parse pb file."""

def __init__(self, summary_dir):
super(_PbParser, self).__init__(summary_dir)
self._latest_mtime = 0

def parse_files(self, executor, filenames, events_data):
pb_filenames = self.filter_files(filenames)
pb_filenames = self.sort_files(pb_filenames)
for filename in pb_filenames:
if not self._set_latest_file(filename):
continue
future = executor.submit(self._parse_pb_file, self._summary_dir, filename)
def add_tensor_event(future_value):
tensor_event = future_value.result()
if tensor_event is not None:
events_data.add_tensor_event(tensor_event)
if future is not None:
future.add_done_callback(exception_no_raise_wrapper(add_tensor_event))
return False
return True

def filter_files(self, filenames):
"""
Get a list of pb files.

Args:
filenames (list[str]): File name list, like [filename1, filename2].

Returns:
list[str], filename list.

"""
return list(filter(lambda filename: re.search(r'\.pb$', filename), filenames))

def sort_files(self, filenames):
"""Sort by modify time increments and filenames increments."""
filenames = sorted(filenames, key=lambda file: (
FileHandler.file_stat(FileHandler.join(self._summary_dir, file)).mtime, file))
return filenames

def _set_latest_file(self, filename):
"""
Check if the file's modification time is newer than the last time it was loaded, and if so, set the time.

Args:
filename (str): The file name that needs to be checked and set.

Returns:
bool, True if the file was modified later than the last file loaded (and should be parsed), otherwise False.
"""
mtime = FileHandler.file_stat(FileHandler.join(self._summary_dir, filename)).mtime
if mtime < self._latest_mtime or \
(mtime == self._latest_mtime and filename <= self._latest_filename):
return False

self._latest_mtime = mtime
self._latest_filename = filename

return True
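A minimal sketch of the (mtime, filename) watermark that _set_latest_file implements above, written as a standalone function so it can be tried outside MindInsight; newer_than_watermark is a hypothetical name:

import os

def newer_than_watermark(path, latest_mtime, latest_name):
    # A pb file is re-parsed only if it sorts strictly after the last parsed
    # file when ordered by (modification time, file name); ties and older
    # files are skipped so each graph file is loaded at most once.
    mtime = os.stat(path).st_mtime
    name = os.path.basename(path)
    return not (mtime < latest_mtime or (mtime == latest_mtime and name <= latest_name))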

@staticmethod
def _parse_pb_file(summary_dir, filename):
"""
Parse pb file and write content to `EventsData`.

Args:
filename (str): The file path of pb file.

Returns:
TensorEvent, if load pb file and build graph success, will return tensor event, else return None.
"""
file_path = FileHandler.join(summary_dir, filename)
logger.info("Start to load graph from pb file, file path: %s.", file_path)
filehandler = FileHandler(file_path)
model_proto = anf_ir_pb2.ModelProto()
try:
model_proto.ParseFromString(filehandler.read())
except ParseError:
logger.warning("The given file is not a valid pb file, file path: %s.", file_path)
return None

graph = MSGraph()

try:
graph.build_graph(model_proto.graph)
except Exception as ex:
# Normally, there are no exceptions, and it is only possible for users on the MindSpore side
# to dump other non-default graphs.
logger.error("Build graph failed, file path: %s.", file_path)
logger.exception(ex)
raise UnknownError(str(ex))

tensor_event = TensorEvent(wall_time=FileHandler.file_stat(file_path).mtime,
step=0,
tag=filename,
plugin_name=PluginNameEnum.GRAPH.value,
value=graph,
filename=filename)

logger.info("Build graph success, file path: %s.", file_path)
return tensor_event


class _SummaryParser(_Parser):
"""The summary file parser."""

def __init__(self, summary_dir):
super(_SummaryParser, self).__init__(summary_dir)
self._latest_file_size = 0
self._summary_file_handler = None

def parse_files(self, executor, filenames, events_data):
def _load_summary_files(self, filenames):
"""
Load summary file and parse file content.

Args:
executor (Executor): The executor instance.
filenames (list[str]): File name list.
events_data (EventsData): The container of event data.

Returns:
bool, True if all the summary files are finished loading.
"""
summary_files = self.filter_files(filenames)
summary_files = self.sort_files(summary_files)
if self._latest_filename in summary_files:
index = summary_files.index(self._latest_filename)
summary_files = summary_files[index:]
summary_files = self._filter_summary_files(filenames)
summary_files = self._sorted_summary_files(summary_files)

for filename in summary_files:
if self._latest_summary_filename and \
(self._compare_summary_file(self._latest_summary_filename, filename)):
continue

file_path = FileHandler.join(self._summary_dir, filename)

if filename != self._latest_filename:
if filename != self._latest_summary_filename:
self._summary_file_handler = FileHandler(file_path, 'rb')
self._latest_filename = filename
self._latest_file_size = 0
self._latest_summary_filename = filename
self._latest_summary_file_size = 0

new_size = FileHandler.file_stat(file_path).size
if new_size == self._latest_file_size:
if new_size == self._latest_summary_file_size:
continue

self._latest_summary_file_size = new_size
try:
if not self._load_single_file(self._summary_file_handler, executor, events_data):
self._latest_file_size = self._summary_file_handler.offset
else:
self._latest_file_size = new_size
# Wait for data in this file to be processed to avoid loading multiple files at the same time.
logger.debug("Parse summary file offset %d, file path: %s.", self._latest_file_size, file_path)
return False
self._load_single_file(self._summary_file_handler)
except UnknownError as ex:
logger.warning("Parse summary file failed, detail: %r,"
"file path: %s.", str(ex), file_path)
return True
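For context, a small standalone sketch of the bookkeeping parse_files relies on above: only the newest summary file is re-read, and only when its size has grown since the last pass. The class name SummaryProgress is hypothetical:

class SummaryProgress:
    """Minimal sketch of the incremental-load bookkeeping used above."""

    def __init__(self):
        self.latest_filename = ''
        self.latest_file_size = 0

    def needs_parse(self, filename, file_size):
        # Only the newest summary file is re-read, and only when its size
        # has grown since the previous load pass.
        if filename != self.latest_filename:
            self.latest_filename = filename
            self.latest_file_size = 0
        if file_size == self.latest_file_size:
            return False
        self.latest_file_size = file_size
        return True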

def filter_files(self, filenames):
"""
Gets a list of summary files.

Args:
filenames (list[str]): File name list, like [filename1, filename2].

Returns:
list[str], filename list.
"""
return list(filter(
lambda filename: (re.search(r'summary\.\d+', filename)
and not filename.endswith("_lineage")), filenames))

def _load_single_file(self, file_handler, executor, events_data):
def _load_single_file(self, file_handler):
"""
Load a log file data.

Args:
file_handler (FileHandler): A file handler.
executor (Executor): The executor instance.
events_data (EventsData): The container of event data.

Returns:
bool, True if the summary file is finished loading.
"""
logger.debug("Load single summary file, file path: %s.", file_handler.file_path)
while True:
start_offset = file_handler.offset
try:
event_str = self.event_load(file_handler)
event_str = self._event_load(file_handler)
if event_str is None:
file_handler.reset_offset(start_offset)
return True
if len(event_str) > MAX_EVENT_STRING:
logger.warning("file_path: %s, event string: %d exceeds %d and drop it.",
file_handler.file_path, len(event_str), MAX_EVENT_STRING)
continue

future = executor.submit(self._event_parse, event_str, self._latest_filename)

def _add_tensor_event_callback(future_value):
tensor_values = future_value.result()
for tensor_value in tensor_values:
if tensor_value.plugin_name == PluginNameEnum.GRAPH.value:
try:
graph_tags = events_data.list_tags_by_plugin(PluginNameEnum.GRAPH.value)
except KeyError:
graph_tags = []

summary_tags = self.filter_files(graph_tags)
for tag in summary_tags:
events_data.delete_tensor_event(tag)

events_data.add_tensor_event(tensor_value)

if future is not None:
future.add_done_callback(exception_no_raise_wrapper(_add_tensor_event_callback))
return False
except (exceptions.CRCFailedError, exceptions.CRCLengthFailedError) as exc:
break

event = summary_pb2.Event.FromString(event_str)
self._event_parse(event)
except exceptions.CRCFailedError:
file_handler.reset_offset(start_offset)
file_size = file_handler.file_stat(file_handler.file_path).size
logger.error("Check crc failed and ignore this file, please check the integrity of the file, "
"file_path: %s, offset: %s, file size: %s. Detail: %s.",
file_handler.file_path, file_handler.offset, file_size, str(exc))
return True
logger.warning("Check crc faild and ignore this file, file_path=%s, "
"offset=%s.", file_handler.file_path, file_handler.offset)
break
except (OSError, DecodeError, exceptions.MindInsightException) as ex:
logger.error("Parse log file fail, and ignore this file, detail: %r, "
"file path: %s.", str(ex), file_handler.file_path)
return True
logger.warning("Parse log file fail, and ignore this file, detail: %r,"
"file path: %s.", str(ex), file_handler.file_path)
break
except Exception as ex:
logger.exception(ex)
raise UnknownError(str(ex))

@staticmethod
def event_load(file_handler):
def _event_load(self, file_handler):
"""
Load binary string to event string.

@@ -428,16 +174,18 @@ class _SummaryParser(_Parser):
# read the header
header_str = file_handler.read(HEADER_SIZE)
if not header_str:
logger.info("Load summary file finished, file_path=%s.", file_handler.file_path)
logger.info("End of file, file_path=%s.", file_handler.file_path)
return None
header_crc_str = file_handler.read(CRC_STR_SIZE)
if not header_crc_str:
header_crc_str = ''

if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE:
raise exceptions.CRCLengthFailedError("CRC header length or event header length is incorrect.")
if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE):
raise exceptions.CRCFailedError("The header of event crc is failed.")
logger.warning("Check header size and crc, record truncated at offset %s, "
"file_path=%s.", file_handler.offset, file_handler.file_path)
return None
if crc32.GetValueFromStr(header_crc_str) != crc32.GetMaskCrc32cValue(header_str, HEADER_SIZE):
raise exceptions.CRCFailedError()

# read the event body if integrity of header is verified
header = struct.unpack('Q', header_str)
@@ -451,106 +199,175 @@ class _SummaryParser(_Parser):
event_crc_str = ''

if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE:
raise exceptions.CRCLengthFailedError("The event string length or crc length is incorrect.")
if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len):
raise exceptions.CRCFailedError("The event string crc is incorrect.")
logger.warning("Check event crc, record truncated at offset %d, file_path: %s.",
file_handler.offset, file_handler.file_path)
return None
if crc32.GetValueFromStr(event_crc_str) != crc32.GetMaskCrc32cValue(event_str, event_len):
raise exceptions.CRCFailedError()

return event_str
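A minimal sketch of the record layout _event_load walks: an 8-byte length header, a 4-byte CRC of that header, the event body, and a 4-byte CRC of the body. The CRC verification (done above via the crc32 helpers) is deliberately omitted here:

import struct

HEADER_SIZE = 8   # length of the event body, packed as an unsigned 64-bit int
CRC_STR_SIZE = 4  # masked CRC32 stored after the header and after the body

def read_one_record(fp):
    # Read one record: [length][length_crc][body][body_crc]; return the body
    # bytes, or None on end of file / truncated record.
    header = fp.read(HEADER_SIZE)
    if len(header) < HEADER_SIZE:
        return None
    fp.read(CRC_STR_SIZE)              # header CRC (not verified in this sketch)
    (body_len,) = struct.unpack('Q', header)
    body = fp.read(body_len)
    if len(body) < body_len:
        return None
    fp.read(CRC_STR_SIZE)              # body CRC (not verified in this sketch)
    return body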

@staticmethod
def _parse_summary_value(value, plugin):
"""
Parse summary value and create corresponding container according to plugin.

Args:
value (Summary.Value): Value message in summary file.
plugin (str): Plugin value.

Returns:
Union[Summary.Value, HistogramContainer, TensorContainer, ImageContainer], original summary value
or an instance of HistogramContainer or TensorContainer or ImageContainer.
"""
tensor_event_value = getattr(value, plugin)
if plugin == PluginNameEnum.HISTOGRAM.value:
tensor_event_value = HistogramContainer(tensor_event_value)
# Drop steps if original_buckets_count exceeds HistogramContainer.MAX_ORIGINAL_BUCKETS_COUNT
# to avoid time-consuming re-sample process.
if tensor_event_value.histogram.original_buckets_count > Histogram.MAX_ORIGINAL_BUCKETS_COUNT:
logger.info('original_buckets_count exceeds '
'HistogramContainer.MAX_ORIGINAL_BUCKETS_COUNT')
return None

elif plugin == PluginNameEnum.TENSOR.value:
tensor_event_value = TensorContainer(tensor_event_value)
if tensor_event_value.error_code is not None:
logger.warning('tag: %s/tensor, dims: %s, tensor count %d exceeds %d, dropping it.',
value.tag, tensor_event_value.dims, tensor_event_value.size, MAX_TENSOR_COUNT)

elif plugin == PluginNameEnum.IMAGE.value:
tensor_event_value = ImageContainer(tensor_event_value)

return tensor_event_value

@staticmethod
def _event_parse(event_str, latest_file_name):
def _event_parse(self, event):
"""
Transform `Event` data into tensor events and add them to EventsData.

This method is static to avoid sending unnecessary objects to other processes.

Args:
event_str (str): Message event string in summary proto, data read from file handler.
latest_file_name (str): Latest file name.
event (Event): Message event in summary proto, data read from file handler.
"""

plugins = {
'scalar_value': PluginNameEnum.SCALAR,
'image': PluginNameEnum.IMAGE,
'histogram': PluginNameEnum.HISTOGRAM,
'tensor': PluginNameEnum.TENSOR
}
logger.debug("Start to parse event string. Event string len: %s.", len(event_str))
event = summary_pb2.Event.FromString(event_str)
logger.debug("Deserialize event string completed.")

ret_tensor_events = []
if event.HasField('summary'):
for value in event.summary.value:
for plugin in plugins:
if not value.HasField(plugin):
continue
plugin_name_enum = plugins[plugin]
logger.debug("Processing plugin value: %s.", plugin_name_enum)
tensor_event_value = _SummaryParser._parse_summary_value(value, plugin)
if tensor_event_value is None:
continue
if value.HasField('scalar_value'):
tag = '{}/{}'.format(value.tag, PluginNameEnum.SCALAR.value)
tensor_event = TensorEvent(wall_time=event.wall_time,
step=event.step,
tag=tag,
plugin_name=PluginNameEnum.SCALAR.value,
value=value.scalar_value)
self._events_data.add_tensor_event(tensor_event)

if value.HasField('image'):
tag = '{}/{}'.format(value.tag, PluginNameEnum.IMAGE.value)
tensor_event = TensorEvent(wall_time=event.wall_time,
step=event.step,
tag='{}/{}'.format(value.tag, plugin_name_enum.value),
plugin_name=plugin_name_enum.value,
value=tensor_event_value,
filename=latest_file_name)
logger.debug("Tensor event generated, plugin is %s, tag is %s, step is %s.",
plugin_name_enum, value.tag, event.step)
ret_tensor_events.append(tensor_event)

elif event.HasField('graph_def'):
tag=tag,
plugin_name=PluginNameEnum.IMAGE.value,
value=value.image)
self._events_data.add_tensor_event(tensor_event)

if event.HasField('graph_def'):
graph_proto = event.graph_def
graph = MSGraph()
graph.build_graph(event.graph_def)
graph.build_graph(graph_proto)
tensor_event = TensorEvent(wall_time=event.wall_time,
step=event.step,
tag=latest_file_name,
tag=self._latest_summary_filename,
plugin_name=PluginNameEnum.GRAPH.value,
value=graph,
filename=latest_file_name)
ret_tensor_events.append(tensor_event)
value=graph)

try:
graph_tags = self._events_data.list_tags_by_plugin(PluginNameEnum.GRAPH.value)
except KeyError:
graph_tags = []
summary_tags = self._filter_summary_files(graph_tags)
for tag in summary_tags:
self._events_data.delete_tensor_event(tag)

self._events_data.add_tensor_event(tensor_event)

def filter_valid_files(self):
"""
Gets a list of valid files from the given file path.

Returns:
list[str], file name list.

"""
filenames = []
for filename in FileHandler.list_dir(self._summary_dir):
if FileHandler.is_file(FileHandler.join(self._summary_dir, filename)):
filenames.append(filename)

valid_filenames = []
valid_filenames.extend(self._filter_summary_files(filenames))
valid_filenames.extend(self._filter_pb_files(filenames))
return list(set(valid_filenames))

@staticmethod
def _filter_summary_files(filenames):
"""
Gets a list of summary files.

Args:
filenames (list[str]): File name list, like [filename1, filename2].

return ret_tensor_events
Returns:
list[str], filename list.
"""
return list(filter(
lambda filename: (re.search(r'summary\.\d+', filename)
and not filename.endswith("_lineage")), filenames))

@staticmethod
def _compare_summary_file(current_file, dst_file):
"""
Compare the creation times of the two summary log files.

Args:
current_file (str): Must be the summary log file path.
dst_file (str): Must be the summary log file path.

Returns:
bool, True if the current file is newer than dst_file, otherwise False.
"""
current_time = int(re.search(r'summary\.(\d+)', current_file)[1])
dst_time = int(re.search(r'summary\.(\d+)', dst_file)[1])
if current_time > dst_time or (current_time == dst_time and current_file > dst_file):
return True
return False
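The comparison above only looks at the timestamp embedded in the file name. A small worked example, using a hypothetical file name of the form 'xxx.summary.<timestamp>':

import re

def summary_timestamp(filename):
    # 'train.summary.1620000001' (hypothetical name) -> 1620000001
    return int(re.search(r'summary\.(\d+)', filename)[1])

def is_newer(current_file, dst_file):
    # Newer timestamp wins; ties are broken by the lexicographically larger name.
    cur, dst = summary_timestamp(current_file), summary_timestamp(dst_file)
    return cur > dst or (cur == dst and current_file > dst_file)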

def sort_files(self, filenames):
@staticmethod
def _sorted_summary_files(summary_files):
"""Sort by creating time increments and filenames decrement."""
filenames = sorted(filenames,
filenames = sorted(summary_files,
key=lambda filename: (-int(re.search(r'summary\.(\d+)', filename)[1]), filename),
reverse=True)
return filenames

@staticmethod
def _filter_pb_files(filenames):
"""
Get a list of pb files.

Args:
filenames (list[str]): File name list, like [filename1, filename2].

Returns:
list[str], filename list.
"""
return list(filter(lambda filename: re.search(r'\.pb$', filename), filenames))

def _load_pb_files(self, filenames):
"""
Load and parse the pb files.

Args:
filenames (list[str]): File name list, like [filename1, filename2].

Returns:
list[str], filename list.
"""
pb_filenames = self._filter_pb_files(filenames)
pb_filenames = sorted(pb_filenames, key=lambda file: FileHandler.file_stat(
FileHandler.join(self._summary_dir, file)).mtime)
for filename in pb_filenames:
mtime = FileHandler.file_stat(FileHandler.join(self._summary_dir, filename)).mtime
if mtime <= self._latest_pb_file_mtime:
continue
self._latest_pb_file_mtime = mtime
self._parse_pb_file(filename)

def _parse_pb_file(self, filename):
"""
Parse pb file and write content to `EventsData`.

Args:
filename (str): The file path of pb file.
"""
file_path = FileHandler.join(self._summary_dir, filename)
logger.info("Start to load graph from pb file, file path: %s.", file_path)
filehandler = FileHandler(file_path)
model_proto = anf_ir_pb2.ModelProto()
try:
model_proto.ParseFromString(filehandler.read())
except ParseError:
logger.warning("The given file is not a valid pb file, file path: %s.", file_path)
return

graph = MSGraph()
graph.build_graph(model_proto.graph)
tensor_event = TensorEvent(wall_time=FileHandler.file_stat(file_path),
step=0,
tag=filename,
plugin_name=PluginNameEnum.GRAPH.value,
value=graph)
self._events_data.add_tensor_event(tensor_event)
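Both sides of this diff parse .pb graph files the same way: read the bytes, decode them into a ModelProto, and treat decode failures as "not a pb file". A hedged sketch of just that step, assuming MindInsight and its generated proto modules are importable:

from google.protobuf.message import DecodeError

from mindinsight.datavisual.proto_files import mindinsight_anf_ir_pb2 as anf_ir_pb2

def load_model_proto(file_path):
    # Parse a MindSpore .pb graph file into a ModelProto, or return None
    # if the file is not a valid protobuf message.
    with open(file_path, 'rb') as fp:
        data = fp.read()
    model_proto = anf_ir_pb2.ModelProto()
    try:
        model_proto.ParseFromString(data)
    except DecodeError:
        return None
    return model_proto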

+ 5
- 147
mindinsight/datavisual/data_transform/reservoir.py View File

@@ -17,28 +17,7 @@
import random
import threading

from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.utils.exceptions import ParamValueError
from mindinsight.datavisual.utils.utils import calc_histogram_bins


def binary_search(samples, target):
"""Binary search target in samples."""
left = 0
right = len(samples) - 1
while left <= right:
mid = (left + right) // 2
if target < samples[mid].step:
right = mid - 1
elif target > samples[mid].step:
left = mid + 1
else:
return mid

# If right is -1, the target is less than the first sample.
# If the list is [1, 2, 4] and the target is 3, right will be 1, so we insert at index 2.
return right + 1


class Reservoir:
@@ -86,28 +65,18 @@ class Reservoir:
"""
with self._mutex:
if len(self._samples) < self._samples_max_size or self._samples_max_size == 0:
self._add_sample(sample)
self._samples.append(sample)
else:
# Use the Reservoir Sampling algorithm to replace the old sample.
rand_int = self._sample_selector.randint(0, self._sample_counter)
rand_int = self._sample_selector.randint(
0, self._sample_counter)
if rand_int < self._samples_max_size:
self._samples.pop(rand_int)
self._samples.append(sample)
else:
self._samples = self._samples[:-1]
self._add_sample(sample)
self._samples[-1] = sample
self._sample_counter += 1

def _add_sample(self, sample):
"""Search the index and add sample."""
if not self._samples or sample.step > self._samples[-1].step:
self._samples.append(sample)
return
index = binary_search(self._samples, sample.step)
if index == len(self._samples):
self._samples.append(sample)
else:
self._samples.insert(index, sample)
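The replacement branch above is standard reservoir sampling. A compact, self-contained sketch of the core step (the ordered insertion done by _add_sample is left out), where reservoir_add is a hypothetical helper:

import random

def reservoir_add(samples, sample, counter, max_size, rng=random):
    # Keep every sample while there is room; once full, replace a uniformly
    # chosen old sample with probability max_size / (counter + 1), so every
    # observed sample stays equally likely to remain in the container.
    if max_size == 0 or len(samples) < max_size:
        samples.append(sample)
    else:
        rand_int = rng.randint(0, counter)   # inclusive on both ends
        if rand_int < max_size:
            samples[rand_int] = sample
    return counter + 1                       # caller keeps the running count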

def remove_sample(self, filter_fun):
"""
Remove the samples from Reservoir that do not meet the filter criteria.
@@ -137,114 +106,3 @@ class Reservoir:
round(self._sample_counter * sample_remaining_rate))

return remove_size


class _VisualRange:
"""Simple helper class to merge visual ranges."""
def __init__(self):
self._max = 0.0
self._min = 0.0
self._updated = False

def update(self, max_val: float, min_val: float) -> None:
"""
Merge visual range with given range.

Args:
max_val (float): Max value of given range.
min_val (float): Min value of given range.

"""
if not self._updated:
self._max = max_val
self._min = min_val
self._updated = True
return

if max_val > self._max:
self._max = max_val

if min_val < self._min:
self._min = min_val

@property
def max(self):
"""Gets max value of current range."""
return self._max

@property
def min(self):
"""Gets min value of current range."""
return self._min


class HistogramReservoir(Reservoir):
"""
Reservoir for histogram, which needs updating range over all steps.

Args:
size (int): Container Size. If the size is 0, the container is not limited.
"""
def __init__(self, size):
super().__init__(size)
# Marker to avoid redundant calc for unchanged histograms.
self._visual_range_up_to_date = False

def add_sample(self, sample):
"""Adds sample, see parent class for details."""
super().add_sample(sample)
self._visual_range_up_to_date = False

def samples(self):
"""Return all stored samples."""
with self._mutex:
if self._visual_range_up_to_date:
return list(self._samples)

# calc visual range
visual_range = _VisualRange()
max_count = 0
for sample in self._samples:
histogram_container = sample.value
if histogram_container.count == 0:
# ignore empty tensor
continue
max_count = max(histogram_container.count, max_count)
visual_range.update(histogram_container.max, histogram_container.min)

if visual_range.max == visual_range.min and not max_count:
logger.debug("Max equals to min. Count is zero.")

bins = calc_histogram_bins(max_count)

# update visual range
logger.debug(
"Visual histogram: min %s, max %s, bins %s, max_count %s.",
visual_range.min,
visual_range.max,
bins,
max_count)
for sample in self._samples:
histogram = sample.value.histogram
histogram.set_visual_range(visual_range.max, visual_range.min, bins)

self._visual_range_up_to_date = True
return list(self._samples)
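What samples() does above is merge one global (min, max) range and the largest bucket count over all stored steps, then apply them to every histogram so steps are visually comparable. A standalone sketch of the merge, using objects with min/max/count attributes as stand-ins for HistogramContainer:

def merge_visual_range(histograms):
    # Returns (min, max, max_count) across all non-empty histograms.
    lo, hi, max_count, seen = 0.0, 0.0, 0, False
    for hist in histograms:
        if hist.count == 0:
            continue                          # ignore empty tensors
        max_count = max(max_count, hist.count)
        if not seen:
            lo, hi, seen = hist.min, hist.max, True
        else:
            lo, hi = min(lo, hist.min), max(hi, hist.max)
    return lo, hi, max_count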


class ReservoirFactory:
"""Factory class to get reservoir instances."""
def create_reservoir(self, plugin_name: str, size: int) -> Reservoir:
"""
Creates reservoir for given plugin name.

Args:
plugin_name (str): Plugin name
size (int): Container Size. If the size is 0, the container is not limited.

Returns:
Reservoir, reservoir instance for given plugin name.
"""
if plugin_name in (PluginNameEnum.HISTOGRAM.value, PluginNameEnum.TENSOR.value):
return HistogramReservoir(size)
return Reservoir(size)

+ 0
- 15
mindinsight/datavisual/data_transform/summary_parser/__init__.py View File

@@ -1,15 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Summary parser module used to parse summary data and save it to csv file and image."""

+ 0
- 179
mindinsight/datavisual/data_transform/summary_parser/event_parser.py View File

@@ -1,179 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Parse summary file and save it local file."""

import os
import time

from google.protobuf.message import DecodeError

from mindinsight.datavisual.common import exceptions
from mindinsight.datavisual.common.log import parse_summary_logger
from mindinsight.datavisual.proto_files import lazy_read_pb2
from mindinsight.datavisual.data_access.file_handler import FileHandler
from mindinsight.datavisual.data_transform.summary_parser.image_writer import ImageWriter
from mindinsight.datavisual.data_transform.summary_parser.scalar_writer import ScalarWriter

from ..ms_data_loader import _SummaryParser

HEADER_SIZE = 8
CRC_STR_SIZE = 4
MAX_EVENT_STRING = 500000000
SCALAR = 'scalar_value'
IMAGE = 'image'
INFO_INTERVAL = 10
RETRY_TIMES = 2


class EventParser:
"""Parse summary file and save it to local file."""
def __init__(self, summary_file, output):
self.summary_file = summary_file
self._output = output
self._scalar_writer = ScalarWriter(self._output)
self._image_writer = ImageWriter(FileHandler.join(self._output, IMAGE))
self._file_size = 0
self._process_info = 0
self._image_check = False
self._scalar_check = False

def parse(self):
"""Load summary file and parse file content."""

summary_file_handler = FileHandler(self.summary_file, 'rb')

self._file_size = os.path.getsize(self.summary_file)
# When the current parsed size exceeds self._process_info, print the progress.
self._process_info = self._file_size // INFO_INTERVAL

parse_summary_logger.info("Loading %s.", self.summary_file)
result = self._load(summary_file_handler)

if result:
warning = ''
scalar_path = FileHandler.join(self._output, "scalar.csv")
image_path = FileHandler.join(self._output, IMAGE)

if not self._image_check:
warning = warning + " The summary file contains no image."
else:
parse_summary_logger.info("Images are written in %s.", image_path)

if not self._scalar_check:
warning = warning + " The summary file contains no scalar value."
else:
parse_summary_logger.info("Writing scalar data into %s.", scalar_path)

self._scalar_writer.write()
if warning:
parse_summary_logger.warning(warning)

parse_summary_logger.info("Finished loading %s.", self.summary_file)

def _load(self, file_handler):
"""
Load a log file data.

Args:
file_handler (FileHandler): A file handler.

Returns:
bool, True if the summary file is finished loading.
"""
crc_check_time = 0
while True:
start_offset = file_handler.offset
try:
event_str = _SummaryParser.event_load(file_handler)
if start_offset != file_handler.offset:
self._print_process(file_handler)
crc_check_time = 0
if event_str is None:
return True
if len(event_str) > MAX_EVENT_STRING:
parse_summary_logger.warning("file_path: %s, event string: %d exceeds %d and drop it.",
file_handler.file_path, len(event_str), MAX_EVENT_STRING)
continue
self._event_parse(event_str)
except exceptions.CRCLengthFailedError:
if crc_check_time > RETRY_TIMES:
parse_summary_logger.error(
"Check crc length failed, please check the summary file integrity, "
"the file may be in transfer, file_path: %s, offset=%s.",
file_handler.file_path, start_offset)
return True
parse_summary_logger.warning(
"Check crc failed, retrying %d/%d times.", crc_check_time + 1, RETRY_TIMES + 1)
file_handler.reset_offset(start_offset)
crc_check_time += 1
time.sleep(0.5)
except exceptions.CRCFailedError:
parse_summary_logger.error(
"Check crc failed, the file may have been modified, file_path=%s, offset=%s.",
file_handler.file_path, start_offset)
return True
except (OSError, DecodeError, exceptions.MindInsightException) as ex:
parse_summary_logger.error("Parse file fail, detail: %r, file path: %s.", str(ex),
file_handler.file_path)
return False
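The CRC-length branch above implements a small retry loop for records that look truncated because the summary file is still being written. A hedged, self-contained sketch of that pattern with stand-in names (CRCLengthFailedError, read_record and reset are hypothetical here):

import time

RETRY_TIMES = 2

class CRCLengthFailedError(Exception):
    """Stand-in for the MindInsight exception of the same name."""

def read_with_retry(read_record, reset, start_offset):
    # `read_record` raises CRCLengthFailedError while the record is still
    # incomplete on disk; `reset` rewinds the file handler to start_offset.
    for _ in range(RETRY_TIMES + 1):
        try:
            return read_record()
        except CRCLengthFailedError:
            reset(start_offset)
            time.sleep(0.5)        # give the writer time to finish the record
    return None                    # still truncated after retries; give up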

def _print_process(self, file_handler):
"""Prints the current parsing progress based on the progress of the read file."""
current_offset = file_handler.offset
if current_offset >= self._process_info:
parse_summary_logger.info("Current parsing process: %d/%d, %d%%.", current_offset, self._file_size,
100 * current_offset // os.path.getsize(self.summary_file))
self._process_info += self._file_size // INFO_INTERVAL
if self._process_info > os.path.getsize(self.summary_file):
self._process_info = os.path.getsize(self.summary_file)

def _event_parse(self, event_str):
"""
Deserialize the event string into an `Event` message and extract the scalar and image data.

Args:
event_str (str): Message event string in summary proto, data read from file handler.
"""

plugins = [SCALAR, IMAGE]

event = lazy_read_pb2.Event.FromString(event_str)

if event.HasField('summary'):
for value in event.summary.value:
for plugin in plugins:
if not value.HasField(plugin):
continue
self._parse_summary_value(value.tag, event.step, event.wall_time, value, plugin)

def _parse_summary_value(self, tag, step, wall_time, value, plugin):
"""
Parse summary value and write corresponding file according to plugin.

Args:
tag (str): Value tag.
step (int): Training step.
wall_time (float): Timestamp.
value (Summary.Value): Value message in summary file.
plugin (str): Plugin value.
"""
if plugin == SCALAR:
self._scalar_writer.add((tag, step, wall_time, value.scalar_value))
self._scalar_check = True

elif plugin == IMAGE:
self._image_writer.add((tag, step, value.image.encoded_image))
self._image_writer.write()
self._image_check = True

+ 0
- 54
mindinsight/datavisual/data_transform/summary_parser/image_writer.py View File

@@ -1,54 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Image Writer.

This module writes images into PNG files.
"""
import os
from urllib.parse import quote

from mindinsight.datavisual.data_transform.summary_parser.writer import Writer


class ImageWriter(Writer):
"""ImageWriter write image into a png file."""
def __init__(self, file_path):
"""
Init ImageWriter.

Args:
file_path (str): A directory path, e.g. '/output/image/'.
"""
self._file_path = file_path
self._image_data = []

def add(self, value):
"""
Add value.

Args:
value (object): Tag, step and encoded image value.
"""
self._image_data.append(value)

def write(self):
"""Write file."""
for i in range(len(self._image_data)):
tag = quote(self._image_data[i][0], safe="")
with os.fdopen(os.open("{}/{}_{}.png".format(self._file_path, tag, self._image_data[i][1]),
os.O_WRONLY | os.O_CREAT, 0o600), 'wb') as fp:
fp.write(self._image_data[i][2])
self._image_data = []
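A standalone sketch of the write path above: the tag is percent-encoded so characters such as '/' cannot escape the output directory, and files are created with 0o600 permissions. write_image is a hypothetical helper name:

import os
from urllib.parse import quote

def write_image(out_dir, tag, step, encoded_image):
    # Write one encoded image to <out_dir>/<quoted-tag>_<step>.png.
    safe_tag = quote(tag, safe="")
    path = "{}/{}_{}.png".format(out_dir, safe_tag, step)
    with os.fdopen(os.open(path, os.O_WRONLY | os.O_CREAT, 0o600), 'wb') as fp:
        fp.write(encoded_image)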

+ 0
- 52
mindinsight/datavisual/data_transform/summary_parser/scalar_writer.py View File

@@ -1,52 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Scalar Writer.

This module writes scalars into a CSV file.
"""
import csv
import os

from mindinsight.datavisual.data_transform.summary_parser.writer import Writer


class ScalarWriter(Writer):
"""ScalarWriter write scalar into a csv file."""
def __init__(self, file_path):
"""
Init ScalarWriter.

Args:
file_path (str): A directory path, e.g. '/output/'.
"""
self._file_path = file_path
self._scalar_data = [("tag", "step", "wall_time (unit: seconds)", "value")]

def add(self, value):
"""
Add value.

Args:
value (object): Tag, step, wall_time and scalar value.
"""
self._scalar_data.append(value)

def write(self):
"""Write file."""
with os.fdopen(os.open('{}/scalar.csv'.format(self._file_path), os.O_WRONLY | os.O_CREAT, 0o600), 'w',
encoding='utf-8') as fp:
writer = csv.writer(fp, dialect='excel')
writer.writerows(self._scalar_data)
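And the scalar counterpart, sketched as a standalone function (write_scalars is a hypothetical name); rows follow the (tag, step, wall_time, value) order produced by the event parser above:

import csv
import os

def write_scalars(out_dir, rows):
    # Write a header row plus all scalar rows to <out_dir>/scalar.csv.
    header = ("tag", "step", "wall_time (unit: seconds)", "value")
    path = '{}/scalar.csv'.format(out_dir)
    with os.fdopen(os.open(path, os.O_WRONLY | os.O_CREAT, 0o600), 'w',
                   encoding='utf-8') as fp:
        writer = csv.writer(fp, dialect='excel')
        writer.writerow(header)
        writer.writerows(rows)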

+ 0
- 32
mindinsight/datavisual/data_transform/summary_parser/writer.py View File

@@ -1,32 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Base writer."""
from abc import abstractmethod


class Writer:
"""Base writer for writers."""
@abstractmethod
def add(self, value):
"""
Abstract method for adding value.

Args:
value (object): scalar, tensor or image value with wall_time, tag and step.
"""

@abstractmethod
def write(self):
"""Abstract method for writing file."""

+ 85
- 316
mindinsight/datavisual/data_transform/summary_watcher.py View File

@@ -1,4 +1,4 @@
# Copyright 2020-2021 Huawei Technologies Co., Ltd
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,7 +14,6 @@
# ============================================================================
"""Summary watcher module."""
import json
import os
import re
import datetime
@@ -22,46 +21,27 @@ from pathlib import Path
from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.utils.tools import Counter
from mindinsight.datavisual.utils.utils import contains_null_byte
from mindinsight.datavisual.common.exceptions import MaxCountExceededError
from mindinsight.utils.exceptions import FileSystemPermissionError
LINEAGE_SUMMARY_SUFFIX = '_lineage'
EXPLAIN_SUMMARY_SUFFIX = '_explain'
DUMP_FILE_PREFIX = 'dump_'
class SummaryWatcher:
"""SummaryWatcher class."""
SUMMARY_FILENAME_REGEX = r'summary\.(?P<timestamp>\d+)'
PB_FILENAME_REGEX = r'\.pb$'
PROFILER_DIRECTORY_REGEX = r'^profiler'
CLUSTER_PROFILER_DIRECTORY_REGEX = r'^cluster_profiler$'
MAX_SUMMARY_DIR_COUNT = 999
# scan at most 20000 files/directories (approximately 1 second)
# if overall is False in SummaryWatcher.list_summary_directories
# if overall=False in SummaryWatcher.list_summary_directories
# to avoid long-time blocking
MAX_SCAN_COUNT = 20000
def __init__(self):
self._analyzers = []
def register_folder_analyzer(self, analyzer):
"""Register folder analyzer."""
self._analyzers.append(analyzer)
def list_summary_directories(self, summary_base_dir, overall=True, list_explain=False):
def list_summary_directories(self, summary_base_dir, overall=True):
"""
List summary directories within base directory.
Args:
summary_base_dir (str): Path of summary base directory.
overall (bool): Limit the total num of scanning if overall is False.
list_explain (bool): Indicates whether to list only the mindexplain folder.
Default is False, means not to list mindexplain folder.
Returns:
list, list of summary directory info, each of which including the following attributes.
@@ -69,23 +49,25 @@ class SummaryWatcher:
starting with "./".
- create_time (datetime): Creation time of summary file.
- update_time (datetime): Modification time of summary file.
- profiler (dict): profiler info, including profiler subdirectory path, profiler creation time and
profiler modification time.
Examples:
>>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
>>> summary_watcher = SummaryWatcher()
>>> directories = summary_watcher.list_summary_directories('/summary/base/dir')
"""
if contains_null_byte(summary_base_dir=summary_base_dir):
if self._contains_null_byte(summary_base_dir=summary_base_dir):
return []
relative_path = os.path.join('.', '')
if not self._is_valid_summary_directory(summary_base_dir, relative_path):
if not os.path.exists(summary_base_dir):
logger.warning('Path of summary base directory does not exist.')
return []
if not os.path.isdir(summary_base_dir):
logger.warning('Path of summary base directory is not a valid directory.')
return []
summary_dict = {}
counter = Counter(max_count=None if overall else self.MAX_SCAN_COUNT)
scan_count = 0
try:
entries = os.scandir(summary_base_dir)
@@ -93,77 +75,70 @@ class SummaryWatcher:
logger.error('Path of summary base directory is not accessible.')
raise FileSystemPermissionError('Path of summary base directory is not accessible.')
# sort in ascending order according to modification time.
entries = [entry for entry in entries if not entry.is_symlink()]
entries = sorted(entries, key=lambda x: x.stat().st_mtime)
for entry in entries:
if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
break
try:
counter.add()
except MaxCountExceededError:
logger.info('Stop further scanning because overall is False and '
'the number of scanned files exceeds the upper limit.')
break
relative_path = os.path.join('.', '')
if entry.is_symlink():
pass
elif entry.is_file():
self._update_summary_dict(summary_dict, summary_base_dir, relative_path, entry, list_explain)
self._update_summary_dict(summary_dict, relative_path, entry)
elif entry.is_dir():
self._update_summary_dict(summary_dict, summary_base_dir, relative_path, entry, list_explain)
entry_path = os.path.realpath(os.path.join(summary_base_dir, entry.name))
self._scan_subdir_entries(summary_dict, summary_base_dir, entry_path, entry, counter, list_explain)
directories = []
for key, value in summary_dict.items():
directory = {
'relative_path': key,
**value
}
directories.append(directory)
full_path = os.path.realpath(os.path.join(summary_base_dir, entry.name))
try:
subdir_entries = os.scandir(full_path)
except PermissionError:
logger.warning('Path of %s under summary base directory is not accessible.', entry.name)
else:
for subdir_entry in subdir_entries:
if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
break
subdir_relative_path = os.path.join('.', entry.name)
if subdir_entry.is_symlink():
pass
elif subdir_entry.is_file():
self._update_summary_dict(summary_dict, subdir_relative_path, subdir_entry)
scan_count += 1
if not overall and scan_count >= self.MAX_SCAN_COUNT:
break
scan_count += 1
if not overall and scan_count >= self.MAX_SCAN_COUNT:
logger.info('Stop further scanning because overall is False and '
'the number of scanned files exceeds the upper limit.')
break
directories = [{
'relative_path': key,
'create_time': value['ctime'],
'update_time': value['mtime'],
} for key, value in summary_dict.items()]
# sort by update time in descending order and relative path in ascending order
directories.sort(key=lambda x: (-int(x['update_time'].timestamp()), x['relative_path']))
return directories
def _scan_subdir_entries(self, summary_dict, summary_base_dir, entry_path, entry, counter, list_explain):
def _contains_null_byte(self, **kwargs):
"""
Scan subdir entries.
Check if arg contains null byte.
Args:
summary_dict (dict): Temporary data structure to hold summary directory info.
summary_base_dir (str): Path of summary base directory.
entry_path(str): Path entry.
entry_name (str): Name of entry.
counter (Counter): An instance of CountLimiter.
list_explain (bool): Indicates whether to list only the mindexplain folder.
"""
try:
subdir_entries = os.scandir(entry_path)
except PermissionError:
logger.warning('Path of %s under summary base directory is not accessible.', entry.name)
return
kwargs (Any): Keyword arguments to check for null bytes.
# sort in ascending order according to modification time.
subdir_entries = [subdir_entry for subdir_entry in subdir_entries if not subdir_entry.is_symlink()]
subdir_entries = sorted(subdir_entries, key=lambda x: x.stat().st_mtime)
for subdir_entry in subdir_entries:
if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
break
try:
counter.add()
except MaxCountExceededError:
logger.info('Stop further scanning because overall is False and '
'the number of scanned files exceeds the upper limit.')
break
subdir_relative_path = os.path.join('.', entry.name)
if subdir_entry.is_symlink():
pass
self._update_summary_dict(summary_dict, summary_base_dir, subdir_relative_path, subdir_entry, list_explain)
Returns:
bool, indicates if any arg contains null byte.
"""
for key, value in kwargs.items():
if not isinstance(value, str):
continue
if '\x00' in value:
logger.warning('%s contains null byte \\x00.', key)
return True
relative_path = './'
self._check_by_analyzers(entry, summary_base_dir, relative_path, summary_dict)
return False
def _is_valid_summary_directory(self, summary_base_dir, relative_path):
"""
@@ -179,9 +154,11 @@ class SummaryWatcher:
"""
summary_base_dir = os.path.realpath(summary_base_dir)
summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path))
if summary_base_dir == summary_directory:
return True
if not os.path.exists(summary_directory):
logger.info('Path of summary directory does not exist.')
logger.warning('Path of summary directory does not exist.')
return False
if not os.path.isdir(summary_directory):
@@ -196,135 +173,39 @@ class SummaryWatcher:
return True
def _update_summary_dict(self, summary_dict, summary_base_dir, relative_path, entry, list_explain):
def _update_summary_dict(self, summary_dict, relative_path, entry):
"""
Update summary_dict with ctime and mtime.
Args:
summary_dict (dict): Temporary data structure to hold summary directory info.
summary_base_dir (str): Path of summary base directory.
relative_path (str): Relative path of summary directory, referring to summary base directory,
starting with "./" .
entry (DirEntry): Directory entry instance needed to check with regular expression.
list_explain (bool): Indicates whether to list only the mindexplain folder.
"""
try:
ctime, mtime = self._get_stat_time(entry)
except FileNotFoundError:
logger.warning('File %s not found', entry.name)
summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
if summary_pattern is None and pb_pattern is None:
return
if entry.is_file():
summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
if not self._is_valid_pattern_result(summary_pattern, pb_pattern, list_explain, entry):
return
timestamp = None
if summary_pattern is not None:
timestamp = int(summary_pattern.groupdict().get('timestamp'))
try:
# extract created time from filename
ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
except OverflowError:
return
if relative_path not in summary_dict:
summary_dict[relative_path] = _new_entry(ctime, mtime)
job_dict = _get_explain_job_info(summary_base_dir, relative_path, timestamp)
summary_dict[relative_path].update(job_dict)
if summary_dict[relative_path]['create_time'] < ctime:
summary_dict[relative_path].update({'create_time': ctime, 'update_time': mtime})
job_dict = _get_explain_job_info(summary_base_dir, relative_path, timestamp)
summary_dict[relative_path].update(job_dict)
if not summary_pattern:
summary_dict[relative_path]['graph_files'] += 1
elif entry.name.endswith(LINEAGE_SUMMARY_SUFFIX):
summary_dict[relative_path]['lineage_files'] += 1
elif entry.name.endswith(EXPLAIN_SUMMARY_SUFFIX):
summary_dict[relative_path]['explain_files'] += 1
else:
summary_dict[relative_path]['summary_files'] += 1
self._check_by_analyzers(entry, summary_base_dir, relative_path, summary_dict)
elif entry.is_dir():
self._check_by_analyzers(entry, summary_base_dir, relative_path, summary_dict)
if list_explain:
if summary_pattern is not None:
timestamp = int(summary_pattern.groupdict().get('timestamp'))
try:
# extract created time from filename
ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
except OverflowError:
return
else:
ctime = datetime.datetime.fromtimestamp(entry.stat().st_ctime).astimezone()
cluster_profiler_type, is_cluster_profiler = \
self._find_cluster_profiler_dir(entry, summary_base_dir, relative_path)
profiler_type, is_profiler = self._find_profiler_dir(entry, summary_base_dir, relative_path)
if is_cluster_profiler or is_profiler:
if is_cluster_profiler:
profiler_type = cluster_profiler_type
profiler = {
'directory': os.path.join('.', entry.name),
'create_time': ctime,
'update_time': mtime,
"profiler_type": profiler_type
}
if relative_path in summary_dict:
summary_dict[relative_path]['profiler'] = profiler
else:
summary_dict[relative_path] = _new_entry(ctime, mtime, profiler)
def _check_by_analyzers(self, entry, summary_base_dir, relative_path, summary_dict):
"""Check by all analyzers."""
try:
ctime, mtime = self._get_stat_time(entry)
except FileNotFoundError:
logger.warning('File %s not found', entry.name)
return
# extract modified time from filesystem
mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone()
for analyzer in self._analyzers:
register_info = analyzer.analyze(entry, summary_base_dir, relative_path)
if register_info:
if relative_path not in summary_dict:
summary_dict[relative_path] = _new_entry(ctime, mtime)
summary_dict[relative_path].update(register_info)
def _get_stat_time(self, entry):
"""Get ctime and mtime."""
stat = entry.stat()
ctime = datetime.datetime.fromtimestamp(stat.st_ctime).astimezone()
mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()
return ctime, mtime
def _find_profiler_dir(self, entry, summary_base_dir, relative_path):
"""Find profiler dir by the given relative path."""
profiler_pattern = re.search(self.PROFILER_DIRECTORY_REGEX, entry.name)
full_dir_path = os.path.join(summary_base_dir, relative_path, entry.name)
is_valid_profiler_dir, profiler_type = self._is_valid_profiler_directory(full_dir_path)
if profiler_pattern is None or not is_valid_profiler_dir:
return profiler_type, False
return profiler_type, True
def _find_cluster_profiler_dir(self, entry, summary_base_dir, relative_path):
"""Find profiler cluster dir by the given relative path."""
cluster_profiler_pattern = re.search(self.CLUSTER_PROFILER_DIRECTORY_REGEX, entry.name)
full_dir_path = os.path.join(summary_base_dir, relative_path, entry.name)
is_valid_cluster_profiler_dir, profiler_type = self._is_valid_cluster_profiler_directory(full_dir_path)
if cluster_profiler_pattern is None or not is_valid_cluster_profiler_dir:
return profiler_type, False
return profiler_type, True
def _is_valid_pattern_result(self, summary_pattern, pb_pattern, list_explain, entry):
"""Check the pattern result is valid."""
if summary_pattern is None and pb_pattern is None:
return False
if list_explain and not entry.name.endswith(EXPLAIN_SUMMARY_SUFFIX):
return False
if not list_explain and entry.name.endswith(EXPLAIN_SUMMARY_SUFFIX):
return False
return True
if relative_path not in summary_dict or summary_dict[relative_path]['ctime'] < ctime:
summary_dict[relative_path] = {
'ctime': ctime,
'mtime': mtime,
}
def is_summary_directory(self, summary_base_dir, relative_path):
"""
@@ -343,7 +224,7 @@ class SummaryWatcher:
>>> summary_watcher = SummaryWatcher()
>>> summaries = summary_watcher.is_summary_directory('/summary/base/dir', './job-01')
"""
if contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
if self._contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
return False
if not self._is_valid_summary_directory(summary_base_dir, relative_path):
@@ -357,55 +238,15 @@ class SummaryWatcher:
raise FileSystemPermissionError('Path of summary base directory is not accessible.')
for entry in entries:
if entry.is_symlink():
if entry.is_symlink() or not entry.is_file():
continue
summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
if summary_pattern is not None and entry.is_file():
return True
pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
if pb_pattern is not None and entry.is_file():
if summary_pattern or pb_pattern:
return True
if entry.is_dir():
profiler_pattern = re.search(self.PROFILER_DIRECTORY_REGEX, entry.name)
cluster_profiler_pattern = re.search(self.CLUSTER_PROFILER_DIRECTORY_REGEX, entry.name)
if profiler_pattern is not None or cluster_profiler_pattern is not None:
full_path = os.path.realpath(os.path.join(summary_directory, entry.name))
if self._is_valid_profiler_directory(full_path)[0] or \
self._is_valid_cluster_profiler_directory(full_path)[0]:
return True
if os.path.exists(os.path.join(summary_directory, os.path.join(entry.name, ".metadata"))):
return True
return False
def _is_valid_profiler_directory(self, directory):
profiler_type = ""
try:
from mindinsight.profiler.common.util import analyse_device_list_from_profiler_dir
device_list, profiler_type = analyse_device_list_from_profiler_dir(directory)
except ImportError:
device_list = []
return bool(device_list), profiler_type
def _is_valid_cluster_profiler_directory(self, directory):
"""Determine whether it is a valid cluster profiler."""
cluster_profiler_type = 'cluster'
entries = os.scandir(directory)
for entry in entries:
if entry.is_symlink():
continue
if entry.is_dir():
full_path = os.path.join(directory, entry.name, 'profiler')
is_profile, profiler_type = self._is_valid_profiler_directory(full_path)
if is_profile:
return is_profile, cluster_profiler_type + '_' + profiler_type
return False, cluster_profiler_type
def list_summary_directories_by_pagination(self, summary_base_dir, offset=0, limit=10):
"""
List summary directories within base directory.
@@ -459,7 +300,7 @@ class SummaryWatcher:
>>> summary_watcher = SummaryWatcher()
>>> summaries = summary_watcher.list_summaries('/summary/base/dir', './job-01')
"""
if contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
if self._contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
return []
if not self._is_valid_summary_directory(summary_base_dir, relative_path):
@@ -488,13 +329,8 @@ class SummaryWatcher:
except OverflowError:
continue
try:
stat = entry.stat()
except FileNotFoundError:
logger.warning('File %s not found.', entry.name)
continue
mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()
# extract modified time from filesystem
mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone()
summaries.append({
'file_name': entry.name,
@@ -506,70 +342,3 @@ class SummaryWatcher:
summaries.sort(key=lambda x: (-int(x['update_time'].timestamp()), x['file_name']))
return summaries
def list_explain_directories(self, summary_base_dir, offset=0, limit=None):
"""
List explain directories within base directory.
Args:
summary_base_dir (str): Path of summary base directory.
offset (int): Page offset. For example, offset 0 means the current page is 1. Default value is 0.
limit (int): The maximum number of data items per page. Default value is 10.
Returns:
tuple[total, directories], total indicates the overall number of explain directories and directories
indicate list of summary directory info including the following attributes.
- relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
starting with "./".
- create_time (datetime): Creation time of summary file.
- update_time (datetime): Modification time of summary file.
Raises:
ParamValueError, if offset < 0 or limit is out of valid value range.
ParamTypeError, if offset or limit is not valid integer.
Examples:
>>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
>>> summary_watcher = SummaryWatcher()
>>> total, directories = summary_watcher.list_explain_directories('/summary/base/dir', offset=0, limit=10)
"""
offset = Validation.check_offset(offset=offset)
limit = Validation.check_limit(limit, min_value=1, max_value=999, default_value=None)
directories = self.list_summary_directories(summary_base_dir, overall=False, list_explain=True)
if limit is None:
return len(directories), directories
return len(directories), directories[offset * limit:(offset + 1) * limit]
def _new_entry(ctime, mtime, profiler=None):
"""Create a new entry."""
return {
'create_time': ctime,
'update_time': mtime,
'summary_files': 0,
'lineage_files': 0,
'explain_files': 0,
'graph_files': 0,
'profiler': profiler,
'dump_dir': None
}
def _get_explain_job_info(summary_base_dir, relative_path, timestamp):
"""Get explain job info."""
if timestamp is None:
job_dict = {"saliency_map": False, "hierarchical_occlusion": False}
return job_dict
json_path = os.path.join(summary_base_dir, relative_path.lstrip("./"), f"_explain_{timestamp}",
"manifest.json")
if os.path.exists(json_path):
with open(json_path, "r") as f:
job_dict = json.load(f)
return job_dict
# Set default value to keep compatibility with previous versions
job_dict = {"saliency_map": True, "hierarchical_occlusion": False}
return job_dict
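A small sketch of the manifest path built above, using made-up base directory, relative path and timestamp values:

import os

summary_base_dir, relative_path, timestamp = '/summary/base/dir', './job-01', 1610000000
json_path = os.path.join(summary_base_dir, relative_path.lstrip("./"),
                         f"_explain_{timestamp}", "manifest.json")
print(json_path)  # /summary/base/dir/job-01/_explain_1610000000/manifest.json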

+ 0  - 157  mindinsight/datavisual/data_transform/tensor_container.py

@@ -1,157 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Tensor data container."""
import numpy as np

from mindinsight.datavisual.data_transform.histogram import Histogram, Bucket
from mindinsight.datavisual.utils.utils import calc_histogram_bins
from mindinsight.datavisual.common.exceptions import TensorTooLargeError
from mindinsight.utils.exceptions import ParamValueError
from mindinsight.utils.tensor import TensorUtils

MAX_TENSOR_COUNT = 10000000
TENSOR_TOO_LARGE_ERROR = TensorTooLargeError("").error_code


def calc_original_buckets(np_value, stats):
"""
Calculate buckets from tensor data.

Args:
np_value (numpy.ndarray): A numpy.ndarray of tensor data.
stats (Statistics): An instance of Statistics about tensor data.

Returns:
list, a list of buckets describing the tensor data.

Raises:
ParamValueError, If np_value or stats is None.
"""
if np_value is None or stats is None:
raise ParamValueError("Invalid input. np_value or stats is None.")
valid_count = stats.count - stats.nan_count - stats.neg_inf_count - stats.pos_inf_count
if not valid_count:
return []

bins = calc_histogram_bins(valid_count)
first_edge, last_edge = stats.min, stats.max

if not first_edge < last_edge:
first_edge -= 0.5
last_edge += 0.5

bins = np.linspace(first_edge, last_edge, bins + 1, dtype=np_value.dtype)
hists, edges = np.histogram(np_value, bins=bins)

buckets = []
for hist, edge1, edge2 in zip(hists, edges, edges[1:]):
bucket = Bucket(edge1, edge2 - edge1, hist)
buckets.append(bucket)

return buckets
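A standalone sketch of the bucket math above using plain numpy; each bucket records (left edge, width, count), mirroring the Bucket fields used by the function:

import numpy as np

np_value = np.array([0.1, 0.4, 0.4, 0.9])
bins = np.linspace(np_value.min(), np_value.max(), 4 + 1, dtype=np_value.dtype)
hists, edges = np.histogram(np_value, bins=bins)
buckets = [(float(e1), float(e2 - e1), int(h)) for h, e1, e2 in zip(hists, edges, edges[1:])]
print(buckets)  # four (left, width, count) tuples covering [0.1, 0.9]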


class TensorContainer:
"""
Tensor data container.

Args:
tensor_message (Summary.TensorProto): Tensor message in summary file.
"""

def __init__(self, tensor_message):
# The original dims cannot be pickled for transfer to another process, so a tuple is used.
self._dims = tuple(tensor_message.dims)
self._data_type = tensor_message.data_type
self._np_array = self.get_ndarray(tensor_message.float_data)
self._error_code = None
if self._np_array.size > MAX_TENSOR_COUNT:
self._error_code = TENSOR_TOO_LARGE_ERROR
self._np_array = np.array([])
self._stats = TensorUtils.get_statistics_from_tensor(self._np_array)
original_buckets = calc_original_buckets(self._np_array, self._stats)
self._count = sum(bucket.count for bucket in original_buckets)
self._max = self._stats.max
self._min = self._stats.min
self._histogram = Histogram(tuple(original_buckets), self._max, self._min, self._count)


@property
def size(self):
"""Get size of tensor."""
return self._np_array.size

@property
def error_code(self):
"""Get size of tensor."""
return self._error_code

@property
def dims(self):
"""Get dims of tensor."""
return self._dims

@property
def data_type(self):
"""Get data type of tensor."""
return self._data_type

@property
def ndarray(self):
"""Get ndarray of tensor."""
return self._np_array

@property
def max(self):
"""Get max value of tensor."""
return self._max

@property
def min(self):
"""Get min value of tensor."""
return self._min

@property
def stats(self):
"""Get statistics data of tensor."""
return self._stats

@property
def count(self):
"""Get count value of tensor."""
return self._count

@property
def histogram(self):
"""Get histogram data."""
return self._histogram

def buckets(self):
"""Get histogram buckets."""
if self._histogram is None:
return None
return self._histogram.buckets()

def get_ndarray(self, tensor):
"""
Get ndarray of tensor.

Args:
tensor: Flat tensor data (the float_data field of the summary tensor message).

Returns:
numpy.ndarray, ndarray of tensor.
"""
return np.array(tuple(tensor)).reshape(self.dims)
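A quick sketch of what get_ndarray does with the flat float_data and the recorded dims (made-up values):

import numpy as np

dims, float_data = (2, 3), [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
arr = np.array(tuple(float_data)).reshape(dims)
print(arr.shape, arr.dtype)  # (2, 3) float64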

+ 0  - 1  mindinsight/datavisual/processors/__init__.py

@@ -12,4 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Processors module used to define all kinds of processor."""

+ 31  - 18  mindinsight/datavisual/processors/graph_processor.py

@@ -20,8 +20,9 @@ and the status of graph will be checked before calling `Graph` object.
from mindinsight.datavisual.common import exceptions
from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.data_transform.graph import NodeTypeEnum
from mindinsight.datavisual.processors.base_processor import BaseProcessor
from mindinsight.datavisual.common.exceptions import NodeNotInGraphError
from mindinsight.utils.exceptions import ParamValueError


class GraphProcessor(BaseProcessor):
@@ -39,9 +40,9 @@ class GraphProcessor(BaseProcessor):

train_job = self._data_manager.get_train_job_by_plugin(train_id, PluginNameEnum.GRAPH.value)
if train_job is None:
raise exceptions.TrainJobNotExistError()
if not train_job['tags'] or (tag is not None and tag not in train_job['tags']):
raise exceptions.GraphNotExistError()
raise exceptions.SummaryLogPathInvalid()
if not train_job['tags']:
raise ParamValueError("Can not find any graph data in the train job.")

if tag is None:
tag = train_job['tags'][0]
@@ -49,12 +50,13 @@ class GraphProcessor(BaseProcessor):
tensors = self._data_manager.list_tensors(train_id, tag=tag)
self._graph = tensors[0].value

def list_nodes(self, scope):
def get_nodes(self, name, node_type):
"""
Get the nodes of every layer in graph.

Args:
scope (str): The name of a scope.
name (str): The name of a node.
node_type (Any): The type of node, either 'name_scope' or 'polymeric'.

Returns:
TypedDict('Nodes', {'nodes': list[Node]}), format is {'nodes': [<Node object>]}.
@@ -78,19 +80,33 @@ class GraphProcessor(BaseProcessor):
}
},
"output_i" : -1,
"proxy_input" : {},
"proxy_output" : {},
"independent_layout" : False,
"polymeric_input" : {},
"polymeric_output" : {},
"polymeric_scope_name" : "",
"subnode_count" : 0,
"type" : "Data"
}
]
}
"""
if scope and not self._graph.exist_node(scope):
raise NodeNotInGraphError(node_name=scope)
if node_type not in [NodeTypeEnum.NAME_SCOPE.value, NodeTypeEnum.POLYMERIC_SCOPE.value]:
raise ParamValueError(
'The node type is not supported, only either %s or %s.'
'' % (NodeTypeEnum.NAME_SCOPE.value, NodeTypeEnum.POLYMERIC_SCOPE.value))

if name and not self._graph.exist_node(name):
raise ParamValueError("The node name is not in graph.")
nodes = []
if node_type == NodeTypeEnum.NAME_SCOPE.value:
nodes = self._graph.get_normal_nodes(name)

if node_type == NodeTypeEnum.POLYMERIC_SCOPE.value:
if not name:
raise ParamValueError('The node name "%s" is not in the graph, node type is %s.' %
(name, node_type))
polymeric_scope_name = name
nodes = self._graph.get_polymeric_nodes(polymeric_scope_name)

nodes = self._graph.list_node_by_scope(scope=scope)
return {'nodes': nodes}

def search_node_names(self, search_content, offset, limit):
@@ -103,15 +119,12 @@ class GraphProcessor(BaseProcessor):
limit (int): The max data items for per page.

Returns:
Dict, the searched nodes.
TypedDict('Names', {'names': list[str]}), {"names": ["node_names"]}.
"""
offset = Validation.check_offset(offset=offset)
limit = Validation.check_limit(limit, min_value=1, max_value=1000)
nodes = self._graph.search_nodes_by_pattern(search_content)
real_offset = offset * limit
search_nodes = self._graph.get_nodes(nodes[real_offset:real_offset + limit])

return {"nodes": search_nodes}
names = self._graph.search_node_names(search_content, offset, limit)
return {"names": names}

def search_single_node(self, name):
"""


+ 0  - 70  mindinsight/datavisual/processors/histogram_processor.py

@@ -1,70 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Histogram Processor APIs."""
from mindinsight.utils.exceptions import ParamValueError
from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.common.exceptions import HistogramNotExistError
from mindinsight.datavisual.processors.base_processor import BaseProcessor


class HistogramProcessor(BaseProcessor):
"""Histogram Processor."""
def get_histograms(self, train_id, tag):
"""
Builds a JSON-serializable object with information about histogram data.

Args:
train_id (str): The ID of the events data.
tag (str): The name of the tag the histogram data all belong to.

Returns:
dict, a dict including the `train_id`, `tag`, and `histograms`.
{
"train_id": ****,
"tag": ****,
"histograms": [{
"wall_time": ****,
"step": ****,
"bucket": [[**, **, **]],
},
{...}
]
}
"""
Validation.check_param_empty(train_id=train_id, tag=tag)
logger.info("Start to process histogram data...")
try:
tensors = self._data_manager.list_tensors(train_id, tag)
except ParamValueError as err:
raise HistogramNotExistError(err.message)

histograms = []
for tensor in tensors:
histogram = tensor.value
buckets = histogram.buckets()
histograms.append({
"wall_time": tensor.wall_time,
"step": tensor.step,
"buckets": buckets
})

logger.info("Histogram data processing is finished!")
response = {
"train_id": train_id,
"tag": tag,
"histograms": histograms
}
return response
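For reference, the response assembled above looks roughly like this, with made-up numbers; each bucket is [left, width, count]:

response = {
    "train_id": "./job-01",
    "tag": "weights/histogram",
    "histograms": [
        {"wall_time": 1610000000.0, "step": 1, "buckets": [[-0.5, 1.0, 3], [0.5, 1.0, 7]]},
    ],
}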

+ 13  - 27  mindinsight/datavisual/processors/images_processor.py

@@ -16,7 +16,6 @@
from mindinsight.datavisual.utils.tools import to_int
from mindinsight.utils.exceptions import ParamValueError
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.common.exceptions import ImageNotExistError
from mindinsight.datavisual.processors.base_processor import BaseProcessor


@@ -47,10 +46,7 @@ class ImageProcessor(BaseProcessor):
"""
Validation.check_param_empty(train_id=train_id, tag=tag)
result = []
try:
tensors = self._data_manager.list_tensors(train_id, tag)
except ParamValueError as ex:
raise ImageNotExistError(ex.message)
tensors = self._data_manager.list_tensors(train_id, tag)

for tensor in tensors:
# no tensor_proto in TensorEvent
@@ -70,7 +66,7 @@ class ImageProcessor(BaseProcessor):
Args:
train_id (str): The ID of the events data the image belongs to.
tag (str): The name of the tag the images belongs to.
step (int): The step of the image in the current reservoir. If step = -1, return image of final step.
step (int): The step of the image in the current reservoir.

Returns:
bytes, a byte string of the raw image bytes.
@@ -79,28 +75,18 @@ class ImageProcessor(BaseProcessor):
Validation.check_param_empty(train_id=train_id, tag=tag, step=step)
step = to_int(step, "step")

try:
tensors = self._data_manager.list_tensors(train_id, tag)
except ParamValueError as ex:
raise ImageNotExistError(ex.message)
tensors = self._data_manager.list_tensors(train_id, tag)

image = None
for tensor in tensors:
if tensor.step == step:
# Default value for bytes field is empty byte string normally,
# see also "Optional Fields And Default Values" in protobuf
# documentation.
image = tensor.value.encoded_image
break

image = _find_image(tensors, step)
if image is None:
raise ImageNotExistError("Can not find the step with given train job id and tag.")
raise ParamValueError("Can not find the step with given train job id and tag.")

return image


def _find_image(tensors, step):
"""Find the specific image by step from tensors. If step = -1, return image of final step."""
if not tensors:
return None
if step == -1:
return tensors[-1].value.encoded_image
for tensor in tensors:
if tensor.step == step:
# Default value for bytes field is empty byte string normally,
# see also "Optional Fields And Default Values" in protobuf
# documentation.
return tensor.value.encoded_image
return None
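A tiny usage sketch of the step handling in _find_image, assuming the helper above is in scope; the stand-in tensor objects here are hypothetical (the real ones come from the data manager):

from collections import namedtuple

Value = namedtuple('Value', ['encoded_image'])
Tensor = namedtuple('Tensor', ['step', 'value'])
tensors = [Tensor(0, Value(b'img-0')), Tensor(5, Value(b'img-5'))]
print(_find_image(tensors, -1))  # b'img-5' (image of the final step)
print(_find_image(tensors, 3))   # None (no such step)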

+ 1  - 74  mindinsight/datavisual/processors/scalars_processor.py

@@ -13,13 +13,6 @@
# limitations under the License.
# ============================================================================
"""Scalar Processor APIs."""
from urllib.parse import unquote

from mindinsight.utils.exceptions import ParamValueError, UrlDecodeError
from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.utils.tools import if_nan_inf_to_none
from mindinsight.datavisual.common.exceptions import ScalarNotExistError
from mindinsight.datavisual.common.exceptions import TrainJobNotExistError
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.processors.base_processor import BaseProcessor

@@ -40,10 +33,7 @@ class ScalarsProcessor(BaseProcessor):
"""
Validation.check_param_empty(train_id=train_id, tag=tag)
job_response = []
try:
tensors = self._data_manager.list_tensors(train_id, tag)
except ParamValueError as ex:
raise ScalarNotExistError(ex.message)
tensors = self._data_manager.list_tensors(train_id, tag)

for tensor in tensors:
job_response.append({
@@ -51,66 +41,3 @@ class ScalarsProcessor(BaseProcessor):
'step': tensor.step,
'value': tensor.value})
return dict(metadatas=job_response)

def get_scalars(self, train_ids, tags):
"""
Get scalar data for given train_ids and tags.

Args:
train_ids (list): Specify list of train job ID.
tags (list): Specify list of tags.

Returns:
list[dict], a list of dictionaries containing the `wall_time`, `step`, `value` for each scalar.
"""
for index, train_id in enumerate(train_ids):
try:
train_id = unquote(train_id, errors='strict')
except UnicodeDecodeError:
raise UrlDecodeError('Unquote train id error with strict mode')
else:
train_ids[index] = train_id

scalars = []
for train_id in train_ids:
scalars += self._get_train_scalars(train_id, tags)

return scalars

def _get_train_scalars(self, train_id, tags):
"""
Get scalar data for given train_id and tags.

Args:
train_id (str): Specify train job ID.
tags (list): Specify list of tags.

Returns:
list[dict], a list of dictionaries containing the `wall_time`, `step`, `value` for each scalar.
"""
scalars = []
for tag in tags:
try:
tensors = self._data_manager.list_tensors(train_id, tag)
except ParamValueError:
continue
except TrainJobNotExistError:
logger.warning('Can not find the given train job in cache.')
return []

scalar = {
'train_id': train_id,
'tag': tag,
'values': [],
}

for tensor in tensors:
scalar['values'].append({
'wall_time': tensor.wall_time,
'step': tensor.step,
'value': if_nan_inf_to_none('scalar_value', tensor.value),
})

scalars.append(scalar)

return scalars

+ 0  - 311  mindinsight/datavisual/processors/tensor_processor.py

@@ -1,311 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Tensor Processor APIs."""
from urllib.parse import unquote

import numpy as np

from mindinsight.datavisual.utils.tools import to_int
from mindinsight.utils.exceptions import ParamValueError, UrlDecodeError
from mindinsight.utils.tensor import TensorUtils, MAX_DIMENSIONS_FOR_TENSOR
from mindinsight.conf.constants import MAX_TENSOR_RESPONSE_DATA_SIZE
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.common.exceptions import StepTensorDataNotInCacheError, TensorNotExistError
from mindinsight.datavisual.common.exceptions import ResponseDataExceedMaxValueError, TensorTooLargeError
from mindinsight.datavisual.data_transform.tensor_container import TensorContainer
from mindinsight.datavisual.processors.base_processor import BaseProcessor
from mindinsight.datavisual.proto_files import mindinsight_anf_ir_pb2 as anf_ir_pb2


class TensorProcessor(BaseProcessor):
"""Tensor Processor."""
def get_tensors(self, train_ids, tags, step, dims, detail):
"""
Get tensor data for given train_ids, tags, step, dims and detail.

Args:
train_ids (list): Specify list of train job ID.
tags (list): Specify list of tag.
step (int): Specify step of the tensor; it's required when detail is equal to 'data'.
dims (str): Specify dims of the tensor; it's required when detail is equal to 'data'.
detail (str): Specify which data to query, available values: 'stats', 'histogram' and 'data'.

Returns:
dict, a dict including the `tensors`.

Raises:
UrlDecodeError, If unquote train id error with strict mode.
"""
Validation.check_param_empty(train_id=train_ids, tag=tags)

try:
dims = unquote(dims, errors='strict') if dims else None
except UnicodeDecodeError:
raise UrlDecodeError('Unquote dims error with strict mode')

for index, train_id in enumerate(train_ids):
try:
train_id = unquote(train_id, errors='strict')
except UnicodeDecodeError:
raise UrlDecodeError('Unquote train id error with strict mode')
else:
train_ids[index] = train_id

tensors = []
for train_id in train_ids:
tensors += self._get_train_tensors(train_id, tags, step, dims, detail)

return {"tensors": tensors}

def _get_train_tensors(self, train_id, tags, step, dims, detail):
"""
Get tensor data for given train_id, tags, step, dims and detail.

Args:
train_id (str): Specify a train job ID.
tags (list): Specify list of tag.
step (int): Specify step of tensor, it's necessary when detail is set to 'data'.
dims (str): Specify dims of tensor, it's necessary when detail is set to 'data'.
detail (str): Specify which data to query, available values: 'stats', 'histogram' and 'data'.

Returns:
list[dict], a list of dictionaries containing the `train_id`, `tag`, `values`.

Raises:
TensorNotExistError, If tensor with specific train_id and tag does not exist in cache.
ParamValueError, If the value of detail is not within available values:
'stats', 'histogram' and 'data'.
"""

tensors_response = []
for tag in tags:
try:
tensors = self._data_manager.list_tensors(train_id, tag)
except ParamValueError as err:
raise TensorNotExistError(err.message)

if tensors and not isinstance(tensors[0].value, TensorContainer):
raise TensorNotExistError("there is no tensor data in this tag: {}".format(tag))

if detail is None or detail == 'stats':
values = self._get_tensors_summary(detail, tensors)
elif detail == 'data':
Validation.check_param_empty(step=step, dims=dims)
# Limit to query max two dimensions for tensor in table view.
dims = TensorUtils.parse_shape(dims, limit=MAX_DIMENSIONS_FOR_TENSOR)
step = to_int(step, "step")
values = self._get_tensors_data(step, dims, tensors)
elif detail == 'histogram':
values = self._get_tensors_histogram(tensors)
else:
raise ParamValueError('Can not support this value: {} of detail.'.format(detail))

tensor = {
"train_id": train_id,
"tag": tag,
"values": values
}
tensors_response.append(tensor)

return tensors_response

def _get_tensors_summary(self, detail, tensors):
"""
Builds a JSON-serializable object with information about tensor summary.

Args:
detail (str): Specify which data to query; in this method detail is either None or 'stats'.
tensors (list): The list of _Tensor data.

Returns:
dict, a dict including the `wall_time`, `step`, and `value` for each tensor.
{
"wall_time": 0,
"step": 0,
"value": {
"dims": [1],
"data_type": "DT_FLOAT32"
"statistics": {
"max": 0,
"min": 0,
"avg": 0,
"count": 1,
"nan_count": 0,
"neg_inf_count": 0,
"pos_inf_count": 0
} This dict is only set when detail is equal to 'stats'.
}
}
"""
values = []
for tensor in tensors:
# This value is an instance of TensorContainer
value = tensor.value
value_dict = {
"dims": value.dims,
"data_type": anf_ir_pb2.DataType.Name(value.data_type)
}
if detail and detail == 'stats':
stats = None
if value.error_code is None:
stats = TensorUtils.get_statistics_dict(stats=value.stats, overall_stats=value.stats)
value_dict.update({"statistics": stats})

values.append({
"wall_time": tensor.wall_time,
"step": tensor.step,
"value": value_dict
})

return values

def _get_tensors_data(self, step, dims, tensors):
"""
Builds a JSON-serializable object with information about tensor dims data.

Args:
step (int): Specify step of tensor.
dims (tuple): Specify dims of tensor.
tensors (list): The list of _Tensor data.

Returns:
dict, a dict including the `wall_time`, `step`, and `value` for each tensor.
{
"wall_time": 0,
"step": 0,
"value": {
"dims": [1],
"data_type": "DT_FLOAT32",
"data": [[0.1]]
"statistics": {
"max": 0,
"min": 0,
"avg": 0,
"count": 1,
"nan_count": 0,
"neg_inf_count": 0,
"pos_inf_count": 0
}
}
}

Raises:
ResponseDataExceedMaxValueError, If the size of response data exceed max value.
StepTensorDataNotInCacheError, If query step is not in cache.
"""
values = []
step_in_cache = False
for tensor in tensors:
# This value is an instance of TensorContainer
value = tensor.value
if step != tensor.step:
continue
step_in_cache = True
if value.error_code is not None:
raise TensorTooLargeError("Step: {}".format(tensor.step))
res_data = TensorUtils.get_specific_dims_data(value.ndarray, dims)
flatten_data = res_data.flatten().tolist()
if len(flatten_data) > MAX_TENSOR_RESPONSE_DATA_SIZE:
raise ResponseDataExceedMaxValueError("the size of response data: {} exceed max value: {}."
.format(len(flatten_data), MAX_TENSOR_RESPONSE_DATA_SIZE))

def transfer(array):
if not isinstance(array, np.ndarray):
# The list is used here so that len function can be used
# when the value of array is `NAN`, `-INF` or `INF`.
array = [array]
transfer_data = [None] * len(array)
for index, data in enumerate(array):
if isinstance(data, np.ndarray):
transfer_data[index] = transfer(data)
else:
if np.isnan(data):
transfer_data[index] = 'NAN'
elif np.isneginf(data):
transfer_data[index] = '-INF'
elif np.isposinf(data):
transfer_data[index] = 'INF'
else:
transfer_data[index] = float(data)
return transfer_data

stats = TensorUtils.get_statistics_from_tensor(res_data)
if stats.nan_count + stats.neg_inf_count + stats.pos_inf_count > 0:
tensor_data = transfer(res_data)
else:
tensor_data = res_data.tolist()
values.append({
"wall_time": tensor.wall_time,
"step": tensor.step,
"value": {
"dims": value.dims,
"data_type": anf_ir_pb2.DataType.Name(value.data_type),
"data": tensor_data,
"statistics": TensorUtils.get_statistics_dict(stats=stats, overall_stats=value.stats)
}
})
break
if not step_in_cache:
raise StepTensorDataNotInCacheError("data for this step: {} may have been dropped.".format(step))

return values

def _get_tensors_histogram(self, tensors):
"""
Builds a JSON-serializable object with information about tensor histogram data.

Args:
tensors (list): The list of _Tensor data.

Returns:
dict, a dict including the `wall_time`, `step`, and `value` for each tensor.
{
"wall_time": 0,
"step": 0,
"value": {
"dims": [1],
"data_type": "DT_FLOAT32",
"histogram_buckets": [[0.1, 0.2, 3]]
"statistics": {
"max": 0,
"min": 0,
"avg": 0,
"count": 1,
"nan_count": 0,
"neg_inf_count": 0,
"pos_inf_count": 0
}
}
}
"""
values = []
for tensor in tensors:
# This value is an instance of TensorContainer
value = tensor.value
if value.error_code is not None:
raise TensorTooLargeError("Step: {}".format(tensor.step))
buckets = value.buckets()
values.append({
"wall_time": tensor.wall_time,
"step": tensor.step,
"value": {
"dims": value.dims,
"data_type": anf_ir_pb2.DataType.Name(value.data_type),
"histogram_buckets": buckets,
"statistics": TensorUtils.get_statistics_dict(stats=value.stats, overall_stats=value.stats)
}
})

return values
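The transfer helper in _get_tensors_data above replaces non-finite floats with strings so the response stays JSON-serializable; a standalone sketch of that conversion for a flat list of values:

import numpy as np

def to_json_safe(value):
    # Mirror the NAN / -INF / INF string substitution used above.
    if np.isnan(value):
        return 'NAN'
    if np.isneginf(value):
        return '-INF'
    if np.isposinf(value):
        return 'INF'
    return float(value)

print([to_json_safe(v) for v in [1.0, np.nan, np.inf, -np.inf]])  # [1.0, 'NAN', 'INF', '-INF']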

+ 5  - 141  mindinsight/datavisual/processors/train_task_manager.py

@@ -1,4 +1,4 @@
# Copyright 2019-2021 Huawei Technologies Co., Ltd
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,16 +14,10 @@
# ============================================================================
"""Train task manager."""

from mindinsight.utils.exceptions import ParamTypeError
from mindinsight.datavisual.common.log import logger
from mindinsight.datavisual.common import exceptions
from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.common.enums import CacheStatus
from mindinsight.datavisual.common.exceptions import QueryStringContainsNullByteError
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.utils.utils import contains_null_byte
from mindinsight.datavisual.processors.base_processor import BaseProcessor
from mindinsight.datavisual.data_transform.data_manager import DATAVISUAL_PLUGIN_KEY, DATAVISUAL_CACHE_KEY


class TrainTaskManager(BaseProcessor):
@@ -44,7 +38,7 @@ class TrainTaskManager(BaseProcessor):
Validation.check_plugin_name(plugin_name=plugin_name)
train_job = self._data_manager.get_train_job_by_plugin(train_id=train_id, plugin_name=plugin_name)
if train_job is None:
raise exceptions.TrainJobNotExistError()
raise exceptions.SummaryLogPathInvalid()
return dict(train_jobs=[train_job])

def get_plugins(self, train_id, manual_update=True):
@@ -59,143 +53,13 @@ class TrainTaskManager(BaseProcessor):
dict, refer to the RESTful API.
"""
Validation.check_param_empty(train_id=train_id)
if contains_null_byte(train_id=train_id):
raise QueryStringContainsNullByteError("train job id: {} contains null byte.".format(train_id))

if manual_update:
self._data_manager.cache_train_job(train_id)

train_job = self._data_manager.get_train_job(train_id)

try:
data_visual_content = train_job.get_detail(DATAVISUAL_CACHE_KEY)
plugins = data_visual_content.get(DATAVISUAL_PLUGIN_KEY)
except exceptions.TrainJobDetailNotInCacheError:
plugins = []

if not plugins:
train_job = self._data_manager.get_single_train_job(train_id, manual_update=manual_update)
if not train_job:
default_result = dict()
for plugin_name in PluginNameEnum.list_members():
default_result.update({plugin_name: list()})
return dict(plugins=default_result)

for plugin_name, value in plugins.items():
plugins[plugin_name] = sorted(value)

return dict(
plugins=plugins
plugins=train_job['tag_mapping']
)

def query_train_jobs(self, offset=0, limit=10, request_train_id=None):
"""
Query train jobs.

Args:
offset (int): Specify page number. Default is 0.
limit (int): Specify page size. Default is 10.
request_train_id (str): Specify train id. Default is None.

Returns:
tuple, the total number of train jobs and the list of train jobs specified by offset and limit.
"""
if request_train_id is not None:
train_job_item = self._get_train_job_item(request_train_id)
if train_job_item is None:
return 0, []
return 1, [train_job_item]

brief_cache = self._data_manager.get_brief_cache()
brief_train_jobs = list(brief_cache.get_train_jobs().values())
brief_train_jobs.sort(key=lambda x: x.basic_info.update_time, reverse=True)
total = len(brief_train_jobs)

start = offset * limit
end = (offset + 1) * limit
train_jobs = []

train_ids = [train_job.basic_info.train_id for train_job in brief_train_jobs[start:end]]

for train_id in train_ids:
train_job_item = self._get_train_job_item(train_id)
if train_job_item is None:
continue
train_jobs.append(train_job_item)

return total, train_jobs

def _get_train_job_item(self, train_id):
"""
Get train job item.

Args:
train_id (str): Specify train id.

Returns:
dict, a dict of train job item.
"""
try:
train_job = self._data_manager.get_train_job(train_id)
except exceptions.TrainJobNotExistError:
logger.warning('Train job %s does not exist', train_id)
return None

basic_info = train_job.get_basic_info()
train_job_item = dict(
train_id=basic_info.train_id,
relative_path=basic_info.train_id,
create_time=basic_info.create_time.strftime('%Y-%m-%d %H:%M:%S'),
update_time=basic_info.update_time.strftime('%Y-%m-%d %H:%M:%S'),
profiler_dir=basic_info.profiler_dir,
cache_status=train_job.cache_status.value,
profiler_type=basic_info.profiler_type,
summary_files=basic_info.summary_files,
graph_files=basic_info.graph_files,
lineage_files=basic_info.lineage_files,
dump_dir=basic_info.dump_dir
)

if train_job.cache_status != CacheStatus.NOT_IN_CACHE:
plugins = self.get_plugins(train_id, manual_update=False)
else:
plugins = dict(plugins={plugin: [] for plugin in PluginNameEnum.list_members()})

train_job_item.update(plugins)
return train_job_item

def cache_train_jobs(self, train_ids):
"""
Cache train jobs.

Args:
train_ids (list): Specify list of train_ids to be cached.

Returns:
dict, indicates train job ID and its current cache status.

Raises:
ParamTypeError, if the given train_ids parameter is not in valid type.
"""
if not isinstance(train_ids, list):
logger.error("train_ids must be list.")
raise ParamTypeError('train_ids', list)

cache_result = []
for train_id in train_ids:
if not isinstance(train_id, str):
logger.error("train_id must be str.")
raise ParamTypeError('train_id', str)

try:
train_job = self._data_manager.get_train_job(train_id)
except exceptions.TrainJobNotExistError:
logger.warning('Train job %s does not exist', train_id)
continue

self._data_manager.cache_train_job(train_id)

cache_result.append(dict(
train_id=train_id,
cache_status=train_job.cache_status.value,
))

return cache_result

+ 0  - 1  mindinsight/datavisual/proto_files/__init__.py

@@ -12,4 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Proto files module used to define proto and created pb2 file by protoc according to these proto files."""

+ 0  - 45  mindinsight/datavisual/proto_files/lazy_read.proto

@@ -1,45 +0,0 @@
syntax = "proto2";

package mindinsight.summary;
option cc_enable_arenas = true;

// Event Protocol buffer, Top define
message Event {
// Timestamp
required double wall_time = 1;

// The step of train.
optional int64 step = 2;

oneof what {
// An event file was started, with the specified version.
// Now version is "Mindspore.Event:1"
string version = 3;

// Summary data
Summary summary = 5;

}
}


// A Summary is a set of named values that are produced regularly during training
message Summary {
message Image {
required bytes encoded_image = 4;
}

message Value {
// Tag name for the data.
required string tag = 1;

// Value associated with the tag.
oneof value {
float scalar_value = 3;
Image image = 4;
}
}

// Set of values for the summary.
repeated Value value = 1;
}
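A short sketch of round-tripping one event through the module generated from this proto, assuming the lazy_read_pb2 module produced by protoc (shown in the next file diff) is importable; wall_time is required in proto2, so it must be set:

from lazy_read_pb2 import Event

raw = Event(wall_time=0.0, version="Mindspore.Event:1").SerializeToString()
event = Event()
event.ParseFromString(raw)
print(event.WhichOneof('what'), event.wall_time)  # version 0.0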

+ 0  - 242  mindinsight/datavisual/proto_files/lazy_read_pb2.py

@@ -1,242 +0,0 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: lazy_read.proto

from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()




DESCRIPTOR = _descriptor.FileDescriptor(
name='lazy_read.proto',
package='mindinsight.summary',
syntax='proto2',
serialized_options=b'\370\001\001',
serialized_pb=b'\n\x0flazy_read.proto\x12\x13mindinsight.summary\"t\n\x05\x45vent\x12\x11\n\twall_time\x18\x01 \x02(\x01\x12\x0c\n\x04step\x18\x02 \x01(\x03\x12\x11\n\x07version\x18\x03 \x01(\tH\x00\x12/\n\x07summary\x18\x05 \x01(\x0b\x32\x1c.mindinsight.summary.SummaryH\x00\x42\x06\n\x04what\"\xc8\x01\n\x07Summary\x12\x31\n\x05value\x18\x01 \x03(\x0b\x32\".mindinsight.summary.Summary.Value\x1a\x1e\n\x05Image\x12\x15\n\rencoded_image\x18\x04 \x02(\x0c\x1aj\n\x05Value\x12\x0b\n\x03tag\x18\x01 \x02(\t\x12\x16\n\x0cscalar_value\x18\x03 \x01(\x02H\x00\x12\x33\n\x05image\x18\x04 \x01(\x0b\x32\".mindinsight.summary.Summary.ImageH\x00\x42\x07\n\x05valueB\x03\xf8\x01\x01'
)




_EVENT = _descriptor.Descriptor(
name='Event',
full_name='mindinsight.summary.Event',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='wall_time', full_name='mindinsight.summary.Event.wall_time', index=0,
number=1, type=1, cpp_type=5, label=2,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='step', full_name='mindinsight.summary.Event.step', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='version', full_name='mindinsight.summary.Event.version', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=b"".decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='summary', full_name='mindinsight.summary.Event.summary', index=3,
number=5, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
_descriptor.OneofDescriptor(
name='what', full_name='mindinsight.summary.Event.what',
index=0, containing_type=None, fields=[]),
],
serialized_start=40,
serialized_end=156,
)


_SUMMARY_IMAGE = _descriptor.Descriptor(
name='Image',
full_name='mindinsight.summary.Summary.Image',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='encoded_image', full_name='mindinsight.summary.Summary.Image.encoded_image', index=0,
number=4, type=12, cpp_type=9, label=2,
has_default_value=False, default_value=b"",
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
],
serialized_start=221,
serialized_end=251,
)

_SUMMARY_VALUE = _descriptor.Descriptor(
name='Value',
full_name='mindinsight.summary.Summary.Value',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='tag', full_name='mindinsight.summary.Summary.Value.tag', index=0,
number=1, type=9, cpp_type=9, label=2,
has_default_value=False, default_value=b"".decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='scalar_value', full_name='mindinsight.summary.Summary.Value.scalar_value', index=1,
number=3, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='image', full_name='mindinsight.summary.Summary.Value.image', index=2,
number=4, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
_descriptor.OneofDescriptor(
name='value', full_name='mindinsight.summary.Summary.Value.value',
index=0, containing_type=None, fields=[]),
],
serialized_start=253,
serialized_end=359,
)

_SUMMARY = _descriptor.Descriptor(
name='Summary',
full_name='mindinsight.summary.Summary',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='value', full_name='mindinsight.summary.Summary.value', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[_SUMMARY_IMAGE, _SUMMARY_VALUE, ],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
],
serialized_start=159,
serialized_end=359,
)

_EVENT.fields_by_name['summary'].message_type = _SUMMARY
_EVENT.oneofs_by_name['what'].fields.append(
_EVENT.fields_by_name['version'])
_EVENT.fields_by_name['version'].containing_oneof = _EVENT.oneofs_by_name['what']
_EVENT.oneofs_by_name['what'].fields.append(
_EVENT.fields_by_name['summary'])
_EVENT.fields_by_name['summary'].containing_oneof = _EVENT.oneofs_by_name['what']
_SUMMARY_IMAGE.containing_type = _SUMMARY
_SUMMARY_VALUE.fields_by_name['image'].message_type = _SUMMARY_IMAGE
_SUMMARY_VALUE.containing_type = _SUMMARY
_SUMMARY_VALUE.oneofs_by_name['value'].fields.append(
_SUMMARY_VALUE.fields_by_name['scalar_value'])
_SUMMARY_VALUE.fields_by_name['scalar_value'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value']
_SUMMARY_VALUE.oneofs_by_name['value'].fields.append(
_SUMMARY_VALUE.fields_by_name['image'])
_SUMMARY_VALUE.fields_by_name['image'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value']
_SUMMARY.fields_by_name['value'].message_type = _SUMMARY_VALUE
DESCRIPTOR.message_types_by_name['Event'] = _EVENT
DESCRIPTOR.message_types_by_name['Summary'] = _SUMMARY
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

Event = _reflection.GeneratedProtocolMessageType('Event', (_message.Message,), {
'DESCRIPTOR' : _EVENT,
'__module__' : 'lazy_read_pb2'
# @@protoc_insertion_point(class_scope:mindinsight.summary.Event)
})
_sym_db.RegisterMessage(Event)

Summary = _reflection.GeneratedProtocolMessageType('Summary', (_message.Message,), {

'Image' : _reflection.GeneratedProtocolMessageType('Image', (_message.Message,), {
'DESCRIPTOR' : _SUMMARY_IMAGE,
'__module__' : 'lazy_read_pb2'
# @@protoc_insertion_point(class_scope:mindinsight.summary.Summary.Image)
})
,

'Value' : _reflection.GeneratedProtocolMessageType('Value', (_message.Message,), {
'DESCRIPTOR' : _SUMMARY_VALUE,
'__module__' : 'lazy_read_pb2'
# @@protoc_insertion_point(class_scope:mindinsight.summary.Summary.Value)
})
,
'DESCRIPTOR' : _SUMMARY,
'__module__' : 'lazy_read_pb2'
# @@protoc_insertion_point(class_scope:mindinsight.summary.Summary)
})
_sym_db.RegisterMessage(Summary)
_sym_db.RegisterMessage(Summary.Image)
_sym_db.RegisterMessage(Summary.Value)


DESCRIPTOR._options = None
# @@protoc_insertion_point(module_scope)

+ 2  - 7  mindinsight/datavisual/proto_files/mindinsight_anf_ir.proto

@@ -85,11 +85,9 @@ enum DataType {
DT_BASE_FLOAT = 38; // type generate float
DT_TYPE = 39; // type type
DT_ANYTHING = 40; // type anything
DT_REFKEY = 41; // type refkey
DT_REF = 42; // type ref
}

// Value definition for attribute value or parameter default value
// Value definiton for attribute value or parameter default value
message ValueProto {
// data type of value
optional DataType dtype = 1; // discriminator that indicates which field below is in use
@@ -225,9 +223,6 @@ message NodeProto {

// other fields for debug
optional uint64 output_i = 7;

// The full_name_with_scope of CNode
optional string full_name = 8;
}

// Models
@@ -254,7 +249,7 @@ message ModelProto {
// The parameterized graph that is evaluated to execute the model.
optional GraphProto graph = 4;

// metadata info of operators
// metadata info of opeartors
optional OperatorSetProto metadata_operators = 5;
};



+ 16  - 33  mindinsight/datavisual/proto_files/mindinsight_anf_ir_pb2.py  (file diff suppressed because it is too large)


+ 0  - 129  mindinsight/datavisual/proto_files/mindinsight_lineage.proto

@@ -1,129 +0,0 @@
// Copyright 2020 Huawei Technologies Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mindinsight;
option cc_enable_arenas = true;


// Event Protocol buffer, Top define
message LineageEvent {
// Timestamp
required double wall_time = 1;

// The step of train.
optional int64 step = 2;

oneof what {
// An event file was started, with the specified version.
// Now version is "Mindspore.Event:1"
string version = 3;

// Train lineage
TrainLineage train_lineage = 6;

// Evaluation lineage
EvaluationLineage evaluation_lineage = 7;

// Dataset graph
DatasetGraph dataset_graph = 9;

// User defined info
UserDefinedInfo user_defined_info = 10;
}
}

// User defined info
message UserDefinedInfo{
// repeated user defined info
repeated UserDefinedInfo user_info = 1;

// key/value which contains both scalar and dict
map<string, UserDefinedInfo> map_dict = 2;
map<string, int32> map_int32 = 3;
map<string, string> map_str = 4;
map<string, double> map_double = 5;
}

// TrainLineage records info of a training run.
message TrainLineage{
message HyperParameters{
optional string optimizer = 1;
optional float learning_rate = 2;
optional string loss_function = 3;
optional int32 epoch = 4;
optional string parallel_mode = 5;
optional int32 device_num = 6;
optional int32 batch_size = 8;
}

message TrainDataset{
optional string train_dataset_path = 1;
optional int32 train_dataset_size = 2;
}

message Algorithm{
optional string network = 1;
optional float loss = 2;
}

message Model{
optional string path = 3;
optional int64 size = 4;
}

optional HyperParameters hyper_parameters = 1;
optional TrainDataset train_dataset = 2;
optional Algorithm algorithm = 3;
optional Model model = 4;
}

// EvalLineage records info of an evaluation.
message EvaluationLineage{
message ValidDataset{
optional string valid_dataset_path = 1;
optional int32 valid_dataset_size = 2;
}

optional string metric = 2;
optional ValidDataset valid_dataset = 3;
}


// DatasetGraph
message DatasetGraph {
repeated DatasetGraph children = 1;
optional OperationParameter parameter = 2;
repeated Operation operations = 3;
optional Operation sampler = 4;
}

message Operation {
optional OperationParameter operationParam = 1;
repeated int32 size = 2;
repeated float weights = 3;
}

message OperationParameter{
map<string, string> mapStr = 1;
map<string, StrList> mapStrList = 2;
map<string, bool> mapBool = 3;
map<string, int32> mapInt = 4;
map<string, double> mapDouble = 5;
}

message StrList {
repeated string strValue = 1;
}

+ 0  - 1246  mindinsight/datavisual/proto_files/mindinsight_lineage_pb2.py  (file diff suppressed because it is too large)


+ 70  - 79  mindinsight/datavisual/proto_files/mindinsight_summary.proto

@@ -39,12 +39,60 @@ message Event {
// Summary data
Summary summary = 5;

// Train lineage
TrainLineage train_lineage = 6;

Explain explain = 6;
// Evaluation lineage
EvaluationLineage evaluation_lineage = 7;

// dataset graph
DatasetGraph dataset_graph = 9;
}
}

// TrainLineage records info of a training run.
message TrainLineage{
message HyperParameters{
optional string optimizer = 1;
optional float learning_rate = 2;
optional string loss_function = 3;
optional int32 epoch = 4;
optional string parallel_mode = 5;
optional int32 device_num = 6;
optional int32 batch_size = 8;
}

message TrainDataset{
optional string train_dataset_path = 1;
optional int32 train_dataset_size = 2;
}

message Algorithm{
optional string network = 1;
optional float loss = 2;
}

message Model{
optional string path = 3;
optional int64 size = 4;
}

optional HyperParameters hyper_parameters = 1;
optional TrainDataset train_dataset = 2;
optional Algorithm algorithm = 3;
optional Model model = 4;
}

// EvalLineage records info of an evaluation.
message EvaluationLineage{
message ValidDataset{
optional string valid_dataset_path = 1;
optional int32 valid_dataset_size = 2;
}

optional string metric = 2;
optional ValidDataset valid_dataset = 3;
}

// A Summary is a set of named values that are produced regularly during training
message Summary {
@@ -64,30 +112,6 @@ message Summary {
required bytes encoded_image = 4;
}

message Histogram {
message bucket{
// Counting number of values fallen in [left, left + width).
// For the rightmost bucket, the range is [left, left + width].
required double left = 1;
required double width = 2;
required int64 count = 3;
}

repeated bucket buckets = 1;
optional int64 nan_count = 2;
optional int64 pos_inf_count = 3;
optional int64 neg_inf_count = 4;

// max, min, sum will not take nan and inf into account.
// If there is no valid value in tensor, max and min will be nan, sum will be 0.
optional double max = 5;
optional double min = 6;
optional double sum = 7;

// total number of values. including nan and inf.
optional int64 count = 8;
}

message Value {
// Tag name for the data.
required string tag = 1;
@@ -97,7 +121,6 @@ message Summary {
float scalar_value = 3;
Image image = 4;
TensorProto tensor = 8;
Histogram histogram = 9;
}
}

@@ -105,60 +128,28 @@ message Summary {
repeated Value value = 1;
}

// DatasetGraph
message DatasetGraph {
repeated DatasetGraph children = 1;
optional OperationParameter parameter = 2;
repeated Operation operations = 3;
optional Operation sampler = 4;
}

message Explain {
message Inference{
repeated float ground_truth_prob = 1;
repeated int32 predicted_label = 2;
repeated float predicted_prob = 3;
repeated float ground_truth_prob_sd = 4;
repeated float ground_truth_prob_itl95_low = 5;
repeated float ground_truth_prob_itl95_hi = 6;
repeated float predicted_prob_sd = 7;
repeated float predicted_prob_itl95_low = 8;
repeated float predicted_prob_itl95_hi = 9;
}

message Explanation{
optional string explain_method = 1;
optional int32 label = 2;
optional string heatmap_path = 3;
}

message Benchmark{
optional string benchmark_method = 1;
optional string explain_method = 2;
optional float total_score = 3;
repeated float label_score = 4;
}

message Metadata{
repeated string label = 1;
repeated string explain_method = 2;
repeated string benchmark_method = 3;
}

message HocLayer{
optional float prob = 1;
repeated int32 box = 2; // List of repeated x, y, w, h
}

message Hoc {
optional int32 label = 1;
optional string mask = 2;
repeated HocLayer layer = 3;
}

optional int32 sample_id = 1; // Either metadata or sample_id must be filled in
optional string image_path = 2;
repeated int32 ground_truth_label = 3;

optional Inference inference = 4;
repeated Explanation explanation = 5;
repeated Benchmark benchmark = 6;
message Operation {
optional OperationParameter operationParam = 1;
repeated int32 size = 2;
repeated float weights = 3;
}

optional Metadata metadata = 7;
optional string status = 8; // enum value: run, end
message OperationParameter{
map<string, string> mapStr = 1;
map<string, StrList> mapStrList = 2;
map<string, bool> mapBool = 3;
map<string, int32> mapInt = 4;
map<string, double> mapDouble = 5;
}

repeated Hoc hoc = 9; // hierarchical occlusion counterfactual
message StrList {
repeated string strValue = 1;
}

+ 561  - 314  mindinsight/datavisual/proto_files/mindinsight_summary_pb2.py  (file diff suppressed because it is too large)


+ 57  - 0  mindinsight/datavisual/utils/crc32/base.h

@@ -0,0 +1,57 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef DATAVISUAL_UTILS_CRC32_BASE_H_
#define DATAVISUAL_UTILS_CRC32_BASE_H_

#include <memory>
#include <string>
#include "securec/include/securec.h"

using string = std::string;

using int8 = int8_t;
using int16 = int16_t;
using int32 = int32_t;
using int64 = int64_t;

using uint8 = uint8_t;
using uint16 = uint16_t;
using uint32 = uint32_t;
using uint64 = uint64_t;

// Check the pointer for null; break out of the enclosing do-while if the value is null
#define EXCEPT_CHECK_NULL(value) \
do { \
if (value == nullptr) { \
break; \
} \
} while (0)

// Common helper functions
// Read a 32-bit value from the given pointer
inline uint32 DecodeFixed32(const char* ptr) {
uint32 result = 0;
if (EOK != memcpy_s(&result, sizeof(result), ptr, sizeof(result))) {
return result;
}
return result;
}

// Used to fetch a naturally-aligned 32-bit word in little endian byte-order
inline uint32 LE_LOAD32(const uint8_t* p) { return DecodeFixed32(reinterpret_cast<const char*>(p)); }

#endif // DATAVISUAL_UTILS_CRC32_BASE_H_
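What DecodeFixed32 and LE_LOAD32 boil down to is a little-endian 4-byte read; a Python sketch of the same operation:

import struct

def le_load32(buf, offset=0):
    # Read 4 bytes starting at offset as a little-endian unsigned 32-bit integer.
    return struct.unpack_from('<I', buf, offset)[0]

print(hex(le_load32(b'\x78\x56\x34\x12')))  # 0x12345678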

+ 26  - 23  mindinsight/datavisual/utils/crc32/crc32.cc

@@ -14,10 +14,12 @@
* limitations under the License.
*/

#include "datavisual/utils/crc32/crc32.h"
#include "crc32/crc32.h"
#include <stdint.h>

const unsigned int CRC_TABLE_SIZE = 256;

static const uint32_t crc_table_o32[CRC_TABLE_SIZE] = {
static const uint32 crc_table_o32[CRC_TABLE_SIZE] = {
0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, 0x8AD958CF,
0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, 0x105EC76F, 0xE235446C,
0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57,
@@ -48,7 +50,7 @@ static const uint32_t crc_table_o32[CRC_TABLE_SIZE] = {
0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351};

static const uint32_t crc_table_o40[CRC_TABLE_SIZE] = {
static const uint32 crc_table_o40[CRC_TABLE_SIZE] = {
0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945, 0x9D14C3B8,
0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, 0x3FC5F181, 0x2C6769F6,
0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4, 0xA2D13239, 0xB173AA4E, 0x859402D7,
@@ -79,7 +81,7 @@ static const uint32_t crc_table_o40[CRC_TABLE_SIZE] = {
0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B, 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F,
0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483};

static const uint32_t crc_table_o48[CRC_TABLE_SIZE] = {
static const uint32 crc_table_o48[CRC_TABLE_SIZE] = {
0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469, 0x38513EC5,
0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC, 0x70A27D8A, 0xD5E3EFF4,
0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3, 0x48F3434F, 0xEDB2D131, 0x079C1142,
@@ -110,7 +112,7 @@ static const uint32_t crc_table_o48[CRC_TABLE_SIZE] = {
0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D, 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2,
0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8};

static const uint32_t crc_table_o56[CRC_TABLE_SIZE] = {
static const uint32 crc_table_o56[CRC_TABLE_SIZE] = {
0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA, 0xF64463E6,
0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C, 0xE964B13D, 0x34211B85,
0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7, 0x1F20D2DB, 0xC2657863, 0xA047F15A,
@@ -141,7 +143,7 @@ static const uint32_t crc_table_o56[CRC_TABLE_SIZE] = {
0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4, 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1,
0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842};

static const uint32_t crc_table_o64[CRC_TABLE_SIZE] = {
static const uint32 crc_table_o64[CRC_TABLE_SIZE] = {
0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44, 0xC5670B91,
0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5, 0x8F2261D3, 0xB7330E7F,
0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97, 0x4A456A42, 0x725405EE, 0x3A67B51A,
@@ -172,7 +174,7 @@ static const uint32_t crc_table_o64[CRC_TABLE_SIZE] = {
0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532, 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013,
0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3};

static const uint32_t crc_table_o72[CRC_TABLE_SIZE] = {
static const uint32 crc_table_o72[CRC_TABLE_SIZE] = {
0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD, 0x6006181F,
0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2, 0xC00C303E, 0x2F3C5B27,
0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93, 0xA00A2821, 0x4F3A4338, 0x7B8688E2,
@@ -203,7 +205,7 @@ static const uint32_t crc_table_o72[CRC_TABLE_SIZE] = {
0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623, 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B,
0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C};

static const uint32_t crc_table_o80[CRC_TABLE_SIZE] = {
static const uint32 crc_table_o80[CRC_TABLE_SIZE] = {
0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089, 0x4E2DFD53,
0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA, 0x9C5BFAA6, 0xF458D66E,
0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F, 0xD27607F5, 0xBA752B3D, 0x02705E65,
@@ -234,7 +236,7 @@ static const uint32_t crc_table_o80[CRC_TABLE_SIZE] = {
0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C, 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E,
0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F};

static const uint32_t crc_table_o88[CRC_TABLE_SIZE] = {
static const uint32 crc_table_o88[CRC_TABLE_SIZE] = {
0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504, 0x423B04DA,
0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE, 0x847609B4, 0xCD4A7493,
0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0, 0xC64D0D6E, 0x8F717049, 0x5435F720,
@@ -266,21 +268,22 @@ static const uint32_t crc_table_o88[CRC_TABLE_SIZE] = {
0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5};

// Use the 8 table to calc crc32c value
inline void CRC32T8(uint32_t *crc, const uint8_t **p) {
auto c = static_cast<uint32_t>(*crc ^ LE_LOAD32(*p));
inline void CRC32T8(uint32 *crc, const uint8_t **p) {
auto c = static_cast<uint32>(*crc ^ LE_LOAD32(*p));
*p += 4;
*crc = crc_table_o88[c & 0xFF] ^ crc_table_o80[(c >> 8) & 0xFF] ^ crc_table_o72[(c >> 16) & 0xFF] ^
crc_table_o64[(c >> 24) & 0xFF];
c = static_cast<uint32_t>(LE_LOAD32(*p));
*crc = (*crc) ^ crc_table_o56[c & 0xFF] ^ crc_table_o48[(c >> 8) & 0xFF] ^ crc_table_o40[(c >> 16) & 0xFF] ^
crc_table_o32[(c >> 24) & 0xFF];
*crc = crc_table_o88[c & 0xff] ^ crc_table_o80[(c >> 8) & 0xff] ^ crc_table_o72[(c >> 16) & 0xff] ^
crc_table_o64[(c >> 24) & 0xff];
c = static_cast<uint32>(LE_LOAD32(*p));
*crc = (*crc) ^ crc_table_o56[c & 0xff] ^ crc_table_o48[(c >> 8) & 0xff] ^ crc_table_o40[(c >> 16) & 0xff] ^
crc_table_o32[(c >> 24) & 0xff];
*p += 4;
}

// calc the crc32c value
uint32_t MakeCrc32c(uint32_t init_crc, const char *data, size_t size) {
uint32_t crc = init_crc ^ 0xFFFFFFFFU;
const int OFFSET = 8;
uint32 MakeCrc32c(uint32 init_crc, const char *data, size_t size) {
EXCEPT_CHECK_NULL(data);
uint32 crc = init_crc ^ 0xffffffffu;
const unsigned int OFFSET = 8;

// Get the origin begin and end address(not alignment)
auto *bp = reinterpret_cast<const uint8_t *>(data);
@@ -293,10 +296,10 @@ uint32_t MakeCrc32c(uint32_t init_crc, const char *data, size_t size) {
auto *bp_align = reinterpret_cast<const uint8_t *>(MEM_ALIGN(pval, 2));

// process the not alignment bits when size < 4 byte
if (bp_align <= ep && bp < bp_align) {
if (bp_align <= ep) {
// Process bytes until finished or p is 4-byte aligned
while (bp != bp_align) {
crc = crc_table_o32[(crc ^ (*bp++)) & 0xFF] ^ (crc >> 8);
crc = crc_table_o32[(crc & 0xff) ^ (*bp++)] ^ (crc >> 8);
}
}

@@ -307,7 +310,7 @@ uint32_t MakeCrc32c(uint32_t init_crc, const char *data, size_t size) {

// Process the last not alignment bytes
while (bp < ep) {
crc = crc_table_o32[(crc ^ (*bp++)) & 0xFF] ^ (crc >> 8);
crc = crc_table_o32[(crc & 0xff) ^ (*bp++)] ^ (crc >> 8);
}
return crc ^ 0xFFFFFFFFU;
return crc ^ 0xffffffffu;
}
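
The eight lookup tables above — crc_table_o32 through crc_table_o88 — follow the standard slicing-by-8 layout: crc_table_o32 is the ordinary byte-at-a-time CRC-32C table (the one used in the unaligned head and tail loops), and each higher table folds in one additional trailing zero byte so that CRC32T8 can consume eight input bytes per iteration. As a rough illustration only (not code from either branch), such tables can be generated from the reflected CRC-32C polynomial; the polynomial constant and the table-index correspondence below are assumptions based on the standard construction:

#include <cstdint>
#include <cstdio>

// Reflected CRC-32C (Castagnoli) polynomial; an assumption, not taken from the diff.
static const uint32_t kPoly = 0x82F63B78u;

// tables[0] is assumed to play the role of crc_table_o32, tables[7] that of crc_table_o88.
static uint32_t tables[8][256];

static void BuildSlicingBy8Tables() {
  // Base table: bit-by-bit CRC of a single byte.
  for (uint32_t i = 0; i < 256; ++i) {
    uint32_t crc = i;
    for (int bit = 0; bit < 8; ++bit) {
      crc = (crc & 1u) ? (crc >> 1) ^ kPoly : (crc >> 1);
    }
    tables[0][i] = crc;
  }
  // Each further table accounts for one extra zero byte fed in after the indexed byte.
  for (int t = 1; t < 8; ++t) {
    for (uint32_t i = 0; i < 256; ++i) {
      tables[t][i] = (tables[t - 1][i] >> 8) ^ tables[0][tables[t - 1][i] & 0xFFu];
    }
  }
}

int main() {
  BuildSlicingBy8Tables();
  // Spot check: if the naming assumption holds, these should match the first
  // entries of crc_table_o88 shown above.
  for (int i = 0; i < 4; ++i) {
    std::printf("0x%08X\n", tables[7][i]);
  }
  return 0;
}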

+14 -37   mindinsight/datavisual/utils/crc32/crc32.h

@@ -17,61 +17,38 @@
#ifndef DATAVISUAL_UTILS_CRC32_CRC32_H_
#define DATAVISUAL_UTILS_CRC32_CRC32_H_

#include <cstddef>
#include <pybind11/pybind11.h>
#include <stddef.h>
#include <cstdint>
#include "pybind11/pybind11.h"
#include "securec/include/securec.h"

#define CRC_TABLE_SIZE 256
#define RIGHT_SHIFT 15
#define LEFT_SHIFT 17
#include "crc32/base.h"

// Align n to (1 << m) byte boundary
#define MEM_ALIGN(n, m) ((n + ((1 << m) - 1)) & ~((1 << m) - 1))

// implement common define function
// Get the 32 bits align value
inline uint32_t DecodeFixed32(const char* ptr) {
uint32_t result = 0;
if (EOK != memcpy_s(&result, sizeof(result), ptr, sizeof(result))) {
// `0` indicates that something wrong happened
return 0;
}
return result;
}

// Used to fetch a naturally-aligned 32-bit word in little endian byte-order
inline uint32_t LE_LOAD32(const uint8_t* p) { return DecodeFixed32(reinterpret_cast<const char*>(p)); }

// Masked for crc.
static constexpr uint32_t kMaskDelta = 0xA282EAD8U;
static constexpr uint32 kMaskDelta = 0xa282ead8ul;

// Provide the Crc32c function

// Calculate the crc32c value, use the 8 table method
uint32_t MakeCrc32c(uint32_t init_crc, const char* data, size_t size);
uint32 MakeCrc32c(uint32 init_crc, const char* data, size_t size);

// A function return the crc32c value
uint32_t GetMaskCrc32cValue(const char* data, size_t n) {
if (data == nullptr) {
// Return early to prevent MakeCrc32c resulting in segmentfault
return 0;
}
uint32_t crc = MakeCrc32c(0, data, n);
return ((crc >> RIGHT_SHIFT) | (crc << LEFT_SHIFT)) + kMaskDelta;
uint32 GetMaskCrc32cValue(const char* data, size_t n) {
auto crc = MakeCrc32c(0, data, n);
return crc;
}

// A function check the crc32c value against data
bool CheckValueAgainstData(const char* crc_str, const char* data, size_t size) {
uint32_t crc_new = GetMaskCrc32cValue(data, size);
uint32_t crc_old = DecodeFixed32(crc_str);
return crc_new == crc_old;
uint32 GetValueFromStr(const char* crc_str) {
uint32 crc = DecodeFixed32(crc_str);
uint32 rot = crc - kMaskDelta;
return ((rot >> 17) | (rot << 15));
}

PYBIND11_MODULE(crc32, m) {
m.doc() = "crc util";
m.def("MakeCrc32c", &MakeCrc32c, "A function calculating the crc32c value, use the 8 table method");
m.def("GetMaskCrc32cValue", &GetMaskCrc32cValue, "A function return the crc32c value");
m.def("CheckValueAgainstData", &CheckValueAgainstData, "A function check the crc32c value against data");
m.def("GetValueFromStr", &GetValueFromStr, "A function return the crc32c value from string");
}

#endif // DATAVISUAL_UTILS_CRC32_CRC32_H_
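
For context on the API change above: the master-side GetMaskCrc32cValue masks the raw CRC by rotating it right 15 bits and adding kMaskDelta, while the r0.1-side GetValueFromStr undoes exactly that transformation on a value decoded from the record string (subtract kMaskDelta, then rotate right 17 bits). The two operations are inverses; a minimal round-trip sketch, assuming only the kMaskDelta constant shown above (illustration code, not part of the diff):

#include <cassert>
#include <cstdint>

// Same masking constant as kMaskDelta in the header above.
constexpr uint32_t kDelta = 0xA282EAD8u;

// Mirrors the master-side masking: rotate right by 15 bits, then add the delta.
uint32_t Mask(uint32_t crc) { return ((crc >> 15) | (crc << 17)) + kDelta; }

// Mirrors the r0.1-side GetValueFromStr: subtract the delta, rotate right by 17 bits.
uint32_t Unmask(uint32_t masked) {
  uint32_t rot = masked - kDelta;
  return (rot >> 17) | (rot << 15);
}

int main() {
  // Arbitrary CRC value purely for the round-trip check.
  uint32_t crc = 0x1D3A5C7Fu;
  assert(Unmask(Mask(crc)) == crc);
  return 0;
}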

Some files were not shown because too many files changed in this diff
