| Author | SHA1 | Message | Date |
|---|---|---|---|
|
|
3beccdc6d3 |
!1296 update the version from 1.2.0-rc1 to 1.2.0
From: @luopengting Reviewed-by: @yelihua,@ouwenchang,@lilongfei15 Signed-off-by: @lilongfei15 |
4 years ago |
|
|
7e7036282a |
!1294 update the version from 1.2.0-rc1 to 1.2.0 in ReleaseNotes
From: @luopengting Reviewed-by: @yelihua,@lilongfei15 Signed-off-by: @lilongfei15 |
4 years ago |
|
|
d6ae14253a | update the version from 1.2.0-rc1 to 1.2.0 | 4 years ago |
|
|
08e7835940 | update the version from 1.2.0-rc1 to 1.2.0 in ReleaseNotes | 4 years ago |
|
|
823ed4ae96 |
!1293 UI modify the Map node matching type to exact match of data map page
From: @huang_wei_feng4 Reviewed-by: @yelihua,@wenkai_dist Signed-off-by: @lilongfei15 |
4 years ago |
|
|
29fd514bc0 | UI modify the Map node matching type to exact match of data map page | 4 years ago |
|
|
92fdd50de3 |
!1291 UI configure the fields returned by the interface of data map page
From: @huang_wei_feng4 Reviewed-by: @yelihua,@ouwenchang Signed-off-by: @ouwenchang |
4 years ago |
|
|
708029d21a | UI configure the fields returned by the interface of data map page | 4 years ago |
|
|
79ef500ab9 |
!1283 replace the status with '/' on supported_model_list_cn.md
From: @shenghong96 Reviewed-by: @wenkai_dist,@ouwenchang Signed-off-by: @lilongfei15 |
4 years ago |
|
|
b406985173 | replace the status with '/' | 4 years ago |
|
|
690b8117fb |
!1278 Modify the prompt message that step trace does not support heterogeneous training
From: @feng_xue_feng Reviewed-by: @yelihua,@lilongfei15 Signed-off-by: @lilongfei15 |
4 years ago |
|
|
d4723c056c | Modify the prompt message that step trace does not support heterogeneous training | 4 years ago |
|
|
cc29188882 |
!1275 StepTrace of UI shows not support heterogeneous training scene
From: @gzhcv Reviewed-by: @ouwenchang,@lilongfei15 Signed-off-by: @lilongfei15 |
4 years ago |
|
|
064d7e0425 | StepTrace of UI shows not support heterogeneous training scene | 4 years ago |
|
|
2225bc0932 |
!1267 fix release notes
From: @luopengting Reviewed-by: @ouwenchang,@lilongfei15 Signed-off-by: @lilongfei15 |
4 years ago |
|
|
13f52b8697 | fix release notes | 4 years ago |
|
|
26bfbf9a4c |
!1265 update release notes
From: @luopengting Reviewed-by: @lvmingfu,@ouwenchang,@lilongfei15 Signed-off-by: @lilongfei15 |
4 years ago |
|
|
a6370bcf80 | update release notes | 4 years ago |
|
|
4e69e7a345 |
!1264 modify version from 1.2.0 to 1.2.0-rc1
From: @luopengting Reviewed-by: @yelihua,@lilongfei15 Signed-off-by: @lilongfei15 |
4 years ago |
|
|
6c46c78bab | modify version to 1.2.0-rc1 | 4 years ago |
|
|
7008415192 |
!1262 move the contributor section to the end
From: @luopengting Reviewed-by: @ouwenchang,@lilongfei15 Signed-off-by: @lilongfei15 |
4 years ago |
|
|
5462c8e6f1 | move the contributor section to the end | 4 years ago |
|
|
576a2c4681 |
!1259 Return empty step trace data to UI in heterogeneous training scene
From: @gzhcv Reviewed-by: @ouwenchang,@lilongfei15 Signed-off-by: @lilongfei15 |
4 years ago |
|
|
cadc562f38 |
!1260 add release notes for r1.2-rc1
From: @luopengting Reviewed-by: @ouwenchang,@lilongfei15 Signed-off-by: @lilongfei15 |
4 years ago |
|
|
281396270c | add release notes for r1.2-rc1 | 4 years ago |
|
|
439f135448 | Return empty step trace data to UI in heterogeneous training scene | 4 years ago |
|
|
7c4b86bea6 |
!1257 Add security warning of model file
From: @liuchongming74 Reviewed-by: @lilongfei15,@tangjr14 Signed-off-by: @lilongfei15 |
4 years ago |
|
|
7a2632b081 | Add security warning of MindConverter. | 4 years ago |
|
|
d1b5ef8639 |
!1253 Remove PTH port in mindconverter.
From: @moran3 Reviewed-by: @ouwenchang,@yelihua Signed-off-by: @yelihua |
4 years ago |
|
|
fd4c50f634 |
!1254 fix the timeout limitation from 5s to 3s.
From: @jiang-shuqiang Reviewed-by: @ouwenchang,@yelihua Signed-off-by: @yelihua |
4 years ago |
|
|
85eed7661c | fix the timeout limitation from 5s to 3s. | 4 years ago |
|
|
9eb2cac739 | remove PTH port in mindconverter & fix README | 4 years ago |
|
|
ce94be7025 |
!1252 refactored the computing resource manager to ease the elegant exiting procedure
From: @wenkai_dist Reviewed-by: @ouwenchang,@yelihua,@ouwenchang Signed-off-by: @ouwenchang |
4 years ago |
|
|
2d553f2e79 |
Refactored the computing resource manager to:
1. Ease the elegant exiting procedure by providing a singleton access pattern. Now the exiting procedure can easily call the exit_non_block method to notify the executors to start exiting worker processes. If the worker processes do not exit after a timeout, the mindinsight stop still can kill these processes. 2. Build a framework for managing computing resources used by different businesses. Different businesses can now call the get_executor() API to get their process pool to execute compute heavy tasks outside the mindinsight gunicorn worker process, which will make the restful api respond more quickly. |
4 years ago |
|
|
b5d624bccc |
!1250 fix the content and format of project description on pypi platform
From: @shenghong96 Reviewed-by: @ouwenchang,@yelihua Signed-off-by: @yelihua |
4 years ago |
|
|
d8664ab6c7 |
!1249 add tutorial for adding mapper in MindConvertor in r1.2
From: @moran3 Reviewed-by: @yelihua,@liuchongming74 Signed-off-by: |
4 years ago |
|
|
10d2590d8a | add tutorial for adding mapper in MindConverter | 4 years ago |
|
|
9f591d847d | fix the content and format of package description on pypi platform | 4 years ago |
| @@ -1,60 +1,124 @@ | |||
| # MindInsight 1.1.0 Release Notes | |||
| # MindInsight 1.2.0 | |||
| ## Major Features and Improvements | |||
| ## MindInsight 1.2.0 Release Notes | |||
| ### Precision tuning framework | |||
| ### Major Features and Improvements | |||
| * Support useful checks on weights, activations, gradients and tensors, such as: | |||
| * check unchanged weight | |||
| * check weight change above threshold | |||
| * check activation range | |||
| * check gradient vanishing | |||
| * check tensor overflow | |||
| * Support rechecking with new watch points on the same data. | |||
| * Newly designed tensor view with fix suggestions and tensor context to quickly locate root cause of problems. | |||
| * Support recommending watch points to find common precision problems. | |||
| * Support debugger on multigraph network. | |||
| #### Profiling | |||
| ### Profiler | |||
| - [STABLE] Support memory profiling.(Ascend) | |||
| - [STABLE] Support host cpu utilization profiling.(Ascend/GPU) | |||
| - [STABLE] Support timeline for Host&Device Hybrid Training.(Ascend/GPU) | |||
| - [STABLE] Support show step breakdown information(Step Interval, Forward and Backward Propagation, and Step Tail) of each device in cluster profiling ui page.(Ascend) | |||
| * Support GPU step trace profiling. | |||
| * Support GPU minddata profiling. | |||
| #### MindConverter | |||
| ### MindConverter | |||
| - [STABLE] Support both classic computer vision and bert model definition script and trained weights migration from TensorFlow or PyTorch. | |||
| - [STABLE] Support ONNX model migration to improve the usability of PyTorch model migration. | |||
| * Support TensorFlow model definition script to MindSpore for CV field. | |||
| * Conversion capability of PyTorch is enhanced. | |||
| #### Model Explanation | |||
| ### Model Explanation | |||
| - [STABLE] Support counterfactual explanation for image classification. | |||
| ### API Change | |||
| #### Backwards Compatible Change | |||
| ##### Python API | |||
| ###### add parameter `export_options` for `SummaryCollector` and `SummaryRecord`([!10881](https://gitee.com/mindspore/mindspore/pulls/10881)) | |||
| Perform custom operations on the export data. You can customize the export data with a dictionary. For example, you can set `{'tensor_format': 'npy'}` to export tensor as npy file. | |||
| ###### add parameter `raise_exception` for `SummaryRecord`([!10436](https://gitee.com/mindspore/mindspore/pulls/10436)) | |||
| The parameter `raise_exception` determines whether to throw an exception when an exception occurs. | |||
| ###### add API `register_uncertainty` for `explainer.ImageClassificationRunner`([!11309](https://gitee.com/mindspore/mindspore/pulls/11309)) | |||
| `register_uncertainty` helps register uncertainty instance to compute the epistemic uncertainty based on the Bayes’ theorem. | |||
| ###### add API `register_hierarchical_occlusion` for `explainer.ImageClassificationRunner`([!11309](https://gitee.com/mindspore/mindspore/pulls/11309)) | |||
| `register_hierarchical_occlusion` helps register hierarchical occlusion instances. | |||
| ##### Command Line Interface | |||
| ###### `MindConverter` removes support for pth format model, `--project_path` deleted([!1253](https://gitee.com/mindspore/mindinsight/pulls/1253)) | |||
| The pth format model is not supported anymore, please use ONNX to migrate. | |||
| ### Bug fixes | |||
| - Error information missing when running on an unsupported device (e.g, cpu). [!11801](https://gitee.com/mindspore/mindspore/pulls/11801) | |||
| ### Contributors | |||
| Thanks goes to these wonderful people: | |||
| Congli Gao, Longfei Li, Yongxiong Liang, Chongming Liu, Pengting Luo, Yanming Miao, Gongchang Ou, Kai Wen, Yue Wang, Lihua Ye, Ximiao Yu, Yunshu Zhang, Ning Ma, Yihui Zhang, Hong Sheng, Ran Mo, Zhaohong Guo, Tianshu Liang, Shuqiang Jiang, Yanjun Peng, Haitao Yang, Jiabin Liu, Han Gao, Xiaohui Li, Ngaifai Ng, Hui Pan, Weifeng Huang, Yifan Xia, Xuefeng Feng, Yanxi Wei. | |||
| Contributions of any kind are welcome! | |||
| # MindInsight 1.1.0 | |||
| ## MindInsight 1.1.0 Release Notes | |||
| ### Major Features and Improvements | |||
| #### Precision tuning framework | |||
| - Support useful checks on weights, activations, gradients and tensors, such as: | |||
| - check unchanged weight | |||
| - check weight change above threshold | |||
| - check activation range | |||
| - check gradient vanishing | |||
| - check tensor overflow | |||
| - Support rechecking with new watch points on the same data. | |||
| - Newly designed tensor view with fix suggestions and tensor context to quickly locate root cause of problems. | |||
| - Support recommending watch points to find common precision problems. | |||
| - Support debugger on multigraph network. | |||
| #### Profiler | |||
| - Support GPU step trace profiling. | |||
| - Support GPU minddata profiling. | |||
| #### MindConverter | |||
| - Support TensorFlow model definition script to MindSpore for CV field. | |||
| - Conversion capability of PyTorch is enhanced. | |||
| #### Model Explanation | |||
| Provide explanations and their benchmarks for image classification deep CNN models. | |||
| * Support 6 explanation methods: Gradient, Deconvolution, GuidedBackprop, GradCAM, RISE, Occlusion | |||
| * Support 4 benchmark methods: Localization, Faithfulness, Class Sensitivity, Robustness | |||
| * Provide a high-level API (ImageClassificationRunner) for users to execute explanation methods and benchmark methods and store the results easily. | |||
| - Support 6 explanation methods: Gradient, Deconvolution, GuidedBackprop, GradCAM, RISE, Occlusion | |||
| - Support 4 benchmark methods: Localization, Faithfulness, Class Sensitivity, Robustness | |||
| - Provide a high-level API (ImageClassificationRunner) for users to execute explanation methods and benchmark methods and store the results easily. | |||
| ## API Change | |||
| ### API Change | |||
| ### Improvements | |||
| #### Improvements | |||
| #### Command Line Interface | |||
| ##### Command Line Interface | |||
| * `--enable_debugger`: Support both 1 and True ([!1051](https://gitee.com/mindspore/mindinsight/pulls/1051)) | |||
| * `ENABLE_MS_DEBUGGER`: Support both 1 and True ([!10199](https://gitee.com/mindspore/mindspore/pulls/10199)) | |||
| * `parse_summary`: Add parse_summary function to convert summary file to image file and csv file ([!774](https://gitee.com/mindspore/mindinsight/pulls/774)) | |||
| - `--enable_debugger`: Support both 1 and True ([!1051](https://gitee.com/mindspore/mindinsight/pulls/1051)) | |||
| - `ENABLE_MS_DEBUGGER`: Support both 1 and True ([!10199](https://gitee.com/mindspore/mindspore/pulls/10199)) | |||
| - `parse_summary`: Add parse_summary function to convert summary file to image file and csv file ([!774](https://gitee.com/mindspore/mindinsight/pulls/774)) | |||
| ## Bugfixes | |||
| ### Bugfixes | |||
| ### Profiler | |||
| #### Profiler | |||
| * Fix parser framework file error if the profiling data of one op is saved separately to two files.([!7824](https://gitee.com/mindspore/mindspore/pulls/7824)) | |||
| - Fix parser framework file error if the profiling data of one op is saved separately to two files.([!7824](https://gitee.com/mindspore/mindspore/pulls/7824)) | |||
| ### Model Explanation | |||
| #### Model Explanation | |||
| * Add reset_offset when CRCLengthError and CRCError happen([!955](https://gitee.com/mindspore/mindinsight/pulls/955)) | |||
| * Fix the bug which ignores the sample_event when sample_id == 0.([!968](https://gitee.com/mindspore/mindinsight/pulls/968)) | |||
| - Add reset_offset when CRCLengthError and CRCError happen([!955](https://gitee.com/mindspore/mindinsight/pulls/955)) | |||
| - Fix the bug which ignores the sample_event when sample_id == 0.([!968](https://gitee.com/mindspore/mindinsight/pulls/968)) | |||
| ## Thanks to our Contributors | |||
| ### Thanks to our Contributors | |||
| Thanks goes to these wonderful people: | |||
| @@ -62,22 +126,24 @@ Congli Gao, Jianfeng Zhu, Zhenzhong Kou, Longfei Li, Yongxiong Liang, Chongming | |||
| Contributions of any kind are welcome! | |||
| # MindInsight 1.0.0 Release Notes | |||
| # MindInsight 1.0.0 | |||
| ## MindInsight 1.0.0 Release Notes | |||
| ## Major Features and Improvements | |||
| ### Major Features and Improvements | |||
| * Release MindSpore Debugger. | |||
| * MindConverter ability is enhanced, supporting scripts generation based on PyTorch model. | |||
| * Support training hyper-parameter importance visualization. | |||
| * Support GPU timeline. | |||
| - Release MindSpore Debugger. | |||
| - MindConverter ability is enhanced, supporting scripts generation based on PyTorch model. | |||
| - Support training hyper-parameter importance visualization. | |||
| - Support GPU timeline. | |||
| ## Bugfixes | |||
| ### Bugfixes | |||
| * Optimize aicpu display method. ([!595](https://gitee.com/mindspore/mindinsight/pulls/595/files)) | |||
| * Add the summary loading switch mechanism. ([!601](https://gitee.com/mindspore/mindinsight/pulls/601/files)) | |||
| * Detect a summary dir having summary files or not. ([!632](https://gitee.com/mindspore/mindinsight/pulls/632/files)) | |||
| - Optimize aicpu display method. ([!595](https://gitee.com/mindspore/mindinsight/pulls/595/files)) | |||
| - Add the summary loading switch mechanism. ([!601](https://gitee.com/mindspore/mindinsight/pulls/601/files)) | |||
| - Detect a summary dir having summary files or not. ([!632](https://gitee.com/mindspore/mindinsight/pulls/632/files)) | |||
| ## Thanks to our Contributors | |||
| ### Thanks to our Contributors | |||
| Thanks goes to these wonderful people: | |||
| @@ -85,21 +151,23 @@ Congli Gao, Jianfeng Zhu, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Lia | |||
| Contributions of any kind are welcome! | |||
| # MindInsight 0.7.0-beta Release Notes | |||
| # MindInsight 0.7.0-beta | |||
| ## Major Features and Improvements | |||
| ## MindInsight 0.7.0 Release Notes | |||
| * Optimize node name display in computation graph. | |||
| * MindSpore Profiler supports network training with GPU operators. | |||
| * MindWizard generates classic network scripts according to user preference. | |||
| * Web UI supports language internationalization, including both Chinese and English. | |||
| ### Major Features and Improvements | |||
| ## Bugfixes | |||
| - Optimize node name display in computation graph. | |||
| - MindSpore Profiler supports network training with GPU operators. | |||
| - MindWizard generates classic network scripts according to user preference. | |||
| - Web UI supports language internationalization, including both Chinese and English. | |||
| * Optimize UI page initialization to handle timeout requests. ([!503](https://gitee.com/mindspore/mindinsight/pulls/503)) | |||
| * Fix the line break problem when the profiling file number is too long. ([!532](https://gitee.com/mindspore/mindinsight/pulls/532)) | |||
| ### Bugfixes | |||
| ## Thanks to our Contributors | |||
| - Optimize UI page initialization to handle timeout requests. ([!503](https://gitee.com/mindspore/mindinsight/pulls/503)) | |||
| - Fix the line break problem when the profiling file number is too long. ([!532](https://gitee.com/mindspore/mindinsight/pulls/532)) | |||
| ### Thanks to our Contributors | |||
| Thanks goes to these wonderful people: | |||
| @@ -107,22 +175,24 @@ Congli Gao, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Li | |||
| Contributions of any kind are welcome! | |||
| # MindInsight 0.6.0-beta Release Notes | |||
| # MindInsight 0.6.0-beta | |||
| ## MindInsight 0.6.0 Release Notes | |||
| ## Major Features and Improvements | |||
| ### Major Features and Improvements | |||
| * Provide monitoring capabilities for each of Ascend AI processor and other hardware resources, including CPU and memory. | |||
| * Visualization of weight, gradient and other tensor data in model training. | |||
| * Provide tabular form presentation of tensor data. | |||
| * Provide histogram to show the distribution of tensor data and its change over time. | |||
| - Provide monitoring capabilities for each of Ascend AI processor and other hardware resources, including CPU and memory. | |||
| - Visualization of weight, gradient and other tensor data in model training. | |||
| - Provide tabular form presentation of tensor data. | |||
| - Provide histogram to show the distribution of tensor data and its change over time. | |||
| ## Bugfixes | |||
| ### Bugfixes | |||
| * UI fix for the error message display mode of the tensor during real-time training. ([!465](https://gitee.com/mindspore/mindinsight/pulls/465)) | |||
| * The summary file size is larger than max_file_size. ([!3481](https://gitee.com/mindspore/mindspore/pulls/3481)) | |||
| * Fix real-time training error when disk is full. ([!3058](https://gitee.com/mindspore/mindspore/pulls/3058)) | |||
| - UI fix for the error message display mode of the tensor during real-time training. ([!465](https://gitee.com/mindspore/mindinsight/pulls/465)) | |||
| - The summary file size is larger than max_file_size. ([!3481](https://gitee.com/mindspore/mindspore/pulls/3481)) | |||
| - Fix real-time training error when disk is full. ([!3058](https://gitee.com/mindspore/mindspore/pulls/3058)) | |||
| ## Thanks to our Contributors | |||
| ### Thanks to our Contributors | |||
| Thanks goes to these wonderful people: | |||
| @@ -130,31 +200,33 @@ Congli Gao, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Longfei Li, Yongxiong Li | |||
| Contributions of any kind are welcome! | |||
| # MindInsight 0.5.0-beta Release Notes | |||
| # MindInsight 0.5.0-beta | |||
| ## Major Features and Improvements | |||
| ## MindInsight 0.5.0 Release Notes | |||
| * MindSpore Profiler | |||
| * Provide performance analyse tool for the input data pipeline. | |||
| * Provide timeline analyse tool, which can show the detail of the streams/tasks. | |||
| * Provide a tool to visualize the step trace information, which can be used to analyse the general performance of the neural network in each phase. | |||
| * Provide profiling guides for the users to find the performance bottlenecks quickly. | |||
| * CPU summary operations support for CPU summary data. | |||
| * Over threshold warn support in scalar training dashboard. | |||
| * Provide more user-friendly callback function for visualization | |||
| * Provide unified callback `SummaryCollector` to log most commonly visualization event. | |||
| * Discard the original visualization callback `SummaryStep`, `TrainLineage` and `EvalLineage`. | |||
| * `SummaryRecord` provide new API `add_value` to collect data into cache for summary persistence. | |||
| * `SummaryRecord` provide new API `set_mode` to distinguish summary persistence mode at different stages. | |||
| * MindConverter supports conversion of more operators and networks, and improves its ease of use. | |||
| ### Major Features and Improvements | |||
| ## Bugfixes | |||
| - MindSpore Profiler | |||
| - Provide performance analyse tool for the input data pipeline. | |||
| - Provide timeline analyse tool, which can show the detail of the streams/tasks. | |||
| - Provide a tool to visualize the step trace information, which can be used to analyse the general performance of the neural network in each phase. | |||
| - Provide profiling guides for the users to find the performance bottlenecks quickly. | |||
| - CPU summary operations support for CPU summary data. | |||
| - Over threshold warn support in scalar training dashboard. | |||
| - Provide more user-friendly callback function for visualization | |||
| - Provide unified callback `SummaryCollector` to log most commonly visualization event. | |||
| - Discard the original visualization callback `SummaryStep`, `TrainLineage` and `EvalLineage`. | |||
| - `SummaryRecord` provide new API `add_value` to collect data into cache for summary persistence. | |||
| - `SummaryRecord` provide new API `set_mode` to distinguish summary persistence mode at different stages. | |||
| - MindConverter supports conversion of more operators and networks, and improves its ease of use. | |||
| * Fix FileNotFound exception by adding robust check for summary watcher ([!281](https://gitee.com/mindspore/mindinsight/pulls/281)). | |||
| * UI fix operator table sort jump problem ([!283](https://gitee.com/mindspore/mindinsight/pulls/283)). | |||
| * Dataset serializer return schema json str when schema type is `mindspore.dataset.engine.Schema` ([!2185](https://gitee.com/mindspore/mindspore/pulls/2185)). | |||
| ### Bugfixes | |||
| ## Thanks to our Contributors | |||
| - Fix FileNotFound exception by adding robust check for summary watcher ([!281](https://gitee.com/mindspore/mindinsight/pulls/281)). | |||
| - UI fix operator table sort jump problem ([!283](https://gitee.com/mindspore/mindinsight/pulls/283)). | |||
| - Dataset serializer return schema json str when schema type is `mindspore.dataset.engine.Schema` ([!2185](https://gitee.com/mindspore/mindspore/pulls/2185)). | |||
| ### Thanks to our Contributors | |||
| Thanks goes to these wonderful people: | |||
| @@ -162,31 +234,33 @@ Chao Chen, Congli Gao, Ye Huang, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Lon | |||
| Contributions of any kind are welcome! | |||
| # MindInsight 0.3.0-alpha Release Notes | |||
| # MindInsight 0.3.0-alpha | |||
| ## MindInsight 0.3.0 Release Notes | |||
| ## Major Features and Improvements | |||
| ### Major Features and Improvements | |||
| * Profiling | |||
| * Provide easy to use apis for profiling start/stop and profiling data analyse (on Ascend only). | |||
| * Provide operators performance display and analysis on MindInsight UI. | |||
| * Large scale network computation graph visualization. | |||
| * Optimize summary record implementation and improve its performance. | |||
| * Improve lineage usability | |||
| * Optimize lineage display and enrich tabular operation. | |||
| * Decouple lineage callback from `SummaryRecord`. | |||
| * Support scalar compare of multiple runs. | |||
| * Scripts conversion from other frameworks | |||
| * Support for converting PyTorch scripts within TorchVision to MindSpore scripts automatically. | |||
| - Profiling | |||
| - Provide easy to use apis for profiling start/stop and profiling data analyse (on Ascend only). | |||
| - Provide operators performance display and analysis on MindInsight UI. | |||
| - Large scale network computation graph visualization. | |||
| - Optimize summary record implementation and improve its performance. | |||
| - Improve lineage usability | |||
| - Optimize lineage display and enrich tabular operation. | |||
| - Decouple lineage callback from `SummaryRecord`. | |||
| - Support scalar compare of multiple runs. | |||
| - Scripts conversion from other frameworks | |||
| - Support for converting PyTorch scripts within TorchVision to MindSpore scripts automatically. | |||
| ## Bugfixes | |||
| ### Bugfixes | |||
| * Fix pb files loaded problem when files are modified at the same time ([!53](https://gitee.com/mindspore/mindinsight/pulls/53)). | |||
| * Fix load data thread stuck in `LineageCacheItemUpdater` ([!114](https://gitee.com/mindspore/mindinsight/pulls/114)). | |||
| * Fix samples from previous steps erased due to tags size too large problem ([!86](https://gitee.com/mindspore/mindinsight/pulls/86)). | |||
| * Fix image and histogram event package error ([!1143](https://gitee.com/mindspore/mindspore/pulls/1143)). | |||
| * Equally distribute histogram ignoring actual step number to avoid large white space ([!66](https://gitee.com/mindspore/mindinsight/pulls/66)). | |||
| - Fix pb files loaded problem when files are modified at the same time ([!53](https://gitee.com/mindspore/mindinsight/pulls/53)). | |||
| - Fix load data thread stuck in `LineageCacheItemUpdater` ([!114](https://gitee.com/mindspore/mindinsight/pulls/114)). | |||
| - Fix samples from previous steps erased due to tags size too large problem ([!86](https://gitee.com/mindspore/mindinsight/pulls/86)). | |||
| - Fix image and histogram event package error ([!1143](https://gitee.com/mindspore/mindspore/pulls/1143)). | |||
| - Equally distribute histogram ignoring actual step number to avoid large white space ([!66](https://gitee.com/mindspore/mindinsight/pulls/66)). | |||
| ## Thanks to our Contributors | |||
| ### Thanks to our Contributors | |||
| Thanks goes to these wonderful people: | |||
| @@ -194,27 +268,29 @@ Chao Chen, Congli Gao, Ye Huang, Weifeng Huang, Zhenzhong Kou, Hongzhang Li, Lon | |||
| Contributions of any kind are welcome! | |||
| # MindInsight 0.2.0-alpha Release Notes | |||
| # MindInsight 0.2.0-alpha | |||
| ## Major Features and Improvements | |||
| ## MindInsight 0.2.0 Release Notes | |||
| * Parameter distribution graph (Histogram). | |||
| ### Major Features and Improvements | |||
| - Parameter distribution graph (Histogram). | |||
| Now you can use [`HistogramSummary`](https://www.mindspore.cn/doc/api_python/en/master/mindspore/mindspore.ops.html#mindspore.ops.HistogramSummary) and MindInsight to record and visualize distribution info of tensors. See our [tutorial](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/visualization_tutorials.html). | |||
| * Lineage support Custom information | |||
| * GPU support | |||
| * Model and dataset tracking linkage support | |||
| - Lineage support Custom information | |||
| - GPU support | |||
| - Model and dataset tracking linkage support | |||
| ## Bugfixes | |||
| ### Bugfixes | |||
| * Reduce cyclomatic complexity of `list_summary_directories` ([!11](https://gitee.com/mindspore/mindinsight/pulls/11)). | |||
| * Fix unsafe functions and duplication files and redundant codes ([!14](https://gitee.com/mindspore/mindinsight/pulls/14)). | |||
| * Fix sha256 checksum missing bug ([!24](https://gitee.com/mindspore/mindinsight/pulls/24)). | |||
| * Fix graph bug when node name is empty ([!34](https://gitee.com/mindspore/mindinsight/pulls/34)). | |||
| * Fix start/stop command error code incorrect ([!44](https://gitee.com/mindspore/mindinsight/pulls/44)). | |||
| - Reduce cyclomatic complexity of `list_summary_directories` ([!11](https://gitee.com/mindspore/mindinsight/pulls/11)). | |||
| - Fix unsafe functions and duplication files and redundant codes ([!14](https://gitee.com/mindspore/mindinsight/pulls/14)). | |||
| - Fix sha256 checksum missing bug ([!24](https://gitee.com/mindspore/mindinsight/pulls/24)). | |||
| - Fix graph bug when node name is empty ([!34](https://gitee.com/mindspore/mindinsight/pulls/34)). | |||
| - Fix start/stop command error code incorrect ([!44](https://gitee.com/mindspore/mindinsight/pulls/44)). | |||
| ## Thanks to our Contributors | |||
| ### Thanks to our Contributors | |||
| Thanks goes to these wonderful people: | |||
| @@ -222,10 +298,12 @@ Ye Huang, Weifeng Huang, Zhenzhong Kou, Pengting Luo, Hongzhang Li, Yongxiong Li | |||
| Contributions of any kind are welcome! | |||
| # MindInsight 0.1.0-alpha Release Notes | |||
| # MindInsight 0.1.0-alpha | |||
| ## MindInsight 0.1.0 Release Notes | |||
| * Training process observation | |||
| * Provides and displays training process information, including computational graphs and training process indicators. | |||
| - Training process observation | |||
| - Provides and displays training process information, including computational graphs and training process indicators. | |||
| * Training result tracing | |||
| * Provides functions of tracing and visualizing model training parameter information, including filtering and sorting of training data, model accuracy and training hyperparameters. | |||
| - Training result tracing | |||
| - Provides functions of tracing and visualizing model training parameter information, including filtering and sorting of training data, model accuracy and training hyperparameters. | |||
| @@ -17,6 +17,7 @@ | |||
| import os | |||
| import time | |||
| import signal | |||
| import sys | |||
| import multiprocessing | |||
| import threading | |||
| from importlib import import_module | |||
| @@ -51,6 +52,11 @@ def on_starting(server): | |||
| threading.Thread(target=hook.on_startup, args=(server.log,)).start() | |||
| # This global variable is to manage the listen process so that we can close the | |||
| # process when gunicorn is exiting. | |||
| LISTEN_PROCESS = None | |||
| def post_worker_init(worker): | |||
| """ | |||
| Launch a process to listen worker after gunicorn worker is initialized. | |||
| @@ -62,7 +68,9 @@ def post_worker_init(worker): | |||
| worker (ThreadWorker): worker instance. | |||
| """ | |||
| def murder_worker_children_processes(): | |||
| signal.signal(signal.SIGTERM, signal.SIG_IGN) | |||
| signal.signal( | |||
| signal.SIGTERM, | |||
| lambda signal_num, handler: sys.exit(0)) | |||
| processes_to_kill = [] | |||
| # sleep 3 seconds so that all worker children processes have been launched. | |||
| time.sleep(3) | |||
| @@ -91,18 +99,15 @@ def post_worker_init(worker): | |||
| listen_process = multiprocessing.Process(target=murder_worker_children_processes, | |||
| name="murder_worker_children_processes") | |||
| listen_process.start() | |||
| global LISTEN_PROCESS | |||
| LISTEN_PROCESS = listen_process | |||
| worker.log.info("Server pid: %d, start to listening.", worker.ppid) | |||
| def worker_int(worker): | |||
| """Terminate child processes when worker is interrupted.""" | |||
| terminate() | |||
| process = psutil.Process(worker.pid) | |||
| children = process.children(recursive=True) | |||
| for child in children: | |||
| try: | |||
| child.send_signal(signal.SIGTERM) | |||
| except psutil.NoSuchProcess: | |||
| continue | |||
| except psutil.Error as ex: | |||
| worker.log.error("Stop process %d failed. Detail: %s.", child.pid, str(ex)) | |||
| global LISTEN_PROCESS | |||
| if LISTEN_PROCESS is not None: | |||
| LISTEN_PROCESS.terminate() | |||
| worker.log.info("Worker int processed.") | |||
| @@ -156,6 +156,13 @@ def get_training_trace_graph(): | |||
| }}) | |||
| graph_info['summary'] = analyser.summary | |||
| graph_info['point_info'] = analyser.point_info | |||
| graph_info['is_heterogeneous'] = False | |||
| # In heterogeneous training scene, do not display step trace data. | |||
| cpu_op_type_file_name = f"cpu_op_type_info_{device_id}.csv" | |||
| if cpu_op_type_file_name in os.listdir(profiler_dir_abs): | |||
| graph_info = {'is_heterogeneous': True} | |||
| return jsonify(graph_info) | |||
| @@ -912,18 +912,16 @@ class DataManager: | |||
| return | |||
| self.status = DataManagerStatus.LOADING.value | |||
| with ComputingResourceManager(executors_cnt=1, | |||
| max_processes_cnt=settings.MAX_PROCESSES_COUNT) as computing_resource_mgr: | |||
| with computing_resource_mgr.get_executor() as executor: | |||
| self._brief_cache.update_cache(executor) | |||
| brief_cache_update = time.time() | |||
| for _ in self._detail_cache.update_cache(executor): | |||
| update_interval = time.time() - brief_cache_update | |||
| logger.debug('Loading one round of detail cache taking %ss.', update_interval) | |||
| if update_interval > 3: # Use 3 seconds as threshold to avoid updating too often | |||
| self._brief_cache.update_cache(executor) | |||
| brief_cache_update += update_interval | |||
| executor.wait_all_tasks_finish() | |||
| with ComputingResourceManager.get_instance().get_executor( | |||
| max_processes_cnt=settings.MAX_PROCESSES_COUNT) as executor: | |||
| self._brief_cache.update_cache(executor) | |||
| brief_cache_update = time.time() | |||
| for _ in self._detail_cache.update_cache(executor): | |||
| update_interval = time.time() - brief_cache_update | |||
| logger.debug('Loading one round of detail cache taking %ss.', update_interval) | |||
| if update_interval > 3: # Use 3 seconds as threshold to avoid updating too often | |||
| self._brief_cache.update_cache(executor) | |||
| brief_cache_update += update_interval | |||
| with self._status_mutex: | |||
| if not self._brief_cache.has_content() and not self._detail_cache.has_content(): | |||
| self.status = DataManagerStatus.INVALID.value | |||
| @@ -102,12 +102,10 @@ class MSDataLoader: | |||
| if executor is not None: | |||
| raise TypeError("'executor' should be an Executor instance or None.") | |||
| with ComputingResourceManager() as mgr: | |||
| with mgr.get_executor() as new_executor: | |||
| while not self._load(new_executor): | |||
| pass | |||
| new_executor.wait_all_tasks_finish() | |||
| return True | |||
| with ComputingResourceManager.get_instance().get_executor() as new_executor: | |||
| while not self._load(new_executor): | |||
| pass | |||
| return True | |||
| def _load(self, executor): | |||
| """ | |||
| @@ -15,7 +15,6 @@ | |||
| - [Example](#example) | |||
| - [AST-Based Conversion](#ast-based-conversion) | |||
| - [Graph-Based Conversion](#graph-based-conversion) | |||
| - [PyTorch Model Scripts Conversion](#pytorch-model-scripts-conversion) | |||
| - [TensorFlow Model Scripts Conversion](#tensorflow-model-scripts-conversion) | |||
| - [ONNX Model File Conversion](#onnx-model-file-conversion) | |||
| - [Caution](#caution) | |||
| @@ -48,7 +47,6 @@ usage: mindconverter [-h] [--version] [--in_file IN_FILE] | |||
| [--input_nodes INPUT_NODES [INPUT_NODES ...]] | |||
| [--output_nodes OUTPUT_NODES [OUTPUT_NODES ...]] | |||
| [--output OUTPUT] [--report REPORT] | |||
| [--project_path PROJECT_PATH] | |||
| optional arguments: | |||
| -h, --help show this help message and exit | |||
| @@ -56,10 +54,10 @@ optional arguments: | |||
| --in_file IN_FILE Specify path for script file to use AST schema to do | |||
| script conversation. | |||
| --model_file MODEL_FILE | |||
| PyTorch(.pth), Tensorflow(.pb) or ONNX(.onnx) model | |||
| file path is expected to do script generation based on | |||
| graph schema. When `--in_file` and `--model_file` are | |||
| both provided, use AST schema as default. | |||
| Tensorflow(.pb) or ONNX(.onnx) model file path is | |||
| expected to do script generation based on graph | |||
| schema. When `--in_file` and `--model_file` are both | |||
| provided, use AST schema as default. | |||
| --shape SHAPE [SHAPE ...] | |||
| Optional, expected input tensor shape of | |||
| `--model_file`. It is required when use graph based | |||
| @@ -67,46 +65,31 @@ optional arguments: | |||
| with `--input_nodes`. Usage: --shape 1,512 1,512 | |||
| --input_nodes INPUT_NODES [INPUT_NODES ...] | |||
| Optional, input node(s) name of `--model_file`. It is | |||
| required when use TensorFlow and ONNX model. Both | |||
| order and number should be consistent with `--shape`. | |||
| Usage: --input_nodes input_1:0 input_2:0 | |||
| required when use graph based schema. Both order and | |||
| number should be consistent with `--shape`. Usage: | |||
| --input_nodes input_1:0 input_2:0 | |||
| --output_nodes OUTPUT_NODES [OUTPUT_NODES ...] | |||
| Optional, output node(s) name of `--model_file`. It is | |||
| required when use TensorFlow and ONNX model. Usage: | |||
| required when use graph based schema. Usage: | |||
| --output_nodes output_1:0 output_2:0 | |||
| --output OUTPUT Optional, specify path for converted script file | |||
| directory. Default output directory is `output` folder | |||
| in the current working directory. | |||
| --report REPORT Optional, specify report directory. Default is | |||
| converted script directory. | |||
| --project_path PROJECT_PATH | |||
| Optional, PyTorch scripts project path. If PyTorch | |||
| project is not in PYTHONPATH, please assign | |||
| `--project_path` when use graph based schema. Usage: | |||
| --project_path ~/script_file/ | |||
| ``` | |||
| ### PyTorch Model Scripts Migration | |||
| #### MindConverter Provides Two Modes for PyTorch: | |||
| #### MindConverter Provides AST for PyTorch: | |||
| 1. **Abstract Syntax Tree (AST) based conversion**: Use the argument `--in_file` will enable the AST mode. | |||
| 2. **Computational Graph based conversion**: Use `--model_file` and `--shape` arguments will enable the Graph mode. | |||
| **Abstract Syntax Tree (AST) based conversion**: Use the argument `--in_file` will enable the AST mode. | |||
| > The AST mode will be enabled, if both `--in_file` and `--model_file` are specified. | |||
| For the Graph mode, `--shape` is mandatory. | |||
| `--output` and `--report` is optional. MindConverter creates an `output` folder under the current working directory, and outputs generated scripts to it. | |||
| For the AST mode, `--shape` is ignored. | |||
| `--output` and `--report` is optional. MindConverter creates an `output` folder under the current working directory, and outputs generated scripts, converted checkpoint file, weight map file and conversion reports to it. | |||
| Please note that your original PyTorch project is included in the module search path (PYTHONPATH). Use the python interpreter and test your module can be successfully loaded by `import` command. Use `--project_path` instead if your project is not in the PYTHONPATH to ensure MindConverter can load it. | |||
| > Assume the project is located at `/home/user/project/model_training`, users can use this command to add the project to `PYTHONPATH` : `export PYTHONPATH=/home/user/project/model_training:$PYTHONPATH` | |||
| > MindConverter needs the original PyTorch scripts because of the reverse serialization. | |||
| PyTorch(.pth) conversion only supports one input and one output model, it is recommended to convert multi-input or multi-output PyTorch script using ONNX conversion after converting PyTorch script to ONNX file. | |||
| > While computational graph based conversion is required, it is recommended to use ONNX file after converting PyTorch model scripts to ONNX file, and the tutorial is [PyTorch instruction](https://pytorch.org/docs/stable/onnx.html). | |||
| ### TensorFlow Model Scripts Migration | |||
| @@ -114,12 +97,16 @@ PyTorch(.pth) conversion only supports one input and one output model, it is rec | |||
| > AST mode is not supported for TensorFlow, only computational graph based mode is available. | |||
If neither `output` nor `report` is set, MindConverter creates an `output` folder under the current working directory, and outputs generated scripts, converted checkpoint file, weight map file and conversion reports to it. | |||
| ### ONNX Model File Migration | |||
| **MindConverter provides computational graph based conversion for ONNX**: Transformation will be done given `--model_file`, `--shape`, `--input_nodes` and `--output_nodes`. | |||
| > AST mode is not supported for ONNX, only computational graph based mode is available. | |||
If neither `output` nor `report` is set, MindConverter creates an `output` folder under the current working directory, and outputs generated scripts, converted checkpoint file, weight map file and conversion reports to it. | |||
| ## Scenario | |||
| MindConverter provides two modes for different migration demands. | |||
| @@ -133,7 +120,7 @@ For the second demand, the Graph mode is recommended. As the computational graph | |||
| Some typical image classification networks have been tested for the Graph mode. Note that: | |||
| > 1. The Dropout operator will be lost after conversion because the inference mode is used to load the PyTorch or TensorFlow model. Manually re-implement is necessary. | |||
| > 1. The Dropout operator will be lost after conversion because the inference mode is used to load the ONNX or TensorFlow model. Manually re-implement is necessary. | |||
| > 2. The Graph-based mode will be continuously developed and optimized with further updates. | |||
| [Supported models list (Models in below table have been tested based on PyTorch 1.5.0 and TensorFlow 1.15.0, X86 Ubuntu released version)](./docs/supported_model_list.md). | |||
| @@ -174,18 +161,23 @@ For non-transformed operators, suggestions are provided in the report. For insta | |||
| ### Graph-Based Conversion | |||
| #### PyTorch Model Scripts Conversion | |||
| #### TensorFlow Model Scripts Conversion | |||
| To use TensorFlow model script migration, users need to export TensorFlow model to Pb format first, and obtain the model input node and output node name. For exporting pb model, please refer to [TensorFlow Pb model exporting](#tensorflow-pb-model-exporting). | |||
| Assume the PyTorch model (.pth file) is located at `/home/user/model.pth`, with input shape (1, 3, 224, 224) and the original PyTorch script is at `/home/user/project/model_training`. Output the transformed MindSpore script, MindSpore checkpoint file and weight map file to `/home/user/output`, with the conversion report to `/home/user/output/report`. Use the following command: | |||
| Suppose the input node name is `input_1:0`, output node name is `predictions/Softmax:0`, the input shape of model is `1,224,224,3` and the original TensorFlow model is at `/home/user/xxx/frozen_model.pb`. Output the transformed MindSpore script and MindSpore checkpoint file to `/home/user/output`, with the conversion report and weight map file to `/home/user/output/report`. Use the following command: | |||
| ```bash | |||
| mindconverter --model_file /home/user/model.pth --shape 1,3,224,224 \ | |||
| mindconverter --model_file /home/user/xxx/frozen_model.pb --shape 1,224,224,3 \ | |||
| --input_nodes input_1:0 \ | |||
| --output_nodes predictions/Softmax:0 \ | |||
| --output /home/user/output \ | |||
| --report /home/user/output/report \ | |||
| --project_path /home/user/project/model_training | |||
| --report /home/user/output/report | |||
| ``` | |||
| The Graph mode has the same conversion report as the AST mode. However, the line number and column number refer to the transformed scripts since no original scripts are used in the process. | |||
| After executed, MindSpore script, MindSpore checkpoint file, weight map file and report file can be found in corresponding directory. | |||
| Since the graph based scheme is a generative method, the original TensorFlow script is not referenced in the conversion process. Therefore, the code line and column numbers involved in the generated conversion report refer to the generated script. | |||
| In addition, input and output Tensor shape of unconverted operators shows explicitly (`input_shape` and `output_shape`) as comments in converted scripts to help further manual modifications. Here is an example of the `Reshape` operator (Not supported in current version): | |||
| @@ -265,26 +257,6 @@ Here is an example of the weight map: | |||
| Weight information in MindSpore (`converted_weight`) and that in source framework(`source_weight`) are saved in weight map separately. | |||
| #### TensorFlow Model Scripts Conversion | |||
| To use TensorFlow model script migration, users need to export TensorFlow model to Pb format first, and obtain the model input node and output node name. For exporting pb model, please refer to [TensorFlow Pb model exporting](#tensorflow-pb-model-exporting). | |||
| Suppose the model is saved to `/home/user/xxx/frozen_model.pb`, corresponding input node name is `input_1:0`, output node name is `predictions/Softmax:0`, the input shape of model is `1,224,224,3`, the following command can be used to generate the script: | |||
| ```bash | |||
| mindconverter --model_file /home/user/xxx/frozen_model.pb --shape 1,224,224,3 \ | |||
| --input_nodes input_1:0 \ | |||
| --output_nodes predictions/Softmax:0 \ | |||
| --output /home/user/output \ | |||
| --report /home/user/output/report | |||
| ``` | |||
| After executed, MindSpore script, MindSpore checkpoint file, weight map file and report file can be found in corresponding directory. | |||
| Since the graph based scheme is a generative method, the original TensorFlow script is not referenced in the conversion process. Therefore, the code line and column numbers involved in the generated conversion report refer to the generated script. | |||
| In addition, for operators that are not converted successfully, the input and output shape of tensor of the node will be identified in the code by `input_shape` and `output_shape`. For example, please refer to the example in **PyTorch Model Scripts Conversion** section. | |||
| #### ONNX Model File Conversion | |||
| To use ONNX model file migration, user needs to obtain the model input node and output node name from ONNX model. To get input node and output node name, [Netron](https://github.com/lutzroeder/netron) is recommended. | |||
| @@ -303,14 +275,15 @@ After executed, MindSpore script, MindSpore checkpoint file, weight map file and | |||
| Since the graph based scheme is a generative method, the original ONNX model is not referenced in the conversion process. Therefore, the code line and column numbers involved in the generated conversion report refer to the generated script. | |||
| In addition, for operators that are not converted successfully, the input and output shape of tensor of the node will be identified in the code by `input_shape` and `output_shape`. For example, please refer to the example in **PyTorch Model Scripts Conversion** section. | |||
| In addition, for operators that are not converted successfully, the input and output shape of tensor of the node will be identified in the code by `input_shape` and `output_shape`. For example, please refer to the example in **TensorFlow Model Scripts Conversion** section. | |||
| ## Caution | |||
| 1. PyTorch, TensorFlow are not an explicitly stated dependency libraries in MindInsight. The Graph conversion requires the consistent PyTorch or TensorFlow version as the model is trained. (For MindConverter, PyTorch 1.5.0 is supported while PyTorch 1.4.x is unsupported; PyTorch 1.6.x and PyTorch 1.7.x are untested.). | |||
| 2. This script conversion tool relies on operators which supported by MindConverter and MindSpore. Unsupported operators may not be successfully mapped to MindSpore operators. You can manually edit, or implement the mapping based on MindConverter, and contribute to our MindInsight repository. We appreciate your support for the MindSpore community. | |||
1. TensorFlow is not an explicitly stated dependency library in MindInsight. The Graph conversion requires a TensorFlow version consistent with the one used to train the model. | |||
| 2. This script conversion tool relies on operators which supported by ONNX and MindSpore. Unsupported operators may not be successfully mapped to MindSpore operators. You can manually edit, or implement the mapping based on MindConverter, and contribute to our MindInsight repository. We appreciate your support for the MindSpore community. | |||
| 3. MindConverter converts dynamic input shape to constant one based on `--shape` while using graph based scheme, as a result, it is required that inputs shape used to retrain or inference in MindSpore are the same as that used to convert using MindConverter. If inputs shape has changed, rerunning MindConverter with new `--shape` or fixing shape related parameters in old script manually is necessary. | |||
| 4. MindSpore script, MindSpore checkpoint file and weight map file are saved in the same file folder path. | |||
| 4. MindSpore script and MindSpore checkpoint file are saved in the same file folder path, while report file and weight map are saved in the same one. | |||
| 5. The security and consistency of the model file should be guaranteed by the user. | |||
| ## Unsupported situation of AST mode | |||
| @@ -345,7 +318,7 @@ class ConvBNReLU(nn.Sequential): | |||
| ## Requirements | |||
| For users using MindConverter, in addition to install the TensorFlow or PyTorch that can satisfy the model loading, inference and training requirements, users also need to pip install the following third party package (tf2onnx is not required for users that convert PyTorch model definition script to MindSpore): | |||
| For users using MindConverter, in addition to install the TensorFlow that can satisfy the model loading, inference and training requirements, users also need to pip install the following third party package (tf2onnx is not required for users that convert ONNX model definition file to MindSpore): | |||
| ```text | |||
| onnx>=1.8.0 | |||
| @@ -359,16 +332,13 @@ For some models, if the onnx or tf2onnx error message appears during the convers | |||
| ## Frequently asked questions | |||
| Q1. `terminate called after throwing an instance of 'std::system_error', what(): Resource temporarily unavailable, Aborted (core dumped)`: | |||
| > Answer: This problem is caused by TensorFlow. First step of conversion process is loading TensorFlow model into memory using TensorFlow module, and TensorFlow starts to apply for needed resource. When required resource is unavailable, such as exceeding max process number of Linux system limit, etc., TensorFlow will raise an error from its C/C++ layer. For more detail, please refer to TensorFlow official repository. There are some known issue for reference only: | |||
> Answer: This problem is caused by TensorFlow. First step of conversion process is loading TensorFlow model into memory using TensorFlow module, and TensorFlow starts to apply for needed resource. When required resource is unavailable, such as exceeding max process number of Linux system limit, etc., TensorFlow will raise an error from its C/C++ layer. For more detail, please refer to TensorFlow official repository. There are some known issues for reference only: | |||
| [TF ISSUE 14885](https://github.com/tensorflow/tensorflow/issues/14885), [TF ISSUE 37449](https://github.com/tensorflow/tensorflow/issues/37449) | |||
| Q2. Can MindConverter run on ARM platform? | |||
| > Answer: MindConverter supports both x86 and ARM platform. Please ensure all required dependencies and environments installed in the ARM platform. | |||
| Q3. Why did I get message of `Error detail: [NodeInputMissing] ...` when converting PyTorch model? | |||
| > Answer: For PyTorch model, if operations in `torch.nn.functional.xxx`, `torch.xxx`, `torch.Tensor.xxx` were used, node parsing could be failed. It's better to replace those operations with `torch.nn.xxx`. | |||
| Q4. Why does the conversion process take a lot of time (more than 10 minutes), but the model is not so large? | |||
| Q3. Why does the conversion process take a lot of time (more than 10 minutes), but the model is not so large? | |||
| > Answer: When converting, MindConverter needs to use protobuf to deserialize the model file. Please make sure that the protobuf installed in Python environment is implemented by C++ backend. The validation method is as follows. If the output is "python", you need to install Python protobuf implemented by C++ (download the protobuf source code, enter the "python" subdirectory in the source code, and use `python setup.py install --cpp_implementation` to install). If the output is "cpp" and the conversion process still takes a long time, please add environment variable `export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp` before conversion. | |||
| ```python | |||
| @@ -15,7 +15,6 @@ | |||
| - [使用示例](#使用示例) | |||
| - [基于AST的脚本转换示例](#基于ast的脚本转换示例) | |||
| - [基于图结构的脚本生成示例](#基于图结构的脚本生成示例) | |||
| - [PyTorch模型脚本生成示例](#pytorch模型脚本生成示例) | |||
| - [TensorFlow模型脚本生成示例](#tensorflow模型脚本生成示例) | |||
| - [ONNX模型文件生成示例](#onnx模型文件生成示例) | |||
| - [注意事项](#注意事项) | |||
| @@ -48,7 +47,6 @@ usage: mindconverter [-h] [--version] [--in_file IN_FILE] | |||
| [--input_nodes INPUT_NODES [INPUT_NODES ...]] | |||
| [--output_nodes OUTPUT_NODES [OUTPUT_NODES ...]] | |||
| [--output OUTPUT] [--report REPORT] | |||
| [--project_path PROJECT_PATH] | |||
| optional arguments: | |||
| -h, --help show this help message and exit | |||
| @@ -56,10 +54,10 @@ optional arguments: | |||
| --in_file IN_FILE Specify path for script file to use AST schema to do | |||
| script conversation. | |||
| --model_file MODEL_FILE | |||
| PyTorch(.pth), Tensorflow(.pb) or ONNX(.onnx) model | |||
| file path is expected to do script generation based on | |||
| graph schema. When `--in_file` and `--model_file` are | |||
| both provided, use AST schema as default. | |||
| Tensorflow(.pb) or ONNX(.onnx) model file path is | |||
| expected to do script generation based on graph | |||
| schema. When `--in_file` and `--model_file` are both | |||
| provided, use AST schema as default. | |||
| --shape SHAPE [SHAPE ...] | |||
| Optional, expected input tensor shape of | |||
| `--model_file`. It is required when use graph based | |||
| @@ -67,45 +65,30 @@ optional arguments: | |||
| with `--input_nodes`. Usage: --shape 1,512 1,512 | |||
| --input_nodes INPUT_NODES [INPUT_NODES ...] | |||
| Optional, input node(s) name of `--model_file`. It is | |||
| required when use TensorFlow and ONNX model. Both | |||
| order and number should be consistent with `--shape`. | |||
| Usage: --input_nodes input_1:0 input_2:0 | |||
| required when use graph based schema. Both order and | |||
| number should be consistent with `--shape`. Usage: | |||
| --input_nodes input_1:0 input_2:0 | |||
| --output_nodes OUTPUT_NODES [OUTPUT_NODES ...] | |||
| Optional, output node(s) name of `--model_file`. It is | |||
| required when use TensorFlow and ONNX model. Usage: | |||
| required when use graph based schema. Usage: | |||
| --output_nodes output_1:0 output_2:0 | |||
| --output OUTPUT Optional, specify path for converted script file | |||
| directory. Default output directory is `output` folder | |||
| in the current working directory. | |||
| --report REPORT Optional, specify report directory. Default is | |||
| converted script directory. | |||
| --project_path PROJECT_PATH | |||
| Optional, PyTorch scripts project path. If PyTorch | |||
| project is not in PYTHONPATH, please assign | |||
| `--project_path` when use graph based schema. Usage: | |||
| --project_path ~/script_file/ | |||
| ``` | |||
| ### PyTorch模型脚本迁移 | |||
| #### MindConverter提供两种PyTorch模型脚本迁移方案: | |||
| 1. **基于抽象语法树(Abstract syntax tree, AST)的脚本转换**:指定`--in_file`的值,将使用基于AST的脚本转换方案; | |||
| 2. **基于图结构的脚本生成**:指定`--model_file`与`--shape`将使用基于图结构的脚本生成方案。 | |||
| #### MindConverter仅提供使用AST进行PyTorch模型脚本迁移方案: | |||
| **基于抽象语法树(Abstract syntax tree, AST)的脚本转换**:指定`--in_file`的值,将使用基于AST的脚本转换方案; | |||
| > 若同时指定了`--in_file`,`--model_file`将默认使用AST方案进行脚本迁移。 | |||
| 当使用基于图结构的脚本生成方案时,要求必须指定`--shape`的值;当使用基于AST的脚本转换方案时,`--shape`会被忽略。 | |||
| 其中,`--output`与`--report`参数可省略。若省略,MindConverter将在当前工作目录(Working directory)下自动创建`output`目录,将生成的脚本、转换报告、权重文件、权重映射表输出至该目录。 | |||
| 另外,当使用基于图结构的脚本生成方案时,请确保原PyTorch项目已在Python包搜索路径中,可通过CLI进入Python交互式命令行,通过import的方式判断是否已满足;若未加入,可通过`--project_path` | |||
| 命令手动将项目路径传入,以确保MindConverter可引用到原PyTorch脚本。 | |||
| 其中,`--output`与`--report`参数可省略。若省略,MindConverter将在当前工作目录(Working directory)下自动创建`output`目录,将生成的脚本、转换报告输出至该目录。 | |||
| > 假设用户项目目录为`/home/user/project/model_training`,用户可通过如下命令手动将项目添加至包搜索路径中:`export PYTHONPATH=/home/user/project/model_training:$PYTHONPATH`; | |||
| > 此处MindConverter需要引用原PyTorch脚本,是因为PyTorch模型反向序列化过程中会引用原脚本。 | |||
| PyTorch(.pth)模型转换仅支持单输入、单输出的PyTorch模型,如需转换多输入、多输出模型,建议转换为ONNX之后,使用ONNX进行转换。 | |||
> 若需要基于图模式进行PyTorch模型脚本迁移,建议将PyTorch模型转换为ONNX之后,使用ONNX文件进行模型脚本迁移,详情见 [PyTorch使用说明](https://pytorch.org/docs/stable/onnx.html) 。 | |||
| ### TensorFlow模型脚本迁移 | |||
| @@ -113,12 +96,16 @@ PyTorch(.pth)模型转换仅支持单输入、单输出的PyTorch模型,如需 | |||
| > AST方案不支持TensorFlow模型脚本迁移,TensorFlow脚本迁移仅支持基于图结构的方案。 | |||
| 若省略`--output`与`--report`参数,MindConverter将在当前工作目录(Working directory)下自动创建`output`目录,将生成的脚本、转换报告、权重文件、权重映射表输出至该目录。 | |||
| ### ONNX模型文件迁移 | |||
| **MindConverter提供基于图结构的脚本生成方案**:指定`--model_file`、`--shape`、`--input_nodes`、`--output_nodes`进行脚本迁移。 | |||
| > AST方案不支持ONNX模型文件迁移,ONNX文件迁移仅支持基于图结构的方案。 | |||
| 若省略`--output`与`--report`参数,MindConverter将在当前工作目录(Working directory)下自动创建`output`目录,将生成的脚本、转换报告、权重文件、权重映射表输出至该目录。 | |||
| ## 使用场景 | |||
| MindConverter提供两种技术方案,以应对不同脚本迁移场景: | |||
| @@ -132,7 +119,7 @@ MindConverter提供两种技术方案,以应对不同脚本迁移场景: | |||
| 目前已基于典型图像分类网络对图结构的脚本转换方案进行测试。 | |||
| > 1. 基于图结构的脚本生成方案,由于要加载PyTorch、TensorFlow模型,会导致转换后网络中Dropout算子丢失,需要用户手动补齐。 | |||
| > 1. 基于图结构的脚本生成方案,由于要以推理模式加载ONNX,TensorFlow模型,会导致转换后网络中Dropout算子丢失,需要用户手动补齐。 | |||
| > 2. 基于图结构的脚本生成方案持续优化中。 | |||
| [支持的模型列表(如下模型已基于x86 Ubuntu发行版,PyTorch 1.5.0以及TensorFlow 1.15.0测试通过)](./docs/supported_model_list_cn.md)。 | |||
| @@ -175,26 +162,27 @@ line x:y: [UnConvert] 'operator' didn't convert. ... | |||
| ### 基于图结构的脚本生成示例 | |||
| #### PyTorch模型脚本生成示例 | |||
| #### TensorFlow模型脚本生成示例 | |||
| 使用TensorFlow模型脚本迁移,需要先将TensorFlow模型导出为pb格式,并且获取模型输入节点、输出节点名称。TensorFlow pb模型导出可参考[TensorFlow Pb模型导出](#tensorflow-pb模型导出) | |||
| 。 | |||
若用户已将PyTorch模型保存为.pth格式,假设模型绝对路径为`/home/user/model.pth`,该模型期望的输入shape为(1, 3, 224, 224) | |||
| ,原PyTorch脚本位于`/home/user/project/model_training`,希望将脚本、权重文件和权重映射表输出至`/home/user/output` | |||
| ,转换报告输出至`/home/user/output/report` | |||
| 。<br /> 则脚本生成命令为: | |||
| 假设输入节点名称为`input_1:0`、输出节点名称为`predictions/Softmax:0`,模型输入样本尺寸为`1,224,224,3`,模型绝对路径为`/home/user/xxx/frozen_model.pb`,希望将脚本、权重文件输出至`/home/user/output`,转换报告以及权重映射表输出至`/home/user/output/report`,则脚本生成命令为: | |||
| ```bash | |||
| mindconverter --model_file /home/user/model.pth --shape 1,3,224,224 \ | |||
| mindconverter --model_file /home/user/xxx/frozen_model.pb --shape 1,224,224,3 \ | |||
| --input_nodes input_1:0 \ | |||
| --output_nodes predictions/Softmax:0 \ | |||
| --output /home/user/output \ | |||
| --report /home/user/output/report \ | |||
| --project_path /home/user/project/model_training | |||
| --report /home/user/output/report | |||
| ``` | |||
| 执行该命令,MindSpore代码文件、权重文件、权重映射表和转换报告生成至相应目录。 | |||
| 基于图结构的脚本生成方案产生的转换报告格式与AST方案相同。然而,由于基于图结构方案属于生成式方法,转换过程中未参考原PyTorch脚本,因此生成的转换报告中涉及的代码行、列号均指生成后脚本。 | |||
| 由于基于图结构方案属于生成式方法,转换过程中未参考原TensorFlow脚本,因此生成的转换报告中涉及的代码行、列号均指生成后脚本。 | |||
| 另外对于未成功转换的算子,在代码中会相应的标识该节点输入、输出Tensor的shape(以`input_shape`, `output_shape` | |||
| 标识),便于用户手动修改。以Reshape算子为例(暂不支持Reshape),将生成如下代码: | |||
| 标识),便于用户手动修改。以Reshape算子为例,将生成如下代码: | |||
| ```python | |||
| class Classifier(nn.Cell): | |||
| @@ -274,27 +262,6 @@ class Classifier(nn.Cell): | |||
| 映射表中分别保存算子在MindSpore中的权重信息(`converted_weight`)和在原始框架中的权重信息(`source_weight`)。 | |||
| #### TensorFlow模型脚本生成示例 | |||
| 使用TensorFlow模型脚本迁移,需要先将TensorFlow模型导出为pb格式,并且获取模型输入节点、输出节点名称。TensorFlow pb模型导出可参考[TensorFlow Pb模型导出](#tensorflow-pb模型导出) | |||
| 。 | |||
| 假设输入节点名称为`input_1:0`、输出节点名称为`predictions/Softmax:0`,模型输入样本尺寸为`1,224,224,3`,则可使用如下命令进行脚本生成: | |||
| ```bash | |||
| mindconverter --model_file /home/user/xxx/frozen_model.pb --shape 1,224,224,3 \ | |||
| --input_nodes input_1:0 \ | |||
| --output_nodes predictions/Softmax:0 \ | |||
| --output /home/user/output \ | |||
| --report /home/user/output/report | |||
| ``` | |||
| 执行该命令,MindSpore代码文件、权重文件、权重映射表和转换报告生成至相应目录。 | |||
| 由于基于图结构方案属于生成式方法,转换过程中未参考原TensorFlow脚本,因此生成的转换报告中涉及的代码行、列号均指生成后脚本。 | |||
| 另外,对于未成功转换的算子,在代码中会相应的标识该节点输入、输出Tensor的shape(以`input_shape`、`output_shape`标识),便于用户手动修改,示例见**PyTorch模型脚本生成示例**。 | |||
| #### ONNX模型文件生成示例 | |||
| 使用ONNX模型文件迁移,需要先从.onnx文件中获取模型输入节点、输出节点名称。获取ONNX模输入、输出节点名称,可使用 [Netron](https://github.com/lutzroeder/netron) 工具查看。 | |||
| @@ -313,15 +280,15 @@ mindconverter --model_file /home/user/xxx/model.onnx --shape 1,3,224,224 \ | |||
| 由于基于图结构方案属于生成式方法,转换过程中未参考ONNX文件,因此生成的转换报告中涉及的代码行、列号均指生成后脚本。 | |||
| 另外,对于未成功转换的算子,在代码中会相应的标识该节点输入、输出Tensor的shape(以`input_shape`、`output_shape`标识),便于用户手动修改,示例见**PyTorch模型脚本生成示例**。 | |||
| 另外,对于未成功转换的算子,在代码中会相应的标识该节点输入、输出Tensor的shape(以`input_shape`、`output_shape`标识),便于用户手动修改,示例见**TensorFlow模型脚本生成示例**。 | |||
| ## 注意事项 | |||
| 1. PyTorch、TensorFlow不作为MindInsight明确声明的依赖库。若想使用基于图结构的脚本生成工具,需要用户手动安装与生成PyTorch模型版本一致的PyTorch库(MindConverter使用PyTorch | |||
| 1.5.0进行测试,不支持PyTorch 1.4.x; PyTorch 1.6.x、PyTorch 1.7.x未进行测试。),或TensorFlow。 | |||
| 2. 脚本转换工具本质上为算子驱动,对于MindConverter未维护的PyTorch或ONNX算子与MindSpore算子映射,将会出现相应的算子无法转换的问题,对于该类算子,用户可手动修改,或基于MindConverter实现映射关系,向MindInsight仓库贡献。 | |||
| 1. TensorFlow不作为MindInsight明确声明的依赖库。若想使用基于图结构的脚本生成工具,需要用户手动安装TensorFlow。 | |||
| 2. 脚本转换工具本质上为算子驱动,对于MindConverter未维护的ONNX算子与MindSpore算子映射,将会出现相应的算子无法转换的问题,对于该类算子,用户可手动修改,或基于MindConverter实现映射关系,向MindInsight仓库贡献。 | |||
| 3. 在使用基于计算图的迁移时,MindConverter会根据`--shape`参数将模型输入的批次大小(batch size)、句子长度(sequence length)、图片尺寸(image shape)等尺寸相关参数固定下来,用户需要保证基于MindSpore重训练、推理时输入shape与转换时一致;若需要调整输入尺寸,请重新指定`--shape`进行转换或修改转换后脚本中涉及张量尺寸变更操作相应的操作数。 | |||
| 4. 脚本文件、权重文件和权重映射表输出于同一个目录下。 | |||
| 4. 脚本文件和权重文件输出于同一个目录下,转换报告和权重映射表输出于同一个目录下。 | |||
| 5. 模型文件的安全性与一致性请用户自行保证。 | |||
| ## AST方案不支持场景 | |||
| @@ -357,8 +324,8 @@ class ConvBNReLU(nn.Sequential): | |||
| ## 三方库依赖 | |||
| 用户在使用MindConverter时,下列三方库未在MindInsight依赖列表(requirements.txt)中声明。用户除安装可满足模型加载、训练、推理的TensorFlow或PyTorch外,还需要安装(pip | |||
| install)如下依赖库(PyTorch模型脚本转MindSpore的用户无需安装tf2onnx): | |||
| 用户在使用MindConverter时,下列三方库未在MindInsight依赖列表(requirements.txt)中声明。用户除安装可满足模型加载、训练、推理的TensorFlow外,还需要安装(pip | |||
| install)如下依赖库(ONNX模型文件转MindSpore的用户无需安装tf2onnx): | |||
| ```text | |||
| onnx>=1.8.0 | |||
| @@ -379,11 +346,7 @@ Q2. MindConverter是否可以在ARM平台运行? | |||
| > 答:MindConverter同时支持X86、ARM平台,若在ARM平台运行需要用户自行安装模型所需的依赖包和运行环境。 | |||
| Q3. PyTorch模型转换时为什么提示`Error detail: [NodeInputMissing] ...`? | |||
| > 答:对于PyTorch模型,若网络中存在`torch.nn.functional.xxx`, `torch.xxx`, `torch.Tensor.xxx`层算子,可能存在节点解析失败的情况,需要用户手动替换为torch.nn层算子。 | |||
| Q4. 为什么使用MindConverter进行模型转换需要很长时间(超过十分钟),而模型并不大? | |||
| Q3. 为什么使用MindConverter进行模型转换需要很长时间(超过十分钟),而模型并不大? | |||
| > 答:MindConverter进行转换时,需要使用Protobuf对模型文件进行反序列化,请确保Python环境中安装的Protobuf采用C++后端实现,检查方法如下,若输出为python,则需要安装采用C++实现的Python Protobuf(下载Protobuf源码并进入源码中的python子目录,使用python setup.py install --cpp_implementation进行安装);若输出为cpp,转换过程仍耗时较长,请在转换前使用添加环境变量`export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp`。 | |||
| @@ -124,31 +124,6 @@ class OutputDirAction(argparse.Action): | |||
| setattr(namespace, self.dest, output) | |||
| class ProjectPathAction(argparse.Action): | |||
| """Project directory action class definition.""" | |||
| def __call__(self, parser_in, namespace, values, option_string=None): | |||
| """ | |||
| Inherited __call__ method from argparse.Action. | |||
| Args: | |||
| parser_in (ArgumentParser): Passed-in argument parser. | |||
| namespace (Namespace): Namespace object to hold arguments. | |||
| values (object): Argument values with type depending on argument definition. | |||
| option_string (str): Optional string for specific argument name. Default: None. | |||
| """ | |||
| ArgsCheck.check_repeated(namespace, self.dest, self.default, option_string, parser_in) | |||
| outfile_dir = FileDirAction.check_path(parser_in, values, option_string) | |||
| if not os.path.exists(outfile_dir): | |||
| parser_in.error(f'{option_string} {outfile_dir} not exists') | |||
| if not os.path.isdir(outfile_dir): | |||
| parser_in.error(f'{option_string} [{outfile_dir}] should be a directory.') | |||
| setattr(namespace, self.dest, outfile_dir) | |||
| class InFileAction(argparse.Action): | |||
| """Input File action class definition.""" | |||
| @@ -202,8 +177,8 @@ class ModelFileAction(argparse.Action): | |||
| frame_type = get_framework_type(outfile_dir) | |||
| if frame_type == FrameworkType.UNKNOWN.value: | |||
| parser_in.error(f'{option_string} {outfile_dir} should be an valid ' | |||
| f'TensorFlow pb or PyTorch pth model file') | |||
| parser_in.error(f'{option_string} {outfile_dir} should be ' | |||
| f'a valid TensorFlow(.pb) or an ONNX(.onnx) model file.') | |||
| setattr(namespace, self.dest, outfile_dir) | |||
| @@ -277,7 +252,6 @@ class NodeAction(argparse.Action): | |||
| namespace (Namespace): Namespace object to hold arguments. | |||
| values (list): Argument values with type depending on argument definition. | |||
| option_string (str): Optional string for specific argument name. Default: None. | |||
| """ | |||
| ArgsCheck.check_repeated(namespace, self.dest, self.default, option_string, parser_in) | |||
| @@ -326,7 +300,7 @@ parser.add_argument( | |||
| action=ModelFileAction, | |||
| required=False, | |||
| help=""" | |||
| PyTorch(.pth), Tensorflow(.pb) or ONNX(.onnx) model file path | |||
| Tensorflow(.pb) or ONNX(.onnx) model file path | |||
| is expected to do script generation based on graph schema. When | |||
| `--in_file` and `--model_file` are both provided, | |||
| use AST schema as default. | |||
| @@ -354,7 +328,7 @@ parser.add_argument( | |||
| required=False, | |||
| nargs="+", | |||
| help=""" | |||
| Optional, input node(s) name of `--model_file`. It is required when use TensorFlow and ONNX model. | |||
| Optional, input node(s) name of `--model_file`. It is required when use graph based schema. | |||
| Both order and number should be consistent with `--shape`. Usage: --input_nodes input_1:0 input_2:0 | |||
| """) | |||
| @@ -366,7 +340,7 @@ parser.add_argument( | |||
| required=False, | |||
| nargs="+", | |||
| help=""" | |||
| Optional, output node(s) name of `--model_file`. It is required when use TensorFlow and ONNX model. | |||
| Optional, output node(s) name of `--model_file`. It is required when use graph based schema. | |||
| Usage: --output_nodes output_1:0 output_2:0 | |||
| """) | |||
| @@ -391,19 +365,6 @@ parser.add_argument( | |||
| converted script directory. | |||
| """) | |||
| parser.add_argument( | |||
| '--project_path', | |||
| type=str, | |||
| action=ProjectPathAction, | |||
| required=False, | |||
| default=None, | |||
| help=""" | |||
| Optional, PyTorch scripts project path. If PyTorch | |||
| project is not in PYTHONPATH, please assign | |||
| `--project_path` when use graph based schema. | |||
| Usage: --project_path ~/script_file/ | |||
| """) | |||
| def cli_entry(): | |||
| """Entry point for mindconverter CLI.""" | |||
| @@ -425,23 +386,21 @@ def cli_entry(): | |||
| _run(args.in_file, args.model_file, | |||
| args.shape, | |||
| args.input_nodes, args.output_nodes, | |||
| args.output, args.report, | |||
| args.project_path) | |||
| args.output, args.report) | |||
| def _run(in_files, model_file, shape, input_nodes, output_nodes, out_dir, report, project_path): | |||
| def _run(in_files, model_file, shape, input_nodes, output_nodes, out_dir, report): | |||
| """ | |||
| Run converter command. | |||
| Args: | |||
| in_files (str): The file path or directory to convert. | |||
| model_file(str): The pytorch .pth to convert on graph based schema. | |||
| model_file(str): The model to convert on graph based schema. | |||
| shape(list): The input tensor shape of module_file. | |||
| input_nodes(str): The input node(s) name of Tensorflow model, split by ','. | |||
| output_nodes(str): The output node(s) name of Tensorflow model, split by ','. | |||
| input_nodes(str): The input node(s) name of model. | |||
| output_nodes(str): The output node(s) name of model. | |||
| out_dir (str): The output directory to save converted file. | |||
| report (str): The report file path. | |||
| project_path(str): Pytorch scripts project path. | |||
| """ | |||
| if in_files: | |||
| files_config = { | |||
| @@ -470,10 +429,6 @@ def _run(in_files, model_file, shape, input_nodes, output_nodes, out_dir, report | |||
| 'outfile_dir': out_dir, | |||
| 'report_dir': report if report else out_dir | |||
| } | |||
| if project_path: | |||
| paths = sys.path | |||
| if project_path not in paths: | |||
| sys.path.append(project_path) | |||
| main_graph_base_converter(file_config) | |||
| log_console.info("MindConverter: conversion is completed.") | |||
| @@ -6,34 +6,34 @@ | |||
| | 模型 | PyTorch脚本 | TensorFlow脚本 | 备注 | PyTorch权重迁移 | TensorFlow权重迁移 | | |||
| | :----: | :-----: | :----: | :----: | :----: | :----: | | |||
| | ResNet18 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/resnet.py) | 暂未测试 | | 已测试 | / | | |||
| | ResNet34 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/resnet.py) | 暂未测试 | | 已测试 | / | | |||
| | ResNet18 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/resnet.py) | / | | 已测试 | / | | |||
| | ResNet34 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/resnet.py) | / | | 已测试 | / | | |||
| | ResNet50 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/resnet.py) | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/resnet.py) | | 已测试 | 已测试 | | |||
| | ResNet50V2 | 暂未测试 | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/resnet_v2.py) | | / | 已测试 | | |||
| | ResNet50V2 | / | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/resnet_v2.py) | | / | 已测试 | | |||
| | ResNet101 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/resnet.py) | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/resnet.py) | | 未测试 | 已测试 | | |||
| | ResNet101V2 | 暂未测试 | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/resnet_v2.py) | | / | 已测试 | | |||
| | ResNet101V2 | / | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/resnet_v2.py) | | / | 已测试 | | |||
| | ResNet152 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/resnet.py) | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/resnet.py) | | 已测试 | 已测试 | | |||
| | ResNet152V2 | 暂未测试 | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/resnet_v2.py) | | / | 已测试 | | |||
| | Wide ResNet50 2 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/resnet.py) | 暂未测试 | | 已测试 | / | | |||
| | Wide ResNet101 2 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/resnet.py) | 暂未测试 | | 已测试 | / | | |||
| | VGG11/11BN | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/vgg.py) | 暂未测试 | | 已测试 | / | | |||
| | VGG13/13BN | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/vgg.py) | 暂未测试 | | 已测试 | / | | |||
| | ResNet152V2 | / | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/resnet_v2.py) | | / | 已测试 | | |||
| | Wide ResNet50 2 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/resnet.py) | / | | 已测试 | / | | |||
| | Wide ResNet101 2 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/resnet.py) | / | | 已测试 | / | | |||
| | VGG11/11BN | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/vgg.py) | / | | 已测试 | / | | |||
| | VGG13/13BN | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/vgg.py) | / | | 已测试 | / | | |||
| | VGG16 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/vgg.py) | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/vgg16.py) | | 已测试 | 已测试 | | |||
| | VGG16BN | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/vgg.py) | 暂未测试 | | 已测试 | / | | |||
| | VGG16BN | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/vgg.py) | / | | 已测试 | / | | |||
| | VGG19 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/vgg.py) | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/vgg19.py) | | 已测试 | 已测试 | | |||
| | VGG19BN | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/vgg.py) | 暂未测试 | | 已测试 | / | | |||
| | AlexNet | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/alexnet.py) | 暂未测试 | | 已测试 | / | | |||
| | GoogLeNet | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/googlenet.py) | 暂未测试 | | 已测试 | / | | |||
| | Xception | 暂未测试 | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/xception.py) | | / | 已测试 | | |||
| | VGG19BN | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/vgg.py) | / | | 已测试 | / | | |||
| | AlexNet | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/alexnet.py) | / | | 已测试 | / | | |||
| | GoogLeNet | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/googlenet.py) | / | | 已测试 | / | | |||
| | Xception | / | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/xception.py) | | / | 已测试 | | |||
| | InceptionV3 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/inception.py) | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/inception_v3.py) | | 已测试 | 已测试 | | |||
| | InceptionResNetV2 | 暂未测试 | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/inception_resnet_v2.py) | | / | 已测试 | | |||
| | MobileNetV1 | 暂未测试 | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/mobilenet.py) | | / | 已测试 | | |||
| | InceptionResNetV2 | / | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/inception_resnet_v2.py) | | / | 已测试 | | |||
| | MobileNetV1 | / | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/mobilenet.py) | | / | 已测试 | | |||
| | MobileNetV2 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/mobilenet.py) | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/mobilenet_v2.py) | | 已测试 | 已测试 | | |||
| | MNASNet | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/mnasnet.py) | 暂未测试 | | mnasnet0_5:已测试 mnasnet0_75:未测试 mnasnet1_0:已测试 mnasnet1_3:未测试 | / | | |||
| | SqueezeNet | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/squeezenet.py) | 暂未测试 | | 已测试 | / | | |||
| | MNASNet | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/mnasnet.py) | / | | mnasnet0_5:已测试 mnasnet0_75:未测试 mnasnet1_0:已测试 mnasnet1_3:未测试 | / | | |||
| | SqueezeNet | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/squeezenet.py) | / | | 已测试 | / | | |||
| | DenseNet121/169/201 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/densenet.py) | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/densenet.py) | | 已测试 | 已测试 | | |||
| | DenseNet161 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/densenet.py) | 暂未测试 | | 已测试 | / | | |||
| | NASNetMobile/Large | 暂未测试 | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/nasnet.py) | | / | 已测试 | | |||
| | DenseNet161 | [脚本链接](https://github.com/pytorch/vision/blob/v0.5.0/torchvision/models/densenet.py) | / | | 已测试 | / | | |||
| | NASNetMobile/Large | / | [脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/nasnet.py) | | / | 已测试 | | |||
| | EfficientNetB0~B7 | [脚本链接](https://github.com/lukemelas/EfficientNet-PyTorch) | [TF1.15<br />脚本链接](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet) <br />[TF2.3<br />脚本链接](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/efficientnet.py) | | 已测试 | 已测试(TF1.15) 已测试(TF2.3)| | |||
| | Unet | [脚本链接](https://github.com/milesial/Pytorch-UNet) | [脚本链接](https://github.com/zhixuhao/unet) | 由于算子`mindspore.ops.ResizeBilinear`在GPU上暂未实现,所以当运行在GPU设备上时,算子`mindspore.ops.ResizeBilinear`需要被替换为算子`mindspore.ops.ResizeNearestNeighbor` | 已测试 | 已测试 | | |||
| | Bert | [脚本链接](https://huggingface.co/bert-base-uncased) | [脚本链接](https://github.com/google-research/bert) | | 已测试 | 已测试 | | |||
| @@ -13,7 +13,7 @@ | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """Graph based scripts converter definition.""" | |||
| __all__ = ["graph_based_converter_pytorch_to_ms", "graph_based_converter_tf_to_ms"] | |||
| __all__ = ["graph_based_converter_onnx_to_ms", "graph_based_converter_tf_to_ms"] | |||
| from mindinsight.mindconverter.graph_based_converter.framework import graph_based_converter_pytorch_to_ms | |||
| from mindinsight.mindconverter.graph_based_converter.framework import graph_based_converter_onnx_to_ms | |||
| from mindinsight.mindconverter.graph_based_converter.framework import graph_based_converter_tf_to_ms | |||
| @@ -23,10 +23,9 @@ from typing import List, Tuple, Mapping | |||
| import numpy as np | |||
| from mindinsight.mindconverter.common.exceptions import ScriptGenerationError, ReportGenerationError, \ | |||
| UnknownModelError, CheckPointGenerationError, WeightMapGenerationError | |||
| from mindinsight.mindconverter.common.log import logger as log | |||
| from mindinsight.mindconverter.graph_based_converter.constant import SEPARATOR_IN_ONNX_OP, BINARY_HEADER_PYTORCH_BITS, \ | |||
| FrameworkType, BINARY_HEADER_PYTORCH_FILE, TENSORFLOW_MODEL_SUFFIX, THIRD_PART_VERSION | |||
| CheckPointGenerationError, WeightMapGenerationError | |||
| from mindinsight.mindconverter.graph_based_converter.constant import SEPARATOR_IN_ONNX_OP, FrameworkType, \ | |||
| TENSORFLOW_MODEL_SUFFIX, THIRD_PART_VERSION, ONNX_MODEL_SUFFIX, DTYPE_MAP | |||
| def is_converted(operation: str): | |||
| @@ -73,7 +72,7 @@ def check_dependency_integrity(*packages): | |||
| def build_feed_dict(onnx_model, input_nodes: dict): | |||
| """Build feed dict for onnxruntime.""" | |||
| dtype_mapping = getattr(import_module("tf2onnx.utils"), "ONNX_TO_NUMPY_DTYPE") | |||
| dtype_mapping = DTYPE_MAP | |||
| input_nodes_types = { | |||
| node.name: dtype_mapping[node.type.tensor_type.elem_type] | |||
| for node in onnx_model.graph.input | |||
| @@ -170,7 +169,7 @@ def save_code_file_and_report(model_name: str, code_lines: Mapping[str, Tuple], | |||
| except TypeError as error: | |||
| raise CheckPointGenerationError(str(error)) | |||
| weight_map_path = os.path.realpath(os.path.join(out_folder, f"weight_map_of_{model_name}.json")) | |||
| weight_map_path = os.path.realpath(os.path.join(report_folder, f"weight_map_of_{model_name}.json")) | |||
| try: | |||
| if os.path.exists(weight_map_path): | |||
| raise WeightMapGenerationError("Weight map file with the same name already exists.") | |||
| @@ -248,22 +247,14 @@ def convert_bytes_string_to_string(bytes_str): | |||
| def get_framework_type(model_path): | |||
| """Get framework type.""" | |||
| if model_path.endswith('.onnx'): | |||
| return FrameworkType.PYTORCH.value | |||
| try: | |||
| with open(model_path, 'rb') as f: | |||
| if f.read(BINARY_HEADER_PYTORCH_BITS) == BINARY_HEADER_PYTORCH_FILE: | |||
| framework_type = FrameworkType.PYTORCH.value | |||
| elif os.path.basename(model_path).split(".")[-1].lower() == TENSORFLOW_MODEL_SUFFIX: | |||
| framework_type = FrameworkType.TENSORFLOW.value | |||
| else: | |||
| framework_type = FrameworkType.UNKNOWN.value | |||
| except IOError: | |||
| error_msg = "Get UNSUPPORTED model." | |||
| error = UnknownModelError(error_msg) | |||
| log.error(str(error)) | |||
| raise error | |||
| model_suffix = os.path.basename(model_path).split(".")[-1].lower() | |||
| if model_suffix == ONNX_MODEL_SUFFIX: | |||
| framework_type = FrameworkType.ONNX.value | |||
| elif model_suffix == TENSORFLOW_MODEL_SUFFIX: | |||
| framework_type = FrameworkType.TENSORFLOW.value | |||
| else: | |||
| framework_type = FrameworkType.UNKNOWN.value | |||
| return framework_type | |||
| @@ -47,7 +47,6 @@ ONNXRUNTIME_MIN_VER = "1.5.2" | |||
| ONNXOPTIMIZER_MIN_VER = "0.1.2" | |||
| ONNXOPTIMIZER_MAX_VER = "0.1.2" | |||
| TORCH_MIN_VER = "1.5.0" | |||
| DTYPE_MAP = { | |||
| 1: np.float32, | |||
| @@ -111,8 +110,7 @@ class ExchangeMessageKeywords(Enum): | |||
| GROUP_INPUTS = "group_inputs" | |||
| BINARY_HEADER_PYTORCH_FILE = \ | |||
| b'\x80\x02\x8a\nl\xfc\x9cF\xf9 j\xa8P\x19.\x80\x02M\xe9\x03.\x80\x02}q\x00(X\x10\x00\x00\x00' | |||
| ONNX_MODEL_SUFFIX = "onnx" | |||
| TENSORFLOW_MODEL_SUFFIX = "pb" | |||
| BINARY_HEADER_PYTORCH_BITS = 32 | |||
| @@ -128,7 +126,6 @@ MIN_SCOPE_LENGTH = 2 | |||
| ONNX_OPSET_VERSION = 11 | |||
| MODEL_INPUT_NAME = 'input.1' | |||
| NO_CONVERTED_OPERATORS = [ | |||
| "onnx::Constant", | |||
| @@ -136,7 +133,6 @@ NO_CONVERTED_OPERATORS = [ | |||
| ] | |||
| THIRD_PART_VERSION = { | |||
| "torch": (TORCH_MIN_VER,), | |||
| "onnx": (ONNX_MIN_VER,), | |||
| "onnxruntime": (ONNXRUNTIME_MIN_VER,), | |||
| "onnxoptimizer": (ONNXOPTIMIZER_MIN_VER,), | |||
| @@ -161,7 +157,7 @@ class InputType(Enum): | |||
| @unique | |||
| class FrameworkType(Enum): | |||
| PYTORCH = 0 | |||
| ONNX = 0 | |||
| TENSORFLOW = 1 | |||
| UNKNOWN = 2 | |||
| @@ -181,6 +177,6 @@ def get_imported_module(): | |||
| """ | |||
| return f"import numpy as np{NEW_LINE}" \ | |||
| f"import mindspore{NEW_LINE}" \ | |||
| f"import mindspore.ops as P{NEW_LINE}" \ | |||
| f"from mindspore import nn{NEW_LINE}" \ | |||
| f"from mindspore import Tensor, Parameter{NEW_LINE}" \ | |||
| f"from mindspore.ops import operations as P{NEW_LINE * 3}" | |||
| f"from mindspore import Tensor, Parameter{NEW_LINE * 3}" | |||
| @@ -13,9 +13,7 @@ | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """Graph based scripts converter workflow.""" | |||
| import multiprocessing as mp | |||
| import os | |||
| import re | |||
| import sys | |||
| from typing import List | |||
| from importlib import import_module | |||
| @@ -26,7 +24,7 @@ from mindinsight.mindconverter.graph_based_converter.common.global_context impor | |||
| from mindinsight.mindconverter.graph_based_converter.common.utils import lib_version_satisfied, onnx_satisfied, \ | |||
| save_code_file_and_report, get_framework_type, check_dependency_integrity, get_third_part_lib_validation_error_info | |||
| from mindinsight.mindconverter.graph_based_converter.constant import FrameworkType, \ | |||
| ONNX_MIN_VER, TF2ONNX_MIN_VER, ONNXRUNTIME_MIN_VER, ONNXOPTIMIZER_MIN_VER, TORCH_MIN_VER | |||
| ONNX_MIN_VER, TF2ONNX_MIN_VER, ONNXRUNTIME_MIN_VER, ONNXOPTIMIZER_MIN_VER | |||
| from mindinsight.mindconverter.graph_based_converter.generator import batch_add_nodes | |||
| from mindinsight.mindconverter.graph_based_converter.mapper import ONNXToMindSporeMapper | |||
| from mindinsight.mindconverter.common.log import logger as log, logger_console as log_console | |||
| @@ -60,17 +58,7 @@ def _print_error(err): | |||
| log_console.error(str(err)) | |||
| def torch_version_satisfied(output_queue): | |||
| """Check Torch version whether is satisfied.""" | |||
| satisfied = False | |||
| pattern = r"\d+\.\d+\.\d+" | |||
| torch_version = re.findall(pattern, getattr(import_module('torch'), "__version__")) | |||
| if torch_version: | |||
| satisfied = lib_version_satisfied(torch_version[0], TORCH_MIN_VER) | |||
| output_queue.put(satisfied) | |||
| def torch_installation_validation(func): | |||
| def onnx_installation_validation(func): | |||
| """ | |||
| Validate args of func. | |||
| @@ -83,40 +71,16 @@ def torch_installation_validation(func): | |||
| def _f(graph_path: str, input_nodes: dict, output_nodes: List[str], | |||
| output_folder: str, report_folder: str = None): | |||
| # Check whether pytorch is installed. | |||
| error_info = None | |||
| torch_version_validation = False | |||
| if graph_path.endswith('.onnx'): | |||
| if not onnx_satisfied() or not check_common_dependency_integrity(): | |||
| error_info = f"{get_third_part_lib_validation_error_info(['onnx', 'onnxruntime', 'onnxoptimizer'])} " \ | |||
| f"are required when using graph based scripts converter." | |||
| else: | |||
| if not find_spec("torch") or not onnx_satisfied() or not check_common_dependency_integrity(): | |||
| error_info = \ | |||
| f"{get_third_part_lib_validation_error_info(['torch', 'onnx', 'onnxruntime', 'onnxoptimizer'])} " \ | |||
| f"are required when using graph based scripts converter, and PyTorch version must " \ | |||
| f"be consisted with model generation runtime." | |||
| if not error_info: | |||
| output_queue = mp.Queue() | |||
| process = mp.Process(target=torch_version_satisfied, args=(output_queue,)) | |||
| process.start() | |||
| torch_version_validation = output_queue.get() | |||
| process.join() | |||
| if error_info: | |||
| # Check whether onnx is installed. | |||
| error_info = f"{get_third_part_lib_validation_error_info(['onnx', 'onnxruntime', 'onnxoptimizer'])} " \ | |||
| f"are required when using graph based scripts converter or ONNX conversion." | |||
| if not onnx_satisfied() or not check_common_dependency_integrity(): | |||
| _print_error(RuntimeIntegrityError(error_info)) | |||
| sys.exit(0) | |||
| if (not torch_version_validation and not graph_path.endswith('.onnx')) or not onnx_lib_version_satisfied(): | |||
| lib_check_list = ['onnx', 'onnxruntime', 'onnxoptimizer'] | |||
| if not graph_path.endswith('.onnx'): | |||
| lib_check_list.insert(0, 'torch') | |||
| error = RuntimeIntegrityError( | |||
| f"{get_third_part_lib_validation_error_info(lib_check_list)} " | |||
| f"are required when using graph based scripts converter." | |||
| ) | |||
| _print_error(error) | |||
| if not onnx_lib_version_satisfied(): | |||
| _print_error(RuntimeIntegrityError(error_info)) | |||
| sys.exit(0) | |||
| func(graph_path=graph_path, | |||
| @@ -194,16 +158,16 @@ def _extract_model_name(model_path): | |||
| return model_name | |||
| @torch_installation_validation | |||
| @onnx_installation_validation | |||
| @GraphInitError.uniform_catcher() | |||
| @TreeCreationError.uniform_catcher() | |||
| @SourceFilesSaveError.uniform_catcher() | |||
| @GeneratorError.uniform_catcher() | |||
| def graph_based_converter_pytorch_to_ms(graph_path: str, | |||
| input_nodes: dict, output_nodes: List[str], | |||
| output_folder: str, report_folder: str = None): | |||
| def graph_based_converter_onnx_to_ms(graph_path: str, | |||
| input_nodes: dict, output_nodes: List[str], | |||
| output_folder: str, report_folder: str = None): | |||
| """ | |||
| PyTorch to MindSpore based on Graph. | |||
| ONNX to MindSpore based on Graph. | |||
| Args: | |||
| graph_path (str): Graph file path. | |||
| @@ -271,12 +235,8 @@ def main_graph_base_converter(file_config): | |||
| if not file_config.get("shape"): | |||
| raise ParamMissingError("Param missing, `--shape` is required when using graph mode.") | |||
| if graph_path.endswith("pth") and not file_config.get("input_nodes", []) and \ | |||
| file_config.get("shape") and len(file_config.get("shape", ())) == 1: | |||
| file_config['input_nodes'] = ["input.1"] | |||
| else: | |||
| check_params = ['input_nodes', 'output_nodes'] | |||
| check_params_exist(check_params, file_config) | |||
| check_params = ['input_nodes', 'output_nodes'] | |||
| check_params_exist(check_params, file_config) | |||
| if len(file_config['shape']) != len(file_config.get("input_nodes", [])): | |||
| raise BadParamError("`--shape` and `--input_nodes` must have the same length, " | |||
| @@ -286,19 +246,13 @@ def main_graph_base_converter(file_config): | |||
| for shape, node in zip(file_config['shape'], file_config['input_nodes']): | |||
| input_nodes[node] = shape | |||
| if frame_type == FrameworkType.PYTORCH.value: | |||
| if graph_path.endswith('.onnx'): | |||
| graph_based_converter_pytorch_to_ms(graph_path=graph_path, | |||
| input_nodes=input_nodes, | |||
| output_nodes=file_config['output_nodes'], | |||
| output_folder=file_config['outfile_dir'], | |||
| report_folder=file_config['report_dir']) | |||
| else: | |||
| graph_based_converter_pytorch_to_ms(graph_path=graph_path, | |||
| input_nodes=input_nodes, | |||
| output_nodes=[], | |||
| output_folder=file_config['outfile_dir'], | |||
| report_folder=file_config['report_dir']) | |||
| if frame_type == FrameworkType.ONNX.value: | |||
| graph_based_converter_onnx_to_ms(graph_path=graph_path, | |||
| input_nodes=input_nodes, | |||
| output_nodes=file_config['output_nodes'], | |||
| output_folder=file_config['outfile_dir'], | |||
| report_folder=file_config['report_dir']) | |||
| elif frame_type == FrameworkType.TENSORFLOW.value: | |||
| graph_based_converter_tf_to_ms(graph_path=graph_path, | |||
| input_nodes=input_nodes, | |||
| @@ -23,7 +23,7 @@ class MatMulMapper(ONNXToMindSporeMapper): | |||
| @staticmethod | |||
| def _operation_name_in_ms(*args, **kwargs): | |||
| return "nn.MatMul" | |||
| return "P.matmul" | |||
| @staticmethod | |||
| def _convert_params(**kwargs): | |||
| @@ -32,34 +32,37 @@ class MatMulMapper(ONNXToMindSporeMapper): | |||
| @staticmethod | |||
| def _convert_trained_weights(**kwargs): | |||
| weights = kwargs['weights'] | |||
| weight = MatMulMapper._find_val_by_index(0, weights) | |||
| onnx_name = MatMulMapper._find_onnx_name_by_index(0, weights) | |||
| return {'w': {'data': weight, 'type': WeightType.PARAMETER.value, 'onnx_name': onnx_name}} | |||
| if weights: | |||
| weight = MatMulMapper._find_val_by_index(0, weights) | |||
| onnx_name = MatMulMapper._find_onnx_name_by_index(0, weights) | |||
| return {'w': {'data': weight, 'type': WeightType.PARAMETER.value, 'onnx_name': onnx_name}} | |||
| return dict() | |||
| @staticmethod | |||
| def _generate_snippet_template(**kwargs): | |||
| template, exchange_msg, outputs_list, outputs_mapping = ONNXToMindSporeMapper._generate_snippet_template( | |||
| **kwargs) | |||
| op = kwargs.get("operation") | |||
| args = kwargs.get("converted_params") | |||
| weights = kwargs.get("weights") | |||
| trainable_params = kwargs.get('trainable_params', dict()) | |||
| if not op: | |||
| raise ValueError("Can not get MindSpore operation name.") | |||
| if not weights: | |||
| return template, exchange_msg, outputs_list, outputs_mapping | |||
| variable_slot = "var_0" | |||
| init_template = f"self.{{{variable_slot}}} = {op}({', '.join(['%s={%s}' % (p, p) for p in args])})" | |||
| # Note: adding weight shape to args is now deprecated due to conflict of partial weights share processing. | |||
| variable_slot_param_name = f"{variable_slot}/w" | |||
| init_tensor = f"self.{{{variable_slot}}}_w = {{{variable_slot_param_name}}}" | |||
| construct_template = f"opt_{{{variable_slot}}} = self.{{{variable_slot}}}" \ | |||
| f"({{{ExchangeMessageKeywords.VariableScope.value.INPUTS.value}}}," \ | |||
| f"self.{{{variable_slot}}}_w)" | |||
| w_location = MatMulMapper._find_location_by_index(0, weights) | |||
| init_tensor_list = list() | |||
| inputs_in_construct = [f"{{{ExchangeMessageKeywords.VariableScope.value.INPUTS.value}}}"] | |||
| if w_location != -1: | |||
| # Note: adding weight shape to args is now deprecated due to conflict of partial weights share processing. | |||
| variable_slot_param_name = f"{variable_slot}/w" | |||
| init_tensor_list.append(f"self.{{{variable_slot}}}_w = {{{variable_slot_param_name}}}") | |||
| inputs_in_construct.insert(w_location, f"self.{{{variable_slot}}}_w") | |||
| construct_template = f"opt_{{{variable_slot}}} = {op}({', '.join(inputs_in_construct)})" | |||
| template = { | |||
| variable_slot: { | |||
| TemplateKeywords.INIT.value: [init_template, init_tensor], | |||
| TemplateKeywords.INIT.value: init_tensor_list, | |||
| TemplateKeywords.CONSTRUCT.value: [construct_template] | |||
| } | |||
| } | |||
| @@ -72,12 +75,13 @@ class MatMulMapper(ONNXToMindSporeMapper): | |||
| ExchangeMessageKeywords.VariableScope.value.INPUTS.value: [], | |||
| ExchangeMessageKeywords.VariableScope.value.ARGS.value: args, | |||
| ExchangeMessageKeywords.VariableScope.value.WEIGHTS.value: weights, | |||
| ExchangeMessageKeywords.VariableScope.value.TRAINABLE_PARAMS.value: trainable_params, | |||
| ExchangeMessageKeywords.VariableScope.value.PARAMETERS_DECLARED.value: { | |||
| "w": "" | |||
| } | |||
| ExchangeMessageKeywords.VariableScope.value.TRAINABLE_PARAMS.value: trainable_params | |||
| } | |||
| } | |||
| if w_location != -1: | |||
| exchange_msg[variable_slot][ExchangeMessageKeywords.VariableScope.value.PARAMETERS_DECLARED.value] = { | |||
| "w": "" | |||
| } | |||
| outputs_list = [f"opt_{{{variable_slot}}}"] | |||
| outputs_mapping = ((0, 0),) | |||
| return template, exchange_msg, outputs_list, outputs_mapping | |||
| @@ -13,7 +13,7 @@ | |||
| "onnx::Concat": "mindinsight.mindconverter.graph_based_converter.mapper.impl.ops.concat_mapper.ConcatMapper", | |||
| "onnx::Clip": "mindinsight.mindconverter.graph_based_converter.mapper.impl.nn.relu_mapper.ReLUMapper", | |||
| "onnx::Transpose": "mindinsight.mindconverter.graph_based_converter.mapper.impl.ops.transpose_mapper.TransposeMapper", | |||
| "onnx::MatMul": "mindinsight.mindconverter.graph_based_converter.mapper.impl.nn.mat_mul_mapper.MatMulMapper", | |||
| "onnx::MatMul": "mindinsight.mindconverter.graph_based_converter.mapper.impl.ops.mat_mul_mapper.MatMulMapper", | |||
| "onnx::Softmax": "mindinsight.mindconverter.graph_based_converter.mapper.impl.nn.softmax_mapper.SoftmaxMapper", | |||
| "onnx::OneHot": "mindinsight.mindconverter.graph_based_converter.mapper.impl.nn.one_hot_mapper.OneHotMapper", | |||
| "onnx::Neg": "mindinsight.mindconverter.graph_based_converter.mapper.impl.ops.neg_mapper.NegMapper", | |||
| @@ -21,7 +21,6 @@ from mindinsight.mindconverter.common.log import logger as log | |||
| from mindinsight.mindconverter.graph_based_converter.third_party_graph.base import Graph | |||
| from mindinsight.mindconverter.graph_based_converter.third_party_graph.input_node import InputNode | |||
| from mindinsight.mindconverter.graph_based_converter.third_party_graph.onnx_graph_node import OnnxGraphNode | |||
| from mindinsight.mindconverter.graph_based_converter.third_party_graph.pytorch_graph_parser import PyTorchGraphParser | |||
| from mindinsight.mindconverter.graph_based_converter.third_party_graph.tf_graph_parser import TFGraphParser | |||
| from mindinsight.mindconverter.graph_based_converter.third_party_graph.onnx_utils import OnnxDataLoader, \ | |||
| NodeWeight, NodeOutputShape | |||
| @@ -206,11 +205,10 @@ class OnnxGraph(Graph): | |||
| onnx_model = TFGraphParser.parse(graph_path, | |||
| input_nodes=input_nodes, | |||
| output_nodes=output_nodes) | |||
| elif graph_path.endswith('.onnx'): | |||
| else: | |||
| onnx = import_module('onnx') | |||
| onnx_model = onnx.load(graph_path) | |||
| else: | |||
| onnx_model = PyTorchGraphParser.parse(graph_path, **kwargs) | |||
| onnx_inputs = [onnx_input.name for onnx_input in onnx_model.graph.input] | |||
| invalid_input_node_name = list() | |||
| @@ -27,7 +27,7 @@ from mindinsight.mindconverter.graph_based_converter.common.global_context impor | |||
| from mindinsight.mindconverter.graph_based_converter.third_party_graph.optimizer import OnnxSimplify | |||
| from mindinsight.mindconverter.graph_based_converter.constant import ONNX_TYPE_INT, ONNX_TYPE_INTS, ONNX_TYPE_STRING, \ | |||
| ONNX_TYPE_FLOATS, ONNX_TYPE_FLOAT, SCALAR_WITHOUT_SHAPE, DYNAMIC_SHAPE, UNKNOWN_DIM_VAL, DTYPE_MAP | |||
| ONNX_TYPE_FLOATS, ONNX_TYPE_FLOAT, SCALAR_WITHOUT_SHAPE, DYNAMIC_SHAPE, UNKNOWN_DIM_VAL | |||
| from mindinsight.mindconverter.common.exceptions import GraphInitError | |||
| @@ -382,11 +382,6 @@ class OnnxDataLoader: | |||
| def _get_outputs_using_onnxruntime(self, output_nodes_name): | |||
| """Get outputs using onnxruntime.""" | |||
| onnx_inputs = self.inferred_model.graph.input | |||
| dtype_dict = dict() | |||
| for onnx_input in onnx_inputs: | |||
| dtype_dict[onnx_input.name] = DTYPE_MAP[onnx_input.type.tensor_type.elem_type] | |||
| feed_dict = build_feed_dict(self.inferred_model, self.input_nodes) | |||
| outputs_infer = fetch_output_from_onnx_model(self.model, feed_dict, output_nodes_name) | |||
| @@ -1,136 +0,0 @@ | |||
| # Copyright 2020-2021 Huawei Technologies Co., Ltd.All Rights Reserved. | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """Third party graph parser.""" | |||
| import multiprocessing as mp | |||
| import os | |||
| from importlib import import_module | |||
| from mindinsight.mindconverter.common.log import logger as log | |||
| from mindinsight.mindconverter.graph_based_converter.third_party_graph.base import GraphParser | |||
| from mindinsight.mindconverter.common.exceptions import ModelLoadingError | |||
| class PyTorchGraphParser(GraphParser): | |||
| """Define pytorch graph parser.""" | |||
| @classmethod | |||
| @ModelLoadingError.check_except( | |||
| "Error occurs when loading model with given params, please check `--shape`, " | |||
| "`--input_nodes`, `--output_nodes`, `--model_file` or runtime environment integrity." | |||
| ) | |||
| def parse(cls, model_path: str, **kwargs): | |||
| """ | |||
| Parser pytorch graph. | |||
| Args: | |||
| model_path (str): Model file path. | |||
| Returns: | |||
| object, torch model. | |||
| """ | |||
| if not os.path.exists(model_path): | |||
| error = FileNotFoundError("`model_path` must be assigned with " | |||
| "an existed file path.") | |||
| log.error(str(error)) | |||
| raise error | |||
| try: | |||
| onnx_model_sim = cls._convert_pytorch_graph_to_onnx( | |||
| model_path, kwargs['input_nodes'], opset_version=11) | |||
| return onnx_model_sim | |||
| except ModuleNotFoundError: | |||
| error_msg = "Cannot find model scripts in system path, " \ | |||
| "set `--project_path` to the path of model scripts folder correctly." | |||
| error = ModuleNotFoundError(error_msg) | |||
| raise error | |||
| @staticmethod | |||
| def _convert_pytorch_graph_to_onnx(model_path, input_nodes, opset_version=None): | |||
| """ | |||
| Convert Pytorch model to ONNX model. | |||
| Args: | |||
| model_path (str): Path to the Pytorch model. | |||
| input_nodes (dict): Input nodes to generate onnx model. | |||
| opset_version (int): Op set version of onnx. | |||
| """ | |||
| output_queue = mp.Queue() | |||
| process = mp.Process(target=PyTorchGraphParser._pytorch_graph_to_proto, | |||
| args=(output_queue, model_path, input_nodes, opset_version)) | |||
| process.start() | |||
| proto = output_queue.get() | |||
| process.join() | |||
| onnx = import_module('onnx') | |||
| onnx_model = onnx.load_model_from_string(proto) | |||
| return onnx_model | |||
| @staticmethod | |||
| def _pytorch_graph_to_proto(output_queue, model_path, input_nodes, opset_version): | |||
| """ | |||
| Convert pytorch graph to pytorch proto. | |||
| Args: | |||
| output_queue (Queue): Output queue from multi-processing. | |||
| model_path (str): Path to the Pytorch model. | |||
| input_nodes (dict): Input nodes to generate onnx model. | |||
| opset_version (int): Op set version of onnx. | |||
| """ | |||
| try: | |||
| torch = import_module('torch') | |||
| has_cuda = torch.cuda.is_available() | |||
| dump_inputs = dict() | |||
| if has_cuda: | |||
| model = torch.load(f=model_path).cuda() | |||
| for node_name, node_shape in input_nodes.items(): | |||
| dump_inputs[node_name] = torch.randn(*node_shape, device='cuda') | |||
| else: | |||
| model = torch.load(f=model_path, map_location="cpu") | |||
| for node_name, node_shape in input_nodes.items(): | |||
| dump_inputs[node_name] = torch.randn(*node_shape, device='cpu') | |||
| if isinstance(model, torch.nn.DataParallel): | |||
| raise ValueError('torch.nn.DataParallel is not supported by ONNX exporter.') | |||
| torch_onnx = import_module('torch.onnx') | |||
| operator_export_types = getattr(torch_onnx, 'OperatorExportTypes') | |||
| utils = import_module('torch.onnx.utils') | |||
| model_to_graph = getattr(utils, '_model_to_graph') | |||
| symbolic_helper = import_module('torch.onnx.symbolic_helper') | |||
| default_onnx_opset_version = getattr(symbolic_helper, '_default_onnx_opset_version') | |||
| set_opset_version = getattr(symbolic_helper, '_set_opset_version') | |||
| set_operator_export_type = getattr(symbolic_helper, '_set_operator_export_type') | |||
| if not opset_version: | |||
| opset_version = default_onnx_opset_version | |||
| operator_export_type = operator_export_types.ONNX | |||
| set_opset_version(opset_version) | |||
| set_operator_export_type(operator_export_type) | |||
| graph, params_dict, _ = model_to_graph(model, args=tuple(dump_inputs.values()), | |||
| input_names=list(dump_inputs.keys()), _retain_param_name=True) | |||
| export_onnx = getattr(graph, '_export_onnx') | |||
| proto, _ = export_onnx( | |||
| params_dict, opset_version, dict(), False, | |||
| operator_export_type, True, False, dict(), | |||
| True, False) | |||
| output_queue.put(proto) | |||
| except ModelLoadingError.raise_from() as e: | |||
| output_queue.put(e) | |||
| @@ -0,0 +1,586 @@ | |||
| { | |||
| "cells": [ | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": { | |||
| "collapsed": true, | |||
| "pycharm": { | |||
| "name": "#%% md\n" | |||
| } | |||
| }, | |||
| "source": [ | |||
| "# 添加算子映射关系高级教程\n", | |||
| "`Linux` `Ascend` `GPU` `CPU` `模型迁移` `高级`\n", | |||
| "\n", | |||
| "[](https://gitee.com/mindspore/mindinsight/blob/master/mindinsight/mindconverter/tutorial/add_operator_mapper_advanced_tutorial.ipynb)" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "## 概述" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "在确定ONNX算子到MindSpore算子的映射关系时,会遇到两者之间不存在相似实现或者参数差异过大难以直接转换的算子的问题。本文将在[初级教程](https://gitee.com/mindspore/mindinsight/blob/master/mindinsight/mindconverter/tutorial/add_operator_mapper_base_tutorial.ipynb)的基础上,以该类算子映射关系为例,来描述添加算子映射关系文件的方法。" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "## 环境准备\n", | |||
| "\n", | |||
| "本案例需安装以下Python三方库:\n", | |||
| "```bash\n", | |||
| "pip install mindspore==1.2.0\n", | |||
| "pip install mindinsight==1.2.0\n", | |||
| "```" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "## 自定义添加算子映射脚本\n", | |||
| "\n", | |||
| "以`onnx::AveragePool`算子为例进行演示。\n", | |||
| "\n", | |||
| "分别查阅[ONNX算子API文档](https://github.com/onnx/onnx/blob/master/docs/Operators.md)和[MindSpore算子API文档](http://www.mindspore.cn/doc/api_python/zh-CN/master/index.html),\n", | |||
| "找到与ONNX算子`onnx::AveragePool`功能相同或相近的MindSpore算子`mindspore.nn.AvgPool2d`。\n", | |||
| "\n", | |||
| "|算子名|`onnx::AveragePool`|`mindspore.nn.AvgPool2d`|\n", | |||
| "|:----:|:----|:----|\n", | |||
| "|算法实现|`output_shape[i] = floor((input_shape[i]+pad_shape[i]-kernel_shape[i])/strides_shape[i])`<br>OR<br>`output_shape[i] = ceil((input_shape[i]+pad_shape[i]-kernel_shape[i])/strides_shape[i])` based on `ceil_mode`|`output_shape[i] = ceil((input_shape[i]-kernel_size[i]+1)/stride_shape[i])`<br>OR<br>`output_shape[i] = ceil(input_shape[i]/stride_shape[i])` based on `pad_mode`|\n", | |||
| "|参数|`auto_pad`: DEPRECATED<br>`ceil_mode`: optional<br>`count_include_pad`: optional<br>`kernel_shape`: optional<br>`pads`: optional<br>`strides`: optional|`kernel_size`: optional<br>`stride`: optional<br>`pad_mode`: optional<br>`data_format`: optional<br>|\n", | |||
| "|输入|`X`: required|`input`: required|\n", | |||
| "|输出|`Y`|`output`|\n", | |||
| "\n", | |||
| "<br>\n", | |||
| "依据双方算子中参数(Attributes/Parameters)和输入(Inputs)进行ONNX到MindSpore的算子映射。" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 1, | |||
| "metadata": { | |||
| "pycharm": { | |||
| "name": "#%%\n" | |||
| } | |||
| }, | |||
| "outputs": [], | |||
| "source": [ | |||
| "import math\n", | |||
| "\n", | |||
| "import numpy as np\n", | |||
| "\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.mapper.base import ONNXToMindSporeMapper\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.constant import ExchangeMessageKeywords, TemplateKeywords\n", | |||
| "\n", | |||
| "\n", | |||
| "class PoolMapper(ONNXToMindSporeMapper):\n", | |||
| " \"\"\"Pool mapper.\"\"\"\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _operation_name_in_ms(*args, **kwargs):\n", | |||
| " if kwargs['op_name'] == 'onnx::AveragePool':\n", | |||
| " op_name = 'nn.AvgPool{}d'\n", | |||
| " else:\n", | |||
| " op_name = 'nn.MaxPool{}d'\n", | |||
| " dim = len(kwargs['params']['strides'])\n", | |||
| " return op_name.format(dim)\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _convert_params(**kwargs):\n", | |||
| " params = kwargs['params']\n", | |||
| " transformed_params = dict()\n", | |||
| " transformed_params[\"kernel_size\"] = tuple(params['kernel_shape'])\n", | |||
| " transformed_params[\"stride\"] = tuple(params['strides'])\n", | |||
| "\n", | |||
| " return transformed_params\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _convert_trained_weights(**kwargs):\n", | |||
| " return dict()\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _get_ms_opt_shape(**kwargs):\n", | |||
| " \"\"\"用于计算MindSpore算子在使用ONNX参数时,由`input_shape`得到的`output_shape`。\"\"\"\n", | |||
| " params = kwargs['raw_params']\n", | |||
| " input_shape = params['input_shape']\n", | |||
| " kernel_shape = params['kernel_shape']\n", | |||
| " strides = params['strides']\n", | |||
| " dilations = params.get('dilations', (1, 1))\n", | |||
| " ms_opt_shape = np.true_divide(np.subtract(np.array(input_shape[-len(kernel_shape):], dtype=np.float32),\n", | |||
| " ((np.array(kernel_shape, dtype=np.float32) - 1) *\n", | |||
| " np.array(dilations, dtype=np.float32) + 1)) + 1,\n", | |||
| " np.array(strides, dtype=np.float32)).tolist()\n", | |||
| " ms_opt_shape_ceil = tuple(math.ceil(ms_opt_shape_axis) for ms_opt_shape_axis in ms_opt_shape)\n", | |||
| " return ms_opt_shape_ceil\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _generate_snippet_template(**kwargs):\n", | |||
| " \"\"\"\n", | |||
| " 对于无法直接使用`_convert_params`方法进行参数映射的算子,重写此方法通过自定义的模板\n", | |||
| " 来生成算子在转换脚本中的定义(`init`)和调用(`construct`)。\n", | |||
| "\n", | |||
| " Args:\n", | |||
| " operation (str): MindSpore中的对应算子名。\n", | |||
| " converted_params (dict): 由`_convert_params`方法转换得到的MindSpore算子的参数。\n", | |||
| " raw_params (dict): ONNX算子的参数(`raw_params`),`input_shape`和`output_shape`。\n", | |||
| " \"\"\"\n", | |||
| "\n", | |||
| " op = kwargs.get(\"operation\")\n", | |||
| " args = kwargs.get(\"converted_params\", dict())\n", | |||
| "\n", | |||
| " ms_opt_shape = PoolMapper._get_ms_opt_shape(**kwargs)\n", | |||
| " tensor_opt_shape = kwargs['raw_params']['output_shape']\n", | |||
| " tensor_ipt_shape = kwargs['raw_params']['input_shape']\n", | |||
| " kernel_shape = kwargs['raw_params']['kernel_shape']\n", | |||
| " dilations = kwargs['raw_params'].get('dilations', (1, 1))\n", | |||
| " strides = kwargs['raw_params']['strides']\n", | |||
| "\n", | |||
| " if not op:\n", | |||
| " raise ValueError(\"Can not get MindSpore operation name.\")\n", | |||
| "\n", | |||
| " # 定义生成代码的模板。`init_xx`是在`init`中的算子定义,`construct_xx`是在`construct`中的算子调用,\n", | |||
| " # 其中的`variable_slot`是替换用标签,会被后续的脚本生成模块填充。\n", | |||
| " variable_slot = \"var_0\"\n", | |||
| " init_template = f\"self.{{{variable_slot}}} = {op}({', '.join(['%s={%s}' % (p, p) for p in args])})\"\n", | |||
| " construct_template = f\"opt_{{{variable_slot}}} = self.{{{variable_slot}}}(opt_{{{variable_slot}}})\"\n", | |||
| "\n", | |||
| " # 由于该算子在ONNX和MindSpore中的实现差异较大,为了保证转换结果的一致性,需要添加`mindspore.nn.Pad`算子,\n", | |||
| " # 对输入进行处理之后,再传入算子中进行推理。\n", | |||
| " # 该方法的输出依次为`Pad`算子定义,`Pad`算子调用和`Pad`算子的参数`paddings`。\n", | |||
| " init_template_pad, construct_template_pad, paddings = \\\n", | |||
| " PoolMapper._generate_pad_init_and_construct(tensor_opt_shape, tensor_ipt_shape,\n", | |||
| " ms_opt_shape, variable_slot,\n", | |||
| " kernel_shape, dilations, strides)\n", | |||
| "\n", | |||
| " # 返回给后续模块的生成模板数据体,将按照列表顺序依次生成算子定义和算子调用,\n", | |||
| " # `TemplateKeyWords.INIT.value`和`TemplateKeyWords.CONSTRUCT.value`分别表示`init`和`construct`。\n", | |||
| " template = {\n", | |||
| " variable_slot: {\n", | |||
| " TemplateKeywords.INIT.value: [init_template_pad, init_template],\n", | |||
| " TemplateKeywords.CONSTRUCT.value: [construct_template_pad, construct_template]\n", | |||
| " }\n", | |||
| " }\n", | |||
| "\n", | |||
| " # 新添加算子`Pad`的参数`paddings`也作为算子`Pool`的参数进行返回,使该参数也能正确的进行设置。\n", | |||
| " args['paddings'] = paddings\n", | |||
| "\n", | |||
| " # 用于与后续模块进行信息交换。\n", | |||
| " exchange_msg = {\n", | |||
| " variable_slot: {\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.OPERATION.value: op, # MindSpore算子名。\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.VARIABLE_NAME.value: None, # 算子对应的变量名,由后续模块填写,此处为None。\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.OUTPUT_TYPE.value:\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.TSR_TYPE.value, # 算子输出的类型,`mindspore.Tensor`或者`Tuple<mindspore.Tensor>`。\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.INPUTS.value: [], # 算子输入,由后续模块填写,此处为list()。\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.ARGS.value: args, # 算子参数。\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.WEIGHTS.value: dict(), # 算子的权重信息。\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.TRAINABLE_PARAMS.value: dict() # 算子的可训练权重信息。由`_convert_trained_weights`方法返回。\n", | |||
| " }\n", | |||
| " }\n", | |||
| " # 算子输出的变量名。若为多输出,则按照列表顺序依次生成。\n", | |||
| " outputs_list = [f\"opt_{{{variable_slot}}}\"]\n", | |||
| " # ONNX算子和MindSpore算子输出的对应顺序,主要用于保证多输出算子输出拓扑序的一致性。\n", | |||
| " outputs_mapping = ((0, 0),)\n", | |||
| " return template, exchange_msg, outputs_list, outputs_mapping\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _generate_pad_init_and_construct(tensor_opt_shape, tensor_ipt_shape,\n", | |||
| " ms_opt_shape, variable_slot, kernel_shape, dilations, strides):\n", | |||
| " \"\"\"\n", | |||
| " 生成`Pad`算子定义语句,`Pad`算子调用语句和计算参数`paddings`。\n", | |||
| "\n", | |||
| " Args:\n", | |||
| " tensor_opt_shape (tuple): ONNX算子输出尺寸。\n", | |||
| " tensor_ipt_shape (tuple): ONNX算子输入尺寸。\n", | |||
| " ms_opt_shape (tuple): MindSpore算子输出尺寸。\n", | |||
| " variable_slot (str): 用于后续模块进行替换的标识符。\n", | |||
| " kernel_shape (Union[tuple, int]): ONNX算子参数`kernel_shape`。\n", | |||
| " dilations (Union[tuple, int]): ONNX算子参数`dilations`。\n", | |||
| " strides (Union[tuple, int]): ONNX算子参数`strides`。\n", | |||
| " \"\"\"\n", | |||
| "\n", | |||
| " onnx_opt_shape = tensor_opt_shape[-len(ms_opt_shape):]\n", | |||
| " onnx_ipt_shape = tensor_ipt_shape[-len(ms_opt_shape):]\n", | |||
| "\n", | |||
| " if np.any(np.array(ms_opt_shape) > np.array(onnx_opt_shape)):\n", | |||
| " raise ValueError(f\"ms_opt_shape[{ms_opt_shape}] should be no larger than onnx_opt_shape[{onnx_opt_shape}].\")\n", | |||
| "\n", | |||
| " if np.all(np.array(ms_opt_shape) == np.array(onnx_opt_shape)):\n", | |||
| " shape_diff = np.zeros(len(ms_opt_shape)).astype(np.int).tolist()\n", | |||
| " else:\n", | |||
| " shape_diff = np.subtract((np.array(onnx_opt_shape) - 1) * np.array(strides),\n", | |||
| " np.subtract(np.array(onnx_ipt_shape),\n", | |||
| " (np.array(kernel_shape) - 1) * np.array(dilations) + 1)).tolist()\n", | |||
| "\n", | |||
| " zero_pad_single = (0, 0)\n", | |||
| " paddings = [zero_pad_single]\n", | |||
| " num_zero_pads = len(tensor_opt_shape) - len(ms_opt_shape)\n", | |||
| " for _ in range(num_zero_pads - 1):\n", | |||
| " paddings.append(zero_pad_single)\n", | |||
| "\n", | |||
| " for axis_diff in shape_diff:\n", | |||
| " paddings.append((int(axis_diff // 2), int(axis_diff // 2 + axis_diff % 2)))\n", | |||
| "\n", | |||
| " init_template_pad = f\"self.pad_{{{variable_slot}}} = nn.Pad(paddings={{paddings}})\"\n", | |||
| " construct_template_pad = f\"opt_{{{variable_slot}}} = self.pad_{{{variable_slot}}}\" \\\n", | |||
| " f\"({{{ExchangeMessageKeywords.VariableScope.value.INPUTS.value}}})\"\n", | |||
| "\n", | |||
| " return init_template_pad, construct_template_pad, tuple(paddings)" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "将该Mapper脚本命名为`pool_mapper.py`,该命名方式需要和类名(`PoolMapper`)相对应。<br>\n", | |||
| "并放入 `mindinsight/mindconverter/graph_based_converter/mapper/impl/nn`目录下,该放置目录需要根据对应的MindSpore算子所在的层(`nn`/`ops`)来设置。<br>\n", | |||
| "最后修改 `mindinsight/mindconverter/graph_based_converter/mapper/onnx_to_ms.json`,\n", | |||
| "添加 `\"onnx::AveragePool\": \"mindinsight.mindconverter.graph_based_converter.mapper.impl.nn.pool_mapper.PoolMapper\"`来确定ONNX算子所对应的Mapper脚本文件。" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "## 验证自定义算子映射脚本" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 2, | |||
| "metadata": { | |||
| "pycharm": { | |||
| "name": "#%%\n" | |||
| } | |||
| }, | |||
| "outputs": [], | |||
| "source": [ | |||
| "import numpy as np\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.mapper.base import ONNXToMindSporeMapper\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.common.code_fragment import NewFragment\n", | |||
| "\n", | |||
| "\n", | |||
| "def test_mapper(onnx_info):\n", | |||
| " \"\"\"\n", | |||
| " Test mapper.\n", | |||
| "\n", | |||
| " Args:\n", | |||
| " onnx_info (dict): Onnx operator_info. Struct is\n", | |||
| " {\n", | |||
| " 'op_name': op_name,\n", | |||
| " 'attributes': dict(),\n", | |||
| " 'weights': [NodeWeight(), ...]\n", | |||
| " }\n", | |||
| " \"\"\"\n", | |||
| "\n", | |||
| " template, exchange_msg, outputs_lists, outputs_mapping = \\\n", | |||
| " ONNXToMindSporeMapper.convert(onnx_info['op_name'],\n", | |||
| " onnx_info['attributes'],\n", | |||
| " onnx_info['weights'])\n", | |||
| "\n", | |||
| " exchange_msg['var_0']['variable_name'] = 'self_defined_operator'\n", | |||
| " exchange_msg['var_0']['inputs'] = ['x']\n", | |||
| "\n", | |||
| " fragment = NewFragment(data_entity=exchange_msg, code_template=template, outputs=outputs_lists,\n", | |||
| " outputs_mapping=outputs_mapping)\n", | |||
| "\n", | |||
| " code = fragment()\n", | |||
| " init_code = code[0]\n", | |||
| " construct_code = code[1]\n", | |||
| " print('-'*30, 'init_code', '-'*30)\n", | |||
| " print('\\n'.join(init_code))\n", | |||
| " print('-'*30, 'construct_code', '-'*30)\n", | |||
| " print('\\n'.join(construct_code))" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 3, | |||
| "metadata": { | |||
| "pycharm": { | |||
| "name": "#%%\n" | |||
| } | |||
| }, | |||
| "outputs": [ | |||
| { | |||
| "name": "stdout", | |||
| "output_type": "stream", | |||
| "text": [ | |||
| "------------------------------ init_code ------------------------------\n", | |||
| "self.pad_self_defined_operator = nn.Pad(paddings=((0, 0), (0, 0), (1, 2), (1, 2)))\n", | |||
| "self.self_defined_operator = nn.AvgPool2d(kernel_size=(5, 5), stride=(2, 2))\n", | |||
| "------------------------------ construct_code ------------------------------\n", | |||
| "opt_self_defined_operator = self.pad_self_defined_operator(x)\n", | |||
| "opt_self_defined_operator = self.self_defined_operator(opt_self_defined_operator)\n" | |||
| ] | |||
| } | |||
| ], | |||
| "source": [ | |||
| "onnx_operator_info = {'op_name': 'onnx::AveragePool',\n", | |||
| " 'attributes': {'auto_pad': 'NOTSET',\n", | |||
| " 'ceil_mode': 0,\n", | |||
| " 'count_include_pad': 0,\n", | |||
| " 'kernel_shape': (5, 5),\n", | |||
| " 'pads': (0, 0, 0, 0),\n", | |||
| " 'strides': (2, 2),\n", | |||
| " 'input_shape': (1, 3, 224, 224),\n", | |||
| " 'output_shape': (1, 3, 112, 112)\n", | |||
| " },\n", | |||
| " 'weights': []}\n", | |||
| "test_mapper(onnx_operator_info)" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "## 权重迁移相关教程\n", | |||
| "\n", | |||
| "以`onnx::Add`算子为例。" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 4, | |||
| "metadata": { | |||
| "pycharm": { | |||
| "name": "#%%\n" | |||
| } | |||
| }, | |||
| "outputs": [], | |||
| "source": [ | |||
| "import numpy as np\n", | |||
| "\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.constant import ExchangeMessageKeywords, TemplateKeywords, \\\n", | |||
| " WeightType\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.mapper.base import ONNXToMindSporeMapper\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.third_party_graph.onnx_utils import NodeWeight\n", | |||
| "\n", | |||
| "\n", | |||
| "class AddMapper(ONNXToMindSporeMapper):\n", | |||
| " \"\"\"Add mapper.\"\"\"\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _operation_name_in_ms(*args, **kwargs):\n", | |||
| " return \"P.Add\"\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _convert_params(**kwargs):\n", | |||
| " return dict()\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _convert_trained_weights(**kwargs):\n", | |||
| " \"\"\"\n", | |||
| " 权重迁移相关方法,返回数据体用于生成CheckPoint文件。\n", | |||
| "\n", | |||
| " Returns, dict(MindSpore算子权重名: {'data': 权重值, 'type': 权重类型, 'onnx_name': ONNX算子权重名})\n", | |||
| " \"\"\"\n", | |||
| " weights = kwargs.get('weights', list()) # 获取算子输入当中的静态ensor数据体,即为该算子权重,保存在CheckPoint文件当中。\n", | |||
| " tensor = AddMapper._find_val_by_index(0, weights) # 获取权重值,类型为`numpy.ndarray`。\n", | |||
| " onnx_name = AddMapper._find_onnx_name_by_index(0, weights) # 获取权重在ONNX框架中的名称,主要用于权重共享相关功能。\n", | |||
| " # 仅当静态tensor为`np.ndarray`且存在`shape`信息时,该tensor会被保存为权重。\n", | |||
| " if isinstance(tensor, np.ndarray) and tensor.shape:\n", | |||
| " return {'bias': {'data': tensor, 'type': WeightType.PARAMETER.value, 'onnx_name': onnx_name}}\n", | |||
| " return dict()\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _generate_snippet_template(**kwargs):\n", | |||
| " template, exchange_msg, outputs_list, outputs_mapping = ONNXToMindSporeMapper._generate_snippet_template(\n", | |||
| " **kwargs)\n", | |||
| " op = kwargs.get(\"operation\")\n", | |||
| " args = kwargs.get(\"converted_params\")\n", | |||
| " weights = kwargs.get(\"weights\")\n", | |||
| " trainable_params = kwargs.get('trainable_params', dict()) # 获取`_convert_trained_weights`方法的返回值。\n", | |||
| " if not op:\n", | |||
| " raise ValueError(\"Can not get MindSpore operation name.\")\n", | |||
| " if not weights:\n", | |||
| " return template, exchange_msg, outputs_list, outputs_mapping\n", | |||
| "\n", | |||
| " tensor = AddMapper._find_val_by_index(0, weights)\n", | |||
| " bias_shape = tensor.shape\n", | |||
| " # 该静态Tensor在原ONNX算子中的输入中的位置序列号,例如:在算子`onnx::Add(x, y)`中,`x`的位置序列号为0,`y`的位置序列号为1。\n", | |||
| " bias_location = AddMapper._find_location_by_index(0, weights)\n", | |||
| "\n", | |||
| " variable_slot = \"var_0\"\n", | |||
| " init_template = f\"self.{{{variable_slot}}} = {op}({', '.join(['%s={%s}' % (p, p) for p in args])})\"\n", | |||
| " inputs_in_construct = [f\"{{{ExchangeMessageKeywords.VariableScope.value.INPUTS.value}}}\"]\n", | |||
| "\n", | |||
| " # 使用该位置序列号信息,确保该静态Tensor在生成的MindSpore算子中的输入顺序和原ONNX算子中的输入顺序保持一致。\n", | |||
| " if bias_location != -1:\n", | |||
| " inputs_in_construct.insert(bias_location, f\"self.{{{variable_slot}}}_bias\")\n", | |||
| "\n", | |||
| " # 构建出常量Tensor算子,作为算子的输入。\n", | |||
| " # `XXX/bias`和`XXX_bias`当中的`bias`需要\n", | |||
| " # 和`_convert_trained_weights`方法返回值当中定义的`bias`(MindSpore算子权重名)保持一致。\n", | |||
| " if bias_shape:\n", | |||
| " # Note: adding weight shape to args is now deprecated due to conflict of partial weights share processing.\n", | |||
| " variable_slot_param_name = f\"{variable_slot}/bias\" # XX/bias`\n", | |||
| " init_tensor = f\"self.{{{variable_slot}}}_bias = {{{variable_slot_param_name}}}\"\n", | |||
| "\n", | |||
| " else:\n", | |||
| " # 当`shape`信息为None时,`tensor.tolist()`返回单个数值,这种情况下,该值作为算子参数,构建出常量算子作为算子输入。\n", | |||
| " args[\"bias_value\"] = tensor.tolist()\n", | |||
| " init_tensor = f\"self.{{{variable_slot}}}_bias = {{bias_value}}\"\n", | |||
| "\n", | |||
| " construct_template = f\"opt_{{{variable_slot}}} = self.{{{variable_slot}}}\" \\\n", | |||
| " f\"({', '.join(inputs_in_construct)})\"\n", | |||
| " template = {\n", | |||
| " variable_slot: {\n", | |||
| " TemplateKeywords.INIT.value: [init_template, init_tensor],\n", | |||
| " TemplateKeywords.CONSTRUCT.value: [construct_template]\n", | |||
| " }\n", | |||
| " }\n", | |||
| " exchange_msg = {\n", | |||
| " variable_slot: {\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.OPERATION.value: op,\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.VARIABLE_NAME.value: None,\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.OUTPUT_TYPE.value:\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.TSR_TYPE.value,\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.INPUTS.value: [],\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.ARGS.value: args,\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.WEIGHTS.value: weights,\n", | |||
| " ExchangeMessageKeywords.VariableScope.value.TRAINABLE_PARAMS.value: trainable_params\n", | |||
| " }\n", | |||
| " }\n", | |||
| "\n", | |||
| " # 权重共享相关。声明权重名称,权重值由后续模块添加。\n", | |||
| " if bias_shape:\n", | |||
| " exchange_msg[variable_slot][ExchangeMessageKeywords.VariableScope.value.PARAMETERS_DECLARED.value] = {\n", | |||
| " \"bias\": \"\"\n", | |||
| " }\n", | |||
| " outputs_list = [f\"opt_{{{variable_slot}}}\"]\n", | |||
| " outputs_mapping = ((0, 0),)\n", | |||
| " return template, exchange_msg, outputs_list, outputs_mapping" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "## 验证权重迁移算子映射脚本" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 5, | |||
| "metadata": { | |||
| "pycharm": { | |||
| "name": "#%%\n" | |||
| } | |||
| }, | |||
| "outputs": [], | |||
| "source": [ | |||
| "import numpy as np\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.mapper.base import ONNXToMindSporeMapper\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.common.code_fragment import NewFragment\n", | |||
| "\n", | |||
| "\n", | |||
| "def test_mapper(onnx_info):\n", | |||
| " \"\"\"\n", | |||
| " Test mapper.\n", | |||
| "\n", | |||
| " Args:\n", | |||
| " onnx_info (dict): Onnx operator_info. Struct is\n", | |||
| " {\n", | |||
| " 'op_name': op_name,\n", | |||
| " 'attributes': dict(),\n", | |||
| " 'weights': [NodeWeight(), ...]\n", | |||
| " }\n", | |||
| " \"\"\"\n", | |||
| "\n", | |||
| " template, exchange_msg, outputs_lists, outputs_mapping = \\\n", | |||
| " ONNXToMindSporeMapper.convert(onnx_info['op_name'],\n", | |||
| " onnx_info['attributes'],\n", | |||
| " onnx_info['weights'])\n", | |||
| "\n", | |||
| " exchange_msg['var_0']['variable_name'] = 'self_defined_operator'\n", | |||
| " exchange_msg['var_0']['inputs'] = ['x']\n", | |||
| " \n", | |||
| " trainable_params = exchange_msg['var_0']['trainable_params']\n", | |||
| " for weight_name, weight_inst in trainable_params.items():\n", | |||
| " weight = weight_inst['data']\n", | |||
| " weight_shape = weight.shape\n", | |||
| " weight_dtype = weight.dtype\n", | |||
| " exchange_msg['var_0']['parameters'][weight_name] = NewFragment.create_parameter(weight_shape, weight_dtype)\n", | |||
| "\n", | |||
| " fragment = NewFragment(data_entity=exchange_msg, code_template=template, outputs=outputs_lists,\n", | |||
| " outputs_mapping=outputs_mapping)\n", | |||
| "\n", | |||
| " code = fragment()\n", | |||
| " init_code = code[0]\n", | |||
| " construct_code = code[1]\n", | |||
| " print('-'*30, 'init_code', '-'*30)\n", | |||
| " print('\\n'.join(init_code))\n", | |||
| " print('-'*30, 'construct_code', '-'*30)\n", | |||
| " print('\\n'.join(construct_code))" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 6, | |||
| "metadata": { | |||
| "pycharm": { | |||
| "name": "#%%\n" | |||
| } | |||
| }, | |||
| "outputs": [ | |||
| { | |||
| "name": "stdout", | |||
| "output_type": "stream", | |||
| "text": [ | |||
| "------------------------------ init_code ------------------------------\n", | |||
| "self.self_defined_operator = P.Add()\n", | |||
| "self.self_defined_operator_bias = Parameter(Tensor(np.random.uniform(0, 1, (1, 3, 224, 224)).astype(np.int64)), name=None)\n", | |||
| "------------------------------ construct_code ------------------------------\n", | |||
| "opt_self_defined_operator = self.self_defined_operator(x, self.self_defined_operator_bias)\n" | |||
| ] | |||
| } | |||
| ], | |||
| "source": [ | |||
| "onnx_operator_info = {'op_name': 'onnx::Add',\n", | |||
| " 'attributes': {},\n", | |||
| " 'weights': [NodeWeight(weight_name='onnx_bias',\n", | |||
| " weight_value=np.ones((1, 3, 224, 224), dtype=np.int),\n", | |||
| " weight_location=1)]}\n", | |||
| "test_mapper(onnx_operator_info)" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [] | |||
| } | |||
| ], | |||
| "metadata": { | |||
| "kernelspec": { | |||
| "display_name": "Python 3", | |||
| "language": "python", | |||
| "name": "python3" | |||
| }, | |||
| "language_info": { | |||
| "codemirror_mode": { | |||
| "name": "ipython", | |||
| "version": 3 | |||
| }, | |||
| "file_extension": ".py", | |||
| "mimetype": "text/x-python", | |||
| "name": "python", | |||
| "nbconvert_exporter": "python", | |||
| "pygments_lexer": "ipython3", | |||
| "version": "3.7.5" | |||
| } | |||
| }, | |||
| "nbformat": 4, | |||
| "nbformat_minor": 1 | |||
| } | |||
| @@ -0,0 +1,318 @@ | |||
| { | |||
| "cells": [ | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": { | |||
| "collapsed": true, | |||
| "pycharm": { | |||
| "name": "#%% md\n" | |||
| } | |||
| }, | |||
| "source": [ | |||
| "# 添加算子映射关系初级教程\n", | |||
| "`Linux` `Ascend` `GPU` `CPU` `模型迁移` `初级`\n", | |||
| "\n", | |||
| "[](https://gitee.com/mindspore/mindinsight/blob/master/mindinsight/mindconverter/tutorial/add_operator_mapper_base_tutorial.ipynb)" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "## 概述" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "MindConverter工具基于ONNX模型进行脚本转换,生成MindSpore脚本和权重文件。因此需要ONNX算子到MindSpore算子的映射关系文件来保证算子之间转换结果的正确性。本文将以简单的算子映射关系为例,来描述添加算子映射关系文件的方法。" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "## 环境准备\n", | |||
| "\n", | |||
| "本案例需安装以下Python三方库:\n", | |||
| "```bash\n", | |||
| "pip install mindspore==1.2.0\n", | |||
| "pip install mindinsight==1.2.0\n", | |||
| "```" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "## 算子映射脚本(base.py)结构" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 1, | |||
| "metadata": { | |||
| "pycharm": { | |||
| "name": "#%%\n" | |||
| } | |||
| }, | |||
| "outputs": [], | |||
| "source": [ | |||
| "import abc\n", | |||
| "\n", | |||
| "\n", | |||
| "class Mapper(metaclass=abc.ABCMeta):\n", | |||
| " \"\"\"Mapper between third-party-operation and MindSpore.\"\"\"\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " @abc.abstractmethod\n", | |||
| " def _operation_name_in_ms(**kwargs):\n", | |||
| " \"\"\"Corresponding operation name in MindSpore.\"\"\"\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " @abc.abstractmethod\n", | |||
| " def _convert_params(**kwargs):\n", | |||
| " \"\"\"Convert third-party-operation's attributes or weights into MindSpore operation's attributes.\"\"\"\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " @abc.abstractmethod\n", | |||
| " def _convert_trained_weights(**kwargs):\n", | |||
| " \"\"\"Convert third-party-operation's trainable weights into MindSpore operation's.\"\"\"\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " @abc.abstractmethod\n", | |||
| " def _generate_snippet_template(**kwargs):\n", | |||
| " \"\"\"Generate code template according to node info.\"\"\"" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": { | |||
| "pycharm": { | |||
| "name": "#%% md\n" | |||
| } | |||
| }, | |||
| "source": [ | |||
| "## 自定义添加算子映射脚本\n", | |||
| "\n", | |||
| "以`onnx::Gemm`算子为例进行演示。\n", | |||
| "\n", | |||
| "分别查阅[ONNX算子API文档](https://github.com/onnx/onnx/blob/master/docs/Operators.md)和[MindSpore算子API文档](http://www.mindspore.cn/doc/api_python/zh-CN/master/index.html),\n", | |||
| "找到与ONNX算子`onnx::Gemm`功能相同或相近的MindSpore算子`mindspore.nn.Dense`。\n", | |||
| "\n", | |||
| "|算子名|`onnx::Gemm`|`mindspore.nn.Dense`|\n", | |||
| "|:----:|:----|:----|\n", | |||
| "|算法实现|`Y = alpha*A'*B'+beta*C`|`output = activation(inputs*kernel+bias)`|\n", | |||
| "|参数|`alpha`: optional<br>`beta`: optional<br>`transA`: optional<br>`transB`: optional|`in_channels`: required<br>`out_channels`: required<br>`weight_init`: optional<br>`bias_init`: optional<br>`has_bias`: optional<br>`activation`: optional|\n", | |||
| "|输入|`A`: required<br>`B`: required<br>`C`: optional|`input`: required|\n", | |||
| "|输出|`Y`|`output`|\n", | |||
| "\n", | |||
| "<br>\n", | |||
| "依据双方算子中参数(Attributes/Parameters)和输入(Inputs)进行ONNX到MindSpore的算子映射。" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 2, | |||
| "metadata": { | |||
| "pycharm": { | |||
| "name": "#%%\n" | |||
| } | |||
| }, | |||
| "outputs": [], | |||
| "source": [ | |||
| "import numpy as np\n", | |||
| "\n", | |||
| "# 导入Mapper基类\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.mapper.base import ONNXToMindSporeMapper\n", | |||
| "\n", | |||
| "\n", | |||
| "class DenseMapper(ONNXToMindSporeMapper):\n", | |||
| " \"\"\"Dense mapper.\"\"\"\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _operation_name_in_ms(*args, **kwargs):\n", | |||
| " return \"nn.Dense\" # MindSpore中对应的算子名\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _convert_params(**kwargs):\n", | |||
| " \"\"\"\n", | |||
| " 参数迁移相关方法,该方法返回的参数将在生成的MindSpore脚本中以\n", | |||
| " `OP(dict_key_0=dict_value_0, dict_key_1=dict_value_1, ...)`的形式\n", | |||
| " 定义算子,因此需要保证dict_key_x与MindSpore算子中的参数名相同。\n", | |||
| "\n", | |||
| " Args:\n", | |||
| " kwargs: Data for converting.\n", | |||
| " Struct is `{\n", | |||
| " 'weights': [NodeWeight(), NodeWeight(), ...],\n", | |||
| " 'params': {\n", | |||
| " 'input_shape': input_shape,\n", | |||
| " 'output_shape': output_shape,\n", | |||
| " 'onnx_attribute_name_0': onnx_attribute_val_0,\n", | |||
| " ...\n", | |||
| " }\n", | |||
| " }`\n", | |||
| " \"\"\"\n", | |||
| "\n", | |||
| " weights = kwargs['weights'] # 获取ONNX算子的Inputs中的静态Tensor列表\n", | |||
| " # 获取Tensor列表中指定序列号的Tensor值,其中序列号与ONNX算子中的Inputs顺序一致。\n", | |||
| " weight = DenseMapper._find_val_by_index(0, weights)\n", | |||
| " bias = DenseMapper._find_val_by_index(1, weights)\n", | |||
| " has_bias = isinstance(bias, np.ndarray)\n", | |||
| " in_channels, out_channels = weight.shape\n", | |||
| " return {\n", | |||
| " 'in_channels': in_channels,\n", | |||
| " 'out_channels': out_channels,\n", | |||
| " 'has_bias': has_bias\n", | |||
| " }\n", | |||
| "\n", | |||
| " @staticmethod\n", | |||
| " def _convert_trained_weights(**kwargs):\n", | |||
| " \"\"\"\n", | |||
| " 权重迁移相关方法,该方法返回的权重将会保存在生成的CheckPoint(.ckpt)文件当中\n", | |||
| " 使生成的MindSpore脚本可以直接加载该权重文件用于重训练或推理。\n", | |||
| " 详细的内容可参考进阶篇。\n", | |||
| " \"\"\"\n", | |||
| "\n", | |||
| " weights = kwargs['weights']\n", | |||
| " weight = DenseMapper._find_val_by_index(0, weights)\n", | |||
| " bias = DenseMapper._find_val_by_index(1, weights)\n", | |||
| " return {\n", | |||
| " 'weight': {'data': weight},\n", | |||
| " 'bias': {'data': bias}\n", | |||
| " }" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "将该Mapper脚本命名为`dense_mapper.py`,该命名方式需要和类名(`DenseMapper`)相对应。<br>\n", | |||
| "并放入 `mindinsight/mindconverter/graph_based_converter/mapper/impl/nn`目录下,该放置目录需要根据对应的MindSpore算子所在的层(`nn`/`ops`)来设置。<br>\n", | |||
| "最后修改 `mindinsight/mindconverter/graph_based_converter/mapper/onnx_to_ms.json`,\n", | |||
| "添加 `\"onnx::Gemm\": \"mindinsight.mindconverter.graph_based_converter.mapper.impl.nn.dense_mapper.DenseMapper\"`来确定ONNX算子所对应的Mapper脚本文件。" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "## 验证自定义算子映射脚本" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 3, | |||
| "metadata": { | |||
| "pycharm": { | |||
| "name": "#%%\n" | |||
| } | |||
| }, | |||
| "outputs": [], | |||
| "source": [ | |||
| "import numpy as np\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.mapper.base import ONNXToMindSporeMapper\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.common.code_fragment import NewFragment\n", | |||
| "from mindinsight.mindconverter.graph_based_converter.third_party_graph.onnx_utils import NodeWeight\n", | |||
| "\n", | |||
| "def test_mapper(onnx_info):\n", | |||
| " \"\"\"\n", | |||
| " Test mapper.\n", | |||
| "\n", | |||
| " Args:\n", | |||
| " onnx_info (dict): Onnx operator_info. Struct is\n", | |||
| " {\n", | |||
| " 'op_name': op_name,\n", | |||
| " 'attributes': dict(),\n", | |||
| " 'weights': [NodeWeight(), ...]\n", | |||
| " }\n", | |||
| " \"\"\"\n", | |||
| "\n", | |||
| " template, exchange_msg, outputs_lists, outputs_mapping = \\\n", | |||
| " ONNXToMindSporeMapper.convert(onnx_info['op_name'],\n", | |||
| " onnx_info['attributes'],\n", | |||
| " onnx_info['weights'])\n", | |||
| "\n", | |||
| " exchange_msg['var_0']['variable_name'] = 'self_defined_operator'\n", | |||
| " exchange_msg['var_0']['inputs'] = ['x']\n", | |||
| "\n", | |||
| " fragment = NewFragment(data_entity=exchange_msg, code_template=template, outputs=outputs_lists,\n", | |||
| " outputs_mapping=outputs_mapping)\n", | |||
| "\n", | |||
| " code = fragment()\n", | |||
| " init_code = code[0]\n", | |||
| " construct_code = code[1]\n", | |||
| " print('-'*30, 'init_code', '-'*30)\n", | |||
| " print('\\n'.join(init_code))\n", | |||
| " print('-'*30, 'construct_code', '-'*30)\n", | |||
| " print('\\n'.join(construct_code))" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 4, | |||
| "metadata": { | |||
| "pycharm": { | |||
| "name": "#%%\n" | |||
| } | |||
| }, | |||
| "outputs": [ | |||
| { | |||
| "name": "stdout", | |||
| "output_type": "stream", | |||
| "text": [ | |||
| "------------------------------ init_code ------------------------------\n", | |||
| "self.self_defined_operator = nn.Dense(in_channels=3, out_channels=10, has_bias=True)\n", | |||
| "------------------------------ construct_code ------------------------------\n", | |||
| "opt_self_defined_operator = self.self_defined_operator(x)\n" | |||
| ] | |||
| } | |||
| ], | |||
| "source": [ | |||
| "onnx_operator_info = {'op_name': 'onnx::Gemm',\n", | |||
| " 'attributes': {'alpha': 1.0,\n", | |||
| " 'beta': 1.0,\n", | |||
| " 'transA': 0,\n", | |||
| " 'transB': 0},\n", | |||
| " 'weights': [NodeWeight(weight_name='weight',\n", | |||
| " weight_location=1,\n", | |||
| " weight_value=np.ones((10, 3),\n", | |||
| " dtype=np.int)),\n", | |||
| " NodeWeight(weight_name='bias',\n", | |||
| " weight_location=2,\n", | |||
| " weight_value=np.ones((10, 3),\n", | |||
| " dtype=np.int))]}\n", | |||
| "test_mapper(onnx_operator_info)" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [] | |||
| } | |||
| ], | |||
| "metadata": { | |||
| "kernelspec": { | |||
| "display_name": "Python 3", | |||
| "language": "python", | |||
| "name": "python3" | |||
| }, | |||
| "language_info": { | |||
| "codemirror_mode": { | |||
| "name": "ipython", | |||
| "version": 3 | |||
| }, | |||
| "file_extension": ".py", | |||
| "mimetype": "text/x-python", | |||
| "name": "python", | |||
| "nbconvert_exporter": "python", | |||
| "pygments_lexer": "ipython3", | |||
| "version": "3.7.5" | |||
| } | |||
| }, | |||
| "nbformat": 4, | |||
| "nbformat_minor": 1 | |||
| } | |||
| @@ -8,7 +8,7 @@ | |||
| "# PyTorch BERT迁移案例\n", | |||
| "`Linux` `Ascend` `GPU` `CPU` `模型迁移` `初级` `中级` `高级`\n", | |||
| "\n", | |||
| "[](https://gitee.com/mindspore/docs/blob/master/docs/migration_guide/source_zh_cn/torch_bert_migration_case_of_mindconverter.ipynb)" | |||
| "[](https://gitee.com/mindspore/mindinsight/blob/master/mindinsight/mindconverter/tutorial/pytorch_bert_migration_tutorial.ipynb)" | |||
| ] | |||
| }, | |||
| { | |||
| @@ -35,10 +35,8 @@ class CpuOpTypeAnalyser(GpuAnalyser): | |||
| Returns: | |||
| list, the converted data. | |||
| """ | |||
| factor_us_to_ms = 1e-3 | |||
| try: | |||
| return [row[0], int(row[1]), int(row[2]), float(row[3]) * factor_us_to_ms, | |||
| float(row[4]) * factor_us_to_ms, float(row[5])*100] | |||
| return [row[0], int(row[1]), int(row[2]), float(row[3]), float(row[4]), float(row[5])*100] | |||
| except IndexError as err: | |||
| log.exception(err) | |||
| raise ProfilerRawFileException('failed to get HOST CPU operator type data.') | |||
| @@ -60,10 +58,8 @@ class CpuOpInfoAnalyser(GpuAnalyser): | |||
| Returns: | |||
| list, the converted data. | |||
| """ | |||
| factor_us_to_ms = 1e-3 | |||
| try: | |||
| return [row[0], row[1], row[2], row[3], int(row[4]), float(row[5]) * factor_us_to_ms, | |||
| float(row[6]) * factor_us_to_ms, float(row[7]), row[8]] | |||
| return [row[0], row[1], row[2], row[3], int(row[4]), float(row[5]), float(row[6]), float(row[7]), row[8]] | |||
| except IndexError as err: | |||
| log.exception(err) | |||
| raise ProfilerRawFileException('failed to get HOST CPU operator detail data.') | |||
| @@ -109,9 +109,11 @@ class Command(BaseCommand): | |||
| processes.append(process) | |||
| try: | |||
| self._send_signal(process, signal.SIGINT) | |||
| # Wait 2 second, if not terminate, kill the worker process. | |||
| _, alive = psutil.wait_procs(processes, 2) | |||
| # Wait 3 seconds, if not terminate, kill the worker process. | |||
| exit_timeout_seconds = 3 | |||
| _, alive = psutil.wait_procs(processes, exit_timeout_seconds) | |||
| for alive_process in alive: | |||
| self.logfile.info("Stop process %d because timeout.", alive_process.pid) | |||
| self._send_signal(alive_process, signal.SIGKILL) | |||
| except psutil.Error as ex: | |||
| self.logfile.error("Stop process %d failed. Detail: %s.", pid, str(ex)) | |||
| @@ -138,11 +138,11 @@ export default { | |||
| dataMapDownloadStyle: '<style> #graph0 > polygon { fill: transparent; }' + | |||
| '.node, .cluster { cursor: pointer; }' + | |||
| '.selected { polygon, ellipse { stroke: red !important; stroke-width: 2px; } }' + | |||
| '.CreatDataset > polygon, .Operator > ellipse { stroke: #4ea6e6; fill: #b8e0ff; }' + | |||
| '.Create > polygon, .Operator > ellipse { stroke: #4ea6e6; fill: #b8e0ff; }' + | |||
| '.cluster > polygon { fill: #8df1f2; stroke: #00a5a7; }' + | |||
| '.RepeatDataset > polygon { stroke: #fdca5a; fill: #fff2d4; }' + | |||
| '.ShuffleDataset > polygon { stroke: #e37d29; fill: #ffd0a6; }' + | |||
| '.BatchDataset > polygon { stroke: #de504e; fill: #ffbcba; }' + | |||
| '.edge { path { stroke: rgb(167, 167, 167); }' + | |||
| 'polygon { fill: rgb(167, 167, 167); stroke: rgb(167, 167, 167); } }</style>', | |||
| '.Repeat > polygon { stroke: #fdca5a; fill: #fff2d4; }' + | |||
| '.Shuffle > polygon { stroke: #e37d29; fill: #ffd0a6; }' + | |||
| '.Batch > polygon { stroke: #de504e; fill: #ffbcba; }' + | |||
| '.edge path { stroke: rgb(167, 167, 167); }' + | |||
| '.edge polygon { fill: rgb(167, 167, 167); stroke: rgb(167, 167, 167); }</style>', | |||
| }; | |||
| @@ -388,7 +388,7 @@ | |||
| "opNum": "Number of operators:", | |||
| "opTimes": "Total operator execution times:", | |||
| "features": "Functions:", | |||
| "iterationInfo": "The step trace displays the duration of each step from the start of the previous iteration to the end of the step. The main time is divided into three parts: step interval, forward and backward propagation, and step tail.", | |||
| "iterationInfo": "The step trace displays the duration of each step from the start of the previous iteration to the end of the step. The main time is divided into three parts: step interval, forward and backward propagation, and step tail.(Note that this feature do not support heterogeneous training scene)", | |||
| "iterationGapInfo": "Reads data from data queues. If this part takes a long time, you are advised to check the data processing for further analysis.", | |||
| "fpbpTitle": "Forward and Backward Propagation", | |||
| "fpbpInfo": "Executes the forward and backward operators on the network, which carry the main calculation work of a step. If this part takes a long time, you are advised to check the operator statistics or timeline for further analysis.", | |||
| @@ -510,7 +510,8 @@ | |||
| "lifeCycle": "Lifecycle", | |||
| "fpStart": "Forward", | |||
| "bpEnd": "Backward" | |||
| } | |||
| }, | |||
| "isHeterogeneous": "Heterogeneous training scenarios are not supported temporarily." | |||
| }, | |||
| "profilingGPU": { | |||
| "minddata_get_next_queue": { | |||
| @@ -387,7 +387,7 @@ | |||
| "opNum": "算子数目:", | |||
| "opTimes": "算子执行总次数:", | |||
| "features": "功能介绍:", | |||
| "iterationInfo": "迭代轨迹展示的是每个step从上个迭代开始至该step结束的耗时信息,主体时间分为3部分:迭代间隙、前向反向、迭代拖尾。", | |||
| "iterationInfo": "迭代轨迹展示的是每个step从上个迭代开始至该step结束的耗时信息,主体时间分为3部分:迭代间隙、前向反向、迭代拖尾。(该特性暂不支持异构训练场景)", | |||
| "iterationGapInfo": "主要负责从数据队列中读取数据,如果该部分耗时较长,建议前往数据处理部分进一步分析;", | |||
| "fpbpTitle": "前向反向", | |||
| "fpbpInfo": "执行网络中的前向算子以及反向算子,承载了一个step主要的计算工作,如果该部分耗时较长,建议前往算子统计或时间线中进一步分析;", | |||
| @@ -509,7 +509,8 @@ | |||
| "lifeCycle": "生命周期", | |||
| "fpStart": "前向", | |||
| "bpEnd": "后向" | |||
| } | |||
| }, | |||
| "isHeterogeneous":"暂不支持异构训练场景" | |||
| }, | |||
| "profilingGPU": { | |||
| "minddata_get_next_queue": { | |||
| @@ -106,7 +106,7 @@ limitations under the License. | |||
| alt="" /> | |||
| </div> | |||
| <p v-show="!svg.initOver">{{$t("public.dataLoading")}}</p> | |||
| <p v-show="svg.initOver">{{$t("public.noData")}}</p> | |||
| <p v-show="svg.initOver">{{isHeterogeneous?$t("profiling.isHeterogeneous"):$t("public.noData")}}</p> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| @@ -475,6 +475,7 @@ export default { | |||
| noData: true, | |||
| initOver: false, // Is initialization complete | |||
| }, | |||
| isHeterogeneous: false, | |||
| }; | |||
| }, | |||
| mounted() { | |||
| @@ -823,6 +824,7 @@ export default { | |||
| RequestService.queryTrainingTrace(params).then( | |||
| (res) => { | |||
| this.svg.initOver = true; | |||
| this.isHeterogeneous = res.data.is_heterogeneous; | |||
| if (res && res.data && res.data.training_trace_graph && res.data.training_trace_graph.length) { | |||
| this.svg.noData = false; | |||
| this.removeTrace(); | |||
| @@ -866,6 +868,7 @@ export default { | |||
| this.totalSteps = '--'; | |||
| this.totalTime = '--'; | |||
| this.tailPercent = '--'; | |||
| this.isHeterogeneous = false; | |||
| }, | |||
| ); | |||
| }, | |||
| @@ -106,7 +106,7 @@ limitations under the License. | |||
| alt="" /> | |||
| </div> | |||
| <p v-show="!svg.initOver">{{$t("public.dataLoading")}}</p> | |||
| <p v-show="svg.initOver">{{$t("public.noData")}}</p> | |||
| <p v-show="svg.initOver">{{isHeterogeneous?$t("profiling.isHeterogeneous"):$t("public.noData")}}</p> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| @@ -475,6 +475,7 @@ export default { | |||
| }, | |||
| initOver: false, // Is initialization complete | |||
| }, | |||
| isHeterogeneous: false, | |||
| }; | |||
| }, | |||
| mounted() { | |||
| @@ -696,6 +697,7 @@ export default { | |||
| RequestService.queryTrainingTrace(params).then( | |||
| (res) => { | |||
| this.svg.initOver = true; | |||
| this.isHeterogeneous = res.data.is_heterogeneous; | |||
| if ( | |||
| res && | |||
| res.data && | |||
| @@ -743,6 +745,7 @@ export default { | |||
| this.totalSteps = '--'; | |||
| this.totalTime = '--'; | |||
| this.tailPercent = '--'; | |||
| this.isHeterogeneous = false; | |||
| }, | |||
| ); | |||
| }, | |||
| @@ -255,19 +255,24 @@ export default { | |||
| 'ShuffleDataset', | |||
| 'RepeatDataset', | |||
| 'MapDataset', | |||
| 'Batch', | |||
| 'Shuffle', | |||
| 'Repeat', | |||
| 'Map', | |||
| ]; | |||
| const subGraphNodeType = ['Map', 'MapDataset']; | |||
| let nodeStr = ''; | |||
| let edgeStr = ''; | |||
| Object.keys(this.allGraphData).forEach((key) => { | |||
| const node = this.allGraphData[key]; | |||
| if (node.op_type === 'MapDataset') { | |||
| if (subGraphNodeType.includes(node.op_type)) { | |||
| nodeStr += this.packageSubGraph(key); | |||
| } else { | |||
| node.id = key; | |||
| nodeStr += | |||
| `<${node.key}>[id="${node.key}";label="${node.op_type}";` + | |||
| `class=${ | |||
| nodeType.includes(node.op_type) ? node.op_type : 'CreatDataset' | |||
| nodeType.includes(node.op_type) ? node.op_type.replace('Dataset', '') : 'Create' | |||
| };shape=rect;fillcolor="#9cc3e5";];`; | |||
| } | |||
| }); | |||
| @@ -277,12 +282,8 @@ export default { | |||
| node.children.forEach((k) => { | |||
| const child = this.allGraphData[k]; | |||
| edgeStr += `<${child.id}>-><${node.id}>[${ | |||
| child.op_type === 'MapDataset' | |||
| ? `ltail=<cluster_${child.key}>;` | |||
| : '' | |||
| }${ | |||
| node.op_type === 'MapDataset' ? `lhead=<cluster_${node.key}>;` : '' | |||
| }];`; | |||
| subGraphNodeType.includes(child.op_type) ? `ltail=<cluster_${child.key}>;` : '' | |||
| }${subGraphNodeType.includes(node.op_type) ? `lhead=<cluster_${node.key}>;` : ''}];`; | |||
| }); | |||
| }); | |||
| const initSetting = | |||
| @@ -692,7 +693,7 @@ export default { | |||
| stroke: red !important; | |||
| stroke-width: 2px; | |||
| } | |||
| .cl-data-map-manage #data-maps .cl-data-map .data-map-container #graph .CreatDataset > polygon, | |||
| .cl-data-map-manage #data-maps .cl-data-map .data-map-container #graph .Create > polygon, | |||
| .cl-data-map-manage #data-maps .cl-data-map .data-map-container #graph .Operator > ellipse { | |||
| stroke: #4ea6e6; | |||
| fill: #b8e0ff; | |||
| @@ -701,15 +702,15 @@ export default { | |||
| fill: #8df1f2; | |||
| stroke: #00a5a7; | |||
| } | |||
| .cl-data-map-manage #data-maps .cl-data-map .data-map-container #graph .RepeatDataset > polygon { | |||
| .cl-data-map-manage #data-maps .cl-data-map .data-map-container #graph .Repeat > polygon { | |||
| stroke: #fdca5a; | |||
| fill: #fff2d4; | |||
| } | |||
| .cl-data-map-manage #data-maps .cl-data-map .data-map-container #graph .ShuffleDataset > polygon { | |||
| .cl-data-map-manage #data-maps .cl-data-map .data-map-container #graph .Shuffle > polygon { | |||
| stroke: #e37d29; | |||
| fill: #ffd0a6; | |||
| } | |||
| .cl-data-map-manage #data-maps .cl-data-map .data-map-container #graph .BatchDataset > polygon { | |||
| .cl-data-map-manage #data-maps .cl-data-map .data-map-container #graph .Batch > polygon { | |||
| stroke: #de504e; | |||
| fill: #ffbcba; | |||
| } | |||
| @@ -2011,19 +2011,24 @@ export default { | |||
| 'ShuffleDataset', | |||
| 'RepeatDataset', | |||
| 'MapDataset', | |||
| 'Batch', | |||
| 'Shuffle', | |||
| 'Repeat', | |||
| 'Map', | |||
| ]; | |||
| const subGraphNodeType = ['Map', 'MapDataset']; | |||
| let nodeStr = ''; | |||
| let edgeStr = ''; | |||
| Object.keys(this.allDatasetGraphData).forEach((key) => { | |||
| const node = this.allDatasetGraphData[key]; | |||
| if (node.op_type === 'MapDataset') { | |||
| if (subGraphNodeType.includes(node.op_type)) { | |||
| nodeStr += this.packageSubGraph(key); | |||
| } else { | |||
| node.id = key; | |||
| nodeStr += | |||
| `<${node.key}>[id="${node.key}";label="${node.op_type}";` + | |||
| `class=${ | |||
| nodeType.includes(node.op_type) ? node.op_type : 'CreatDataset' | |||
| nodeType.includes(node.op_type) ? node.op_type.replace('Dataset', '') : 'Create' | |||
| };shape=rect;fillcolor="#9cc3e5";];`; | |||
| } | |||
| }); | |||
| @@ -2033,12 +2038,8 @@ export default { | |||
| node.children.forEach((k) => { | |||
| const child = this.allDatasetGraphData[k]; | |||
| edgeStr += `<${child.id}>-><${node.id}>[${ | |||
| child.op_type === 'MapDataset' | |||
| ? `ltail=<cluster_${child.key}>;` | |||
| : '' | |||
| }${ | |||
| node.op_type === 'MapDataset' ? `lhead=<cluster_${node.key}>;` : '' | |||
| }];`; | |||
| subGraphNodeType.includes(child.op_type) ? `ltail=<cluster_${child.key}>;` : '' | |||
| }${subGraphNodeType.includes(node.op_type) ? `lhead=<cluster_${node.key}>;` : ''}];`; | |||
| }); | |||
| }); | |||
| const initSetting = | |||
| @@ -2345,7 +2346,7 @@ export default { | |||
| white-space: nowrap; | |||
| overflow: hidden; | |||
| } | |||
| .cl-dashboard #dataMapGraph .CreatDataset > polygon, | |||
| .cl-dashboard #dataMapGraph .Create > polygon, | |||
| .cl-dashboard #dataMapGraph .Operator > ellipse { | |||
| stroke: #4ea6e6; | |||
| fill: #b8e0ff; | |||
| @@ -2354,15 +2355,15 @@ export default { | |||
| fill: #8df1f2; | |||
| stroke: #00a5a7; | |||
| } | |||
| .cl-dashboard #dataMapGraph .RepeatDataset > polygon { | |||
| .cl-dashboard #dataMapGraph .Repeat > polygon { | |||
| stroke: #fdca5a; | |||
| fill: #fff2d4; | |||
| } | |||
| .cl-dashboard #dataMapGraph .ShuffleDataset > polygon { | |||
| .cl-dashboard #dataMapGraph .Shuffle > polygon { | |||
| stroke: #e37d29; | |||
| fill: #ffd0a6; | |||
| } | |||
| .cl-dashboard #dataMapGraph .BatchDataset > polygon { | |||
| .cl-dashboard #dataMapGraph .Batch > polygon { | |||
| stroke: #de504e; | |||
| fill: #ffbcba; | |||
| } | |||
| @@ -13,267 +13,146 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Compute resource manager.""" | |||
| import fractions | |||
| import math | |||
| import functools | |||
| import multiprocessing | |||
| import threading | |||
| from concurrent import futures | |||
| import multiprocessing | |||
| from mindinsight.utils.log import setup_logger | |||
| from mindinsight.utils.constant import GeneralErrors | |||
| from mindinsight.utils.exceptions import MindInsightException | |||
| import _thread | |||
| from mindinsight.utils.log import setup_logger | |||
| _MP_CONTEXT = multiprocessing.get_context(method="forkserver") | |||
| terminating = False | |||
| class ComputingResourceManager: | |||
| """ | |||
| Manager for computing resources. | |||
| This class provides executors for computing tasks. Executors can only be used once. | |||
| Args: | |||
| executors_cnt (int): Number of executors to be provided by this class. | |||
| max_processes_cnt (int): Max number of processes to be used for computing. | |||
| Note: | |||
| 1. Please always use the get_instance method to get instance. | |||
| 2. This class will be used in a multi-threaded env, so it needs to be | |||
| thread-safe. | |||
| """ | |||
| def __init__(self, executors_cnt=1, max_processes_cnt=4): | |||
| self._max_processes_cnt = max_processes_cnt | |||
| self._executors_cnt = executors_cnt | |||
| _cls_lock = threading.Lock() | |||
| _instance = None | |||
| _exiting = False | |||
| def __init__(self): | |||
| self._executors = {} | |||
| self._executor_id_counter = 1 | |||
| self._lock = threading.Lock() | |||
| self._executors = { | |||
| ind: Executor( | |||
| self, executor_id=ind, | |||
| available_workers=fractions.Fraction(self._max_processes_cnt, self._executors_cnt)) | |||
| for ind in range(self._executors_cnt) | |||
| } | |||
| self._remaining_executors = len(self._executors) | |||
| self._backend = futures.ProcessPoolExecutor(max_workers=max_processes_cnt, mp_context=_MP_CONTEXT) | |||
| self.logger = setup_logger("utils", "utils") | |||
| self.logger.info("Initialized ComputingResourceManager with executors_cnt=%s, max_processes_cnt=%s.", | |||
| executors_cnt, max_processes_cnt) | |||
| self._exiting = False | |||
| self._logger = setup_logger("utils", "utils") | |||
| def __enter__(self): | |||
| """This method is not thread safe.""" | |||
| return self | |||
| @classmethod | |||
| def get_instance(cls): | |||
| """Get the singleton instance.""" | |||
| with cls._cls_lock: | |||
| if cls._instance is None: | |||
| cls._instance = ComputingResourceManager() | |||
| return cls._instance | |||
| def __exit__(self, exc_type, exc_val, exc_tb): | |||
| def exit(self): | |||
| """ | |||
| This should not block because every executor have waited. If it blocks, there may be some problem. | |||
| Called when the gunicorn worker process is exiting. | |||
| This method is not thread safe. | |||
| This method will be called in the signal handling thread, which is not | |||
| the same thread with get_executor. Also, this method will hold the lock | |||
| to block other threads from operating the singleton or executors. | |||
| """ | |||
| self._backend.shutdown() | |||
| with self._lock: | |||
| self._logger.info("Start to exit.") | |||
| self._exiting = True | |||
| for executor in self._executors.values(): | |||
| # It's safe to call executor.shutdown() multiple times. | |||
| executor.shutdown(wait=True) | |||
| self._logger.info("Exited.") | |||
| def get_executor(self): | |||
| def get_executor(self, max_processes_cnt=1): | |||
| """ | |||
| Get an executor. | |||
| Returns: | |||
| Executor, which can be used for submitting tasks. | |||
| Raises: | |||
| ComputeResourceManagerException: when no more executor is available. | |||
| This method may be called by different business from different threads. | |||
| So it needs to be tread-safe. | |||
| """ | |||
| with self._lock: | |||
| self._remaining_executors -= 1 | |||
| if self._remaining_executors < 0: | |||
| raise ComputingResourceManagerException("No more executors.") | |||
| return self._executors[self._remaining_executors] | |||
| def destroy_executor(self, executor_id): | |||
| """ | |||
| Destroy an executor to reuse it's workers. | |||
| Args: | |||
| executor_id (int): Id of the executor to be destroyed. | |||
| """ | |||
| if self._exiting: | |||
| self._logger.info( | |||
| "System is exiting, will terminate the thread.") | |||
| _thread.exit() | |||
| executor = Executor( | |||
| max_processes_cnt=max_processes_cnt, | |||
| exit_callback=functools.partial( | |||
| self._remove_executor, | |||
| executor_id=self._executor_id_counter), | |||
| exit_check_fn=self._check_exit | |||
| ) | |||
| self._executors[self._executor_id_counter] = executor | |||
| self._executor_id_counter += 1 | |||
| return executor | |||
| def _remove_executor(self, executor_id): | |||
| with self._lock: | |||
| released_workers = self._executors[executor_id].available_workers | |||
| self._executors.pop(executor_id) | |||
| remaining_executors = len(self._executors) | |||
| self.logger.info("Destroy executor %s. Will release %s worker(s). Remaining executors: %s.", | |||
| executor_id, released_workers, remaining_executors) | |||
| if not remaining_executors: | |||
| return | |||
| for executor in self._executors.values(): | |||
| executor.add_worker( | |||
| fractions.Fraction( | |||
| released_workers.numerator, | |||
| released_workers.denominator * remaining_executors)) | |||
| def submit(self, *args, **kwargs): | |||
| """ | |||
| Submit a task. | |||
| See concurrent.futures.Executor.submit() for details. | |||
| This method should only be called by Executor. Users should not call this method directly. | |||
| """ | |||
| def _check_exit(self): | |||
| with self._lock: | |||
| if not terminating: | |||
| return self._backend.submit(*args, **kwargs) | |||
| self.logger.info('Got submit after process pool shutdown.') | |||
| return None | |||
| class ComputingResourceManagerException(MindInsightException): | |||
| """ | |||
| Indicates a computing resource error has occurred. | |||
| This exception should not be presented to end users. | |||
| Args: | |||
| msg (str): Exception message. | |||
| """ | |||
| def __init__(self, msg): | |||
| super().__init__(error=GeneralErrors.COMPUTING_RESOURCE_ERROR, message=msg) | |||
| class WrappedFuture: | |||
| """ | |||
| Wrap Future objects with custom logics to release compute slots. | |||
| Args: | |||
| executor (Executor): The executor which generates this future. | |||
| original_future (futures.Future): Original future object. | |||
| """ | |||
| def __init__(self, executor, original_future: futures.Future): | |||
| self._original_future = original_future | |||
| self._executor = executor | |||
| self.logger = setup_logger("utils", "utils") | |||
| def add_done_callback(self, callback): | |||
| """ | |||
| Add done callback. | |||
| See futures.Future.add_done_callback() for details. | |||
| """ | |||
| def _wrapped_callback(*args, **kwargs): | |||
| self.logger.debug("Future callback called.") | |||
| try: | |||
| return callback(*args, **kwargs) | |||
| finally: | |||
| self._executor.release_slot() | |||
| self._executor.remove_done_future(self._original_future) | |||
| self._original_future.add_done_callback(_wrapped_callback) | |||
| return self._exiting | |||
| class Executor: | |||
| """ | |||
| Task executor. | |||
| Wrapped ProcessPoolExecutor to help global management. | |||
| Args: | |||
| mgr (ComputingResourceManager): The ComputingResourceManager that generates this executor. | |||
| executor_id (int): Executor id. | |||
| available_workers (fractions.Fraction): Available workers. | |||
| max_processes_cnt (int): Max processes to use. | |||
| exit_callback (Callable): A callback that will be called after process | |||
| pool exit. | |||
| exit_check_fn (Callable): A function to check whether the system is | |||
| exiting. | |||
| """ | |||
| def __init__(self, mgr: ComputingResourceManager, executor_id, available_workers): | |||
| self._mgr = mgr | |||
| self.closed = False | |||
| self._available_workers = available_workers | |||
| self._effective_workers = self._calc_effective_workers(self._available_workers) | |||
| self._slots = threading.Semaphore(value=self._effective_workers) | |||
| self._id = executor_id | |||
| self._futures = set() | |||
| self._lock = threading.Lock() | |||
| self.logger = setup_logger("utils", "utils") | |||
| self.logger.debug("Available workers: %s.", available_workers) | |||
| def __init__(self, max_processes_cnt, exit_callback, exit_check_fn): | |||
| self._backend = futures.ProcessPoolExecutor( | |||
| max_workers=max_processes_cnt, | |||
| mp_context=_MP_CONTEXT) | |||
| self._exit_callback = exit_callback | |||
| self._task_slots = threading.Semaphore(value=max_processes_cnt) | |||
| self._exit_check_fn = exit_check_fn | |||
| self._logger = setup_logger("utils", "utils") | |||
| def __enter__(self): | |||
| """This method is not thread safe.""" | |||
| if self.closed: | |||
| raise ComputingResourceManagerException("Can not reopen closed executor.") | |||
| self._backend.__enter__() | |||
| return self | |||
| def __exit__(self, exc_type, exc_val, exc_tb): | |||
| """This method is not thread safe.""" | |||
| self._close() | |||
| def __exit__(self, *args, **kwargs): | |||
| ret = self._backend.__exit__(*args, **kwargs) | |||
| self._exit_callback() | |||
| return ret | |||
| def submit(self, *args, **kwargs): | |||
| """ | |||
| Submit task. | |||
| See concurrent.futures.Executor.submit() for details. This method is not thread safe. | |||
| """ | |||
| self.logger.debug("Task submitted to executor %s.", self._id) | |||
| if self.closed: | |||
| raise ComputingResourceManagerException("Cannot submit task to a closed executor.") | |||
| # Thread will wait on acquire(). | |||
| self._slots.acquire() | |||
| future = self._mgr.submit(*args, **kwargs) | |||
| if future is None: | |||
| return None | |||
| # set.add is atomic in c-python. | |||
| self._futures.add(future) | |||
| return WrappedFuture(self, future) | |||
| if self._exit_check_fn(): | |||
| self._logger.warning( | |||
| "System exiting, will terminate current thread.") | |||
| _thread.exit() | |||
| self._task_slots.acquire() | |||
| future = self._backend.submit(*args, **kwargs) | |||
| # The future object is not needed for releasing semaphores. | |||
| future.add_done_callback(lambda future_obj: self._task_slots.release()) | |||
| return future | |||
| def release_slot(self): | |||
| """ | |||
| Release a slot for new tasks to be submitted. | |||
| Semaphore is itself thread safe, so no lock is needed. | |||
| This method should only be called by ExecutorFuture. | |||
| """ | |||
| self._slots.release() | |||
| def remove_done_future(self, future): | |||
| """ | |||
| Remove done futures so the executor will not track them. | |||
| This method should only be called by WrappedFuture. | |||
| """ | |||
| # set.remove is atomic in c-python so no lock is needed. | |||
| self._futures.remove(future) | |||
| @staticmethod | |||
| def _calc_effective_workers(available_workers): | |||
| return 1 if available_workers <= 1 else math.floor(available_workers) | |||
| submit.__doc__ = futures.Executor.submit.__doc__ | |||
| def _close(self): | |||
| self.closed = True | |||
| self.logger.debug("Executor is being closed, futures to wait: %s", self._futures) | |||
| futures.wait(self._futures) | |||
| self.logger.debug("Executor wait futures completed.") | |||
| self._mgr.destroy_executor(self._id) | |||
| self.logger.debug("Executor is closed.") | |||
| def shutdown(self, wait): | |||
| self._backend.shutdown(wait) | |||
| @property | |||
| def available_workers(self): | |||
| """Get available workers.""" | |||
| with self._lock: | |||
| return self._available_workers | |||
| def add_worker(self, added_available_workers): | |||
| """This method should only be called by ComputeResourceManager.""" | |||
| self.logger.debug("Add worker: %s", added_available_workers) | |||
| with self._lock: | |||
| self._available_workers += added_available_workers | |||
| new_effective_workers = self._calc_effective_workers(self._available_workers) | |||
| if new_effective_workers > self._effective_workers: | |||
| for _ in range(new_effective_workers - self._effective_workers): | |||
| self._slots.release() | |||
| self._effective_workers = new_effective_workers | |||
| def wait_all_tasks_finish(self): | |||
| """ | |||
| Wait all tasks finish. | |||
| This method is not thread safe. | |||
| """ | |||
| futures.wait(self._futures) | |||
| shutdown.__doc__ = futures.Executor.shutdown.__doc__ | |||
| def terminate(): | |||
| """Set the terminating flag.""" | |||
| global terminating | |||
| terminating = True | |||
| ComputingResourceManager.get_instance().exit() | |||
| @@ -29,6 +29,12 @@ from setuptools.command.build_py import build_py | |||
| from setuptools.command.install import install | |||
| def get_readme_content(): | |||
| pwd = os.path.dirname(os.path.realpath(__file__)) | |||
| with open(os.path.join(pwd, 'README.md'), encoding='UTF-8') as f: | |||
| return f.read() | |||
| def get_version(): | |||
| """ | |||
| Get version. | |||
| @@ -196,6 +202,8 @@ if __name__ == '__main__': | |||
| 'Issue Tracker': 'https://gitee.com/mindspore/mindinsight/issues', | |||
| }, | |||
| description=get_description(), | |||
| long_description=get_readme_content(), | |||
| long_description_content_type="text/markdown", | |||
| packages=['mindinsight'], | |||
| platforms=[get_platform()], | |||
| include_package_data=True, | |||