Update TensorLayer3.0

4 years ago · 0e8a2ae701
--- a/.codacy.yaml
+++ b/.codacy.yaml
@@ -2,7 +2,7 @@
 ---
 engines:
  bandit:
    enabled: false # FIXME: make it work
    enabled: false # FIXME: make it work
 exclude_paths:
 - scripts/*
 - setup.py
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -67,7 +67,6 @@ To release a new version, please update the changelog as followed:

 <!-- YOU CAN EDIT FROM HERE -->


 ## [Unreleased]

 ### Added
@@ -80,8 +79,68 @@ To release a new version, please update the changelog as followed:

 ### Fixed

 - Fix README. (#PR 1044)
 - Fix package info. (#PR 1046)
 ### Removed

 ### Security

 ### Contributors

 ## [2.2.3] - 2020-06-18

 TensorLayer 2.2.3 is a maintenance release.
 It contains numerous bug fixes.

 ### Added

 ### Changed

 ### Dependencies Update

 ### Deprecated

 ### Fixed

 - Fix VGG. (#1078, #1079, #1089)
 - Fix norm layer. (#1080)
 - Fix DeConv2d layer. (#1081)
 - Fix ModelLayer and LayerList doc. (#1083)
 - Fix bug in SAC. (#1085)
 - Fix refactoring: Deduplication. (#1086)
 - Fix maxpool, batchnorm Data format fixed, vgg forward. (#1089)
 - Fix package info. (#1090)

 ### Removed

 ### Security

 ### Contributors
 - @zsdonghao
 - @tiancheng2000 (#1078 #1079 #1080 #1081)
 - @ChrisWu1997 (#1083)
 - @quantumiracle (#1085)
 - @marload (#1086)
 - @Gyx-One (#1089)
 - @Laicheng0830 (#1090)

 ## [2.2.2] - 2020-04-26

 TensorLayer 2.2.2 is a maintenance release.

 ### Added

 - Reinforcement learning(#1065)
 - Mish activation(#1068)

 ### Changed

 ### Dependencies Update

 ### Deprecated

 ### Fixed

 - Fix README.
 - Fix package info.

 ### Removed

@@ -89,8 +148,36 @@ To release a new version, please update the changelog as followed:

 ### Contributors

 - @luomai (PR #1044, 1046)
 - @zsdonghao
 - @quantumiracle(#1065)
 - @Laicheng0830(#1068)

 ## [2.2.1] - 2020-01-14

 TensorLayer 2.2.1 is a maintenance release.
 It contains numerous bug fixes.

 ### Added

 ### Changed

 ### Dependencies Update

 ### Deprecated

 ### Fixed

 - Fix README. (#1044)
 - Fix package info. (#1046)
 - Fix build test (Using YAPF 0.29) (#1057)

 ### Removed

 ### Security

 ### Contributors

 - @luomai (#1044, #1046, #1057)

 ## [2.2.0] - 2019-09-13

@@ -150,7 +237,7 @@ This release is compatible with TensorFlow 2 RC1.
 - Replace tf.nn.func with tf.nn.func.\_\_name\_\_ in model config. (PR #994)
 - Add Reinforcement learning tutorials. (PR #995)
 - Add RNN layers with simple rnn cell, GRU cell, LSTM cell. (PR #998)
 - Update Seq2seq (#998) 
 - Update Seq2seq (#998)
 - Add Seq2seqLuongAttention model (#998)

 ### Fixed
@@ -571,12 +658,15 @@ To many PR for this update, please check [here](https://github.com/tensorlayer/t
@zsdonghao @luomai @DEKHTIARJonathan

 [Unreleased]: https://github.com/tensorlayer/tensorlayer/compare/2.0....master
 [2.2.0]: https://github.com/tensorlayer/tensorlayer/compare/2.2.0...2.2.0
 [2.1.0]: https://github.com/tensorlayer/tensorlayer/compare/2.1.0...2.1.0
 [2.0.2]: https://github.com/tensorlayer/tensorlayer/compare/2.0.2...2.0.2
 [2.0.1]: https://github.com/tensorlayer/tensorlayer/compare/2.0.1...2.0.1
 [2.0.0]: https://github.com/tensorlayer/tensorlayer/compare/2.0.0...2.0.0
 [1.11.1]: https://github.com/tensorlayer/tensorlayer/compare/1.11.0...1.11.0
 [2.2.3]: https://github.com/tensorlayer/tensorlayer/compare/2.2.2...2.2.3
 [2.2.2]: https://github.com/tensorlayer/tensorlayer/compare/2.2.1...2.2.2
 [2.2.1]: https://github.com/tensorlayer/tensorlayer/compare/2.2.0...2.2.1
 [2.2.0]: https://github.com/tensorlayer/tensorlayer/compare/2.1.0...2.2.0
 [2.1.0]: https://github.com/tensorlayer/tensorlayer/compare/2.0.2...2.1.0
 [2.0.2]: https://github.com/tensorlayer/tensorlayer/compare/2.0.1...2.0.2
 [2.0.1]: https://github.com/tensorlayer/tensorlayer/compare/2.0.0...2.0.1
 [2.0.0]: https://github.com/tensorlayer/tensorlayer/compare/1.11.1...2.0.0
 [1.11.1]: https://github.com/tensorlayer/tensorlayer/compare/1.11.0...1.11.1
 [1.11.0]: https://github.com/tensorlayer/tensorlayer/compare/1.10.1...1.11.0
 [1.10.1]: https://github.com/tensorlayer/tensorlayer/compare/1.10.0...1.10.1
 [1.10.0]: https://github.com/tensorlayer/tensorlayer/compare/1.9.1...1.10.0
--- a/LICENSE.rst
+++ b/LICENSE.rst
@@ -1,7 +1,7 @@
 License
 =======

 Copyright (c) 2016~2018 The TensorLayer contributors.  All rights reserved.
 Copyright (c) 2016~2020 The TensorLayer contributors.  All rights reserved.

                                 Apache License
                           Version 2.0, January 2004
@@ -208,4 +208,4 @@ Copyright (c) 2016~2018 The TensorLayer contributors.  All rights reserved.

 Contact
 =======
 Questions? Please contact hao.dong11@imperial.ac.uk
 Questions? Please contact hao.dong@pku.edu.cn
--- a/+ 7
+++ b/+ 7
@@ -14,16 +14,17 @@ test:
 	python3 tests/files/test_utils_saveload.py

 format:
 	autoflake -i examples/*.py
 	autoflake -i tensorlayer/*.py
 	autoflake -i tensorlayer/**/*.py
 	autoflake -ir examples
 	autoflake -ir tensorlayer
 	autoflake -ir tests

 	isort -rc examples
 	isort -rc tensorlayer
 	isort -rc tests

 	yapf -i examples/*.py
 	yapf -i tensorlayer/*.py
 	yapf -i tensorlayer/**/*.py
 	yapf -ir examples
 	yapf -ir tensorlayer
 	yapf -ir tests

 install3:
 	pip3 install -U . --user
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
 <a href="https://tensorlayer.readthedocs.io/">
 <a href="https://tensorlayer3.readthedocs.io/">
    <div align="center">
        <img src="img/tl_transparent_logo.png" width="50%" height="30%"/>
    </div>
@@ -7,26 +7,28 @@
 <!--- [![PyPI Version](https://badge.fury.io/py/tensorlayer.svg)](https://badge.fury.io/py/tensorlayer) --->
 <!--- ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/tensorlayer.svg)) --->

 ![GitHub last commit (branch)](https://img.shields.io/github/last-commit/tensorlayer/tensorlayer/master.svg)
 [![GitHub last commit (branch)](https://img.shields.io/github/last-commit/tensorlayer/tensorlayer/master.svg)](https://git.openi.org.cn/TensorLayer/tensorlayer3.0)
 [![Supported TF Version](https://img.shields.io/badge/TensorFlow-2.0.0%2B-brightgreen.svg)](https://github.com/tensorflow/tensorflow/releases)
 [![Documentation Status](https://readthedocs.org/projects/tensorlayer/badge/)](https://tensorlayer.readthedocs.io/)
 [![Build Status](https://travis-ci.org/tensorlayer/tensorlayer.svg?branch=master)](https://travis-ci.org/tensorlayer/tensorlayer)
 [![Downloads](http://pepy.tech/badge/tensorlayer)](http://pepy.tech/project/tensorlayer)
 [![Downloads](https://pepy.tech/badge/tensorlayer/week)](https://pepy.tech/project/tensorlayer/week)
 [![Docker Pulls](https://img.shields.io/docker/pulls/tensorlayer/tensorlayer.svg)](https://hub.docker.com/r/tensorlayer/tensorlayer/)
 [![Codacy Badge](https://api.codacy.com/project/badge/Grade/d6b118784e25435498e7310745adb848)](https://www.codacy.com/app/tensorlayer/tensorlayer)
 [![Documentation Status](https://readthedocs.org/projects/tensorlayer/badge/)](https://tensorlayer3.readthedocs.io)
 ![Build Status](https://travis-ci.org/tensorlayer/tensorlayer.svg?branch=master)
 ![Downloads](http://pepy.tech/badge/tensorlayer)
 ![Downloads](https://pepy.tech/badge/tensorlayer/week)
 ![Docker Pulls](https://img.shields.io/docker/pulls/tensorlayer/tensorlayer.svg)
 ![Codacy Badge](https://api.codacy.com/project/badge/Grade/d6b118784e25435498e7310745adb848)

 [中文简介](https://git.openi.org.cn/TensorLayer/tensorlayer3.0/src/branch/master/tensorlayer_cn.md)

 <!---  [![CircleCI](https://circleci.com/gh/tensorlayer/tensorlayer/tree/master.svg?style=svg)](https://circleci.com/gh/tensorlayer/tensorlayer/tree/master) --->

 <!---  [![Documentation Status](https://readthedocs.org/projects/tensorlayercn/badge/)](https://tensorlayercn.readthedocs.io/)
 <!---  [![PyUP Updates](https://pyup.io/repos/github/tensorlayer/tensorlayer/shield.svg)](https://pyup.io/repos/github/tensorlayer/tensorlayer/) --->

 [TensorLayer](https://tensorlayer.readthedocs.io) is a novel TensorFlow-based deep learning and reinforcement learning library designed for researchers and engineers. It provides an extensive collection of customizable neural layers to build advanced AI models quickly, based on this, the community open-sourced mass [tutorials](https://github.com/tensorlayer/tensorlayer/blob/master/examples/reinforcement_learning/README.md) and [applications](https://github.com/tensorlayer). TensorLayer is awarded the 2017 Best Open Source Software by the [ACM Multimedia Society](https://twitter.com/ImperialDSI/status/923928895325442049). 
 [TensorLayer](https://tensorlayer3.readthedocs.io) is a novel deep learning and reinforcement learning library that supports multiple backends, designed for researchers and engineers. It provides an extensive collection of customizable neural layers to build advanced AI models quickly. Building on this, the community has open-sourced a large number of [tutorials](https://git.openi.org.cn/TensorLayer/tensorlayer3.0/src/branch/master/examples/basic_tutorials) and [applications](https://git.openi.org.cn/TensorLayer/tensorlayer3.0/src/branch/master/examples/model_zoo). TensorLayer was awarded the 2017 Best Open Source Software by the [ACM Multimedia Society](https://twitter.com/ImperialDSI/status/923928895325442049).
 This project can also be found at [iHub](https://code.ihub.org.cn/projects/328) and [Gitee](https://gitee.com/organizations/TensorLayer).

 # News

 🔥 **3.0.0 will supports multiple backends, such as TensorFlow, MindSpore, PaddlePaddle and more, allowing users to run the code on different hardware like Nvidia-GPU and Huawei-Ascend. We need more people to join the dev team, if you are interested, please email hao.dong@pku.edu.cn**
 🔥 **3.0.0 has been pre-released. It supports TensorFlow and MindSpore backends, and supports some PaddlePaddle operator backends, allowing users to run the code on different hardware like Nvidia-GPU and Huawei-Ascend. It will support TensorFlow, MindSpore, PaddlePaddle, and PyTorch backends in the future. Feel free to use it and make suggestions. We need more people to join the dev team; if you are interested, please email hao.dong@pku.edu.cn**

 🔥 Reinforcement Learning Zoo: [Low-level APIs](https://github.com/tensorlayer/tensorlayer/tree/master/examples/reinforcement_learning) for professional usage, [High-level APIs](https://github.com/tensorlayer/RLzoo) for simple usage, and a corresponding [Springer textbook](http://springer.com/gp/book/9789811540943)

@@ -38,7 +40,7 @@ This project can also be found at [iHub](https://code.ihub.org.cn/projects/328)

 TensorLayer is a new deep learning library designed with simplicity, flexibility and high-performance in mind.

 - ***Simplicity*** : TensorLayer has a high-level layer/model abstraction which is effortless to learn. You can learn how deep learning can benefit your AI tasks in minutes through the massive [examples](https://github.com/tensorlayer/awesome-tensorlayer).
 - ***Simplicity*** : TensorLayer has a high-level layer/model abstraction which is effortless to learn. You can learn how deep learning can benefit your AI tasks in minutes through the massive [examples](https://git.openi.org.cn/TensorLayer/tensorlayer3.0/src/branch/master/examples).
 - ***Flexibility*** : TensorLayer APIs are transparent and flexible, inspired by the emerging PyTorch library. Compared to the Keras abstraction, TensorLayer makes it much easier to build and train complex AI models.
 - ***Zero-cost Abstraction*** : Though simple to use, TensorLayer does not require you to make any compromise in the performance of TensorFlow (Check the following benchmark section for more details).

@@ -53,12 +55,12 @@ Imperial College London, UC Berkeley, Carnegie Mellon University, Stanford Unive
 TensorLayer has extensive documentation for both beginners and professionals. The documentation is available in
 both English and Chinese.

 [![English Documentation](https://img.shields.io/badge/documentation-english-blue.svg)](https://tensorlayer.readthedocs.io/)
 [![English Documentation](https://img.shields.io/badge/documentation-english-blue.svg)](https://tensorlayer3.readthedocs.io/)
 [![Chinese Documentation](https://img.shields.io/badge/documentation-%E4%B8%AD%E6%96%87-blue.svg)](https://tensorlayercn.readthedocs.io/)
 [![Chinese Book](https://img.shields.io/badge/book-%E4%B8%AD%E6%96%87-blue.svg)](http://www.broadview.com.cn/book/5059/)

 If you want to try the experimental features on the master branch, you can find the latest document
 [here](https://tensorlayer.readthedocs.io/en/latest/).
 [here](https://tensorlayer3.readthedocs.io/en/latest/).

 # Extensive Examples

@@ -71,8 +73,15 @@ You can find a large collection of examples that use TensorLayer in [here](examp
 </a>

 # Getting Start
 Comparison of TensorLayer version

 <a href="https://tensorlayer3.readthedocs.io/">
    <div align="center">
        <img src="img/tensorlayer_v.png" width="100%" height="30%"/>
    </div>
 </a>

 TensorLayer 2.0 relies on TensorFlow, numpy, and others. To use GPUs, CUDA and cuDNN are required.
 TensorLayer 3.0 relies on TensorFlow, numpy, and others. To use GPUs, CUDA and cuDNN are required.

 Install TensorFlow:

@@ -81,16 +90,21 @@ pip3 install tensorflow-gpu==2.0.0-rc1 # TensorFlow GPU (version 2.0 RC1)
 pip3 install tensorflow # CPU version
 ```

 Install the stable release of TensorLayer:
 Install the stable release of TensorLayer3:

 ```bash
 pip3 install tensorlayer
 pip3 install tensorlayer3
 ```

 Install the unstable development version of TensorLayer:
 Install the stable release of TensorLayer2.x:

 ```bash
 pip3 install tensorlayer
 ```
 Install the unstable development version of TensorLayer3:

 ```bash
 pip3 install git+https://github.com/tensorlayer/tensorlayer.git
 pip3 install git+https://git.openi.org.cn/TensorLayer/tensorlayer3.0.git
 ```

 If you want to install the additional dependencies, you can also run
@@ -99,6 +113,15 @@ pip3 install --upgrade tensorlayer[all]              # all additional dependenci
 pip3 install --upgrade tensorlayer[extra]            # only the `extra` dependencies
 pip3 install --upgrade tensorlayer[contrib_loggers]  # only the `contrib_loggers` dependencies
 ```
 If you want to use the MindSpore backend, you should install mindspore>=1.2.1
 ```bash
 pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/1.2.1/MindSpore/gpu/ubuntu_x86/cuda-10.1/mindspore_gpu-1.2.1-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple
 ```

 If you want to use the PaddlePaddle backend, you should install paddlepaddle>=2.1.1
 ```bash
 python -m pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
 ```

 If you are TensorFlow 1.X users, you can use TensorLayer 1.11.0:

@@ -150,6 +173,7 @@ The following table shows the training speeds of [VGG16](http://www.robots.ox.ac
 |   Graph   |      Keras      | channel last  | 8677 |      2580         |        2576         |      101       |
 |   Eager   | TensorFlow 2.0  | channel last  | 8723 |      2052         |        2024         |      97       |
 |           | TensorLayer 2.0 | channel last  | 8723 |      2010         |        2007         |      95       |
 |           | TensorLayer 3.0 | channel last  |      |                   |                     |               |                                             

 # Getting Involved

@@ -169,7 +193,7 @@ We suggest users to report bugs using Github issues. Users can also discuss how

 # Citing TensorLayer

 If you find TensorLayer useful for your project, please cite the following paper：
 If you find TensorLayer useful for your project, please cite the following papers：

 ```
@article{tensorlayer2017,
@@ -179,4 +203,13 @@ If you find TensorLayer useful for your project, please cite the following paper
    url     = {http://tensorlayer.org},
    year    = {2017}
 }

@inproceedings{tensorlayer2021,
  title={Tensorlayer 3.0: A Deep Learning Library Compatible With Multiple Backends},
  author={Lai, Cheng and Han, Jiarong and Dong, Hao},
  booktitle={2021 IEEE International Conference on Multimedia \& Expo Workshops (ICMEW)},
  pages={1--3},
  year={2021},
  organization={IEEE}
 }
 ```
--- a/README.rst
+++ b/README.rst
@@ -10,12 +10,15 @@

 |JOIN-SLACK-LOGO|

 TensorLayer is a novel TensorFlow-based deep learning and reinforcement
 learning library designed for researchers and engineers. It provides a
 large collection of customizable neural layers / functions that are key
 to build real-world AI applications. TensorLayer is awarded the 2017
 Best Open Source Software by the `ACM Multimedia
 Society <http://www.acmmm.org/2017/mm-2017-awardees/>`__.
 `TensorLayer3 <https://tensorlayer3.readthedocs.io>`__ is a novel deep learning
 and reinforcement learning library that supports multiple backends, designed
 for researchers and engineers.
 It provides an extensive collection of customizable neural layers to
 build advanced AI models quickly. Building on this, the community has open-sourced
 a large number of `tutorials <https://git.openi.org.cn/TensorLayer/tensorlayer3.0/src/branch/master/examples/basic_tutorials>`__ and
 `applications <https://git.openi.org.cn/TensorLayer/tensorlayer3.0/src/branch/master/examples/model_zoo>`__.
 TensorLayer is awarded the 2017 Best Open Source Software by the `ACM Multimedia Society <https://twitter.com/ImperialDSI/status/923928895325442049>`__.
 This project can also be found at `OpenI <https://git.openi.org.cn/TensorLayer/tensorlayer3.0>`__ and `Gitee <https://gitee.com/organizations/TensorLayer>`__.

 Why another deep learning library: TensorLayer
 ==============================================
@@ -73,15 +76,15 @@ The simplest way to install TensorLayer is to use the Python Package Index (PyPI
 .. code:: bash

    # for last stable version
    pip install --upgrade tensorlayer
    pip install --upgrade tensorlayer3

    # for latest release candidate
    pip install --upgrade --pre tensorlayer
    pip install --upgrade --pre tensorlayer3

    # if you want to install the additional dependencies, you can also run
    pip install --upgrade tensorlayer[all]              # all additional dependencies
    pip install --upgrade tensorlayer[extra]            # only the `extra` dependencies
    pip install --upgrade tensorlayer[contrib_loggers]  # only the `contrib_loggers` dependencies
    pip install --upgrade tensorlayer3[all]              # all additional dependencies
    pip install --upgrade tensorlayer3[extra]            # only the `extra` dependencies
    pip install --upgrade tensorlayer3[contrib_loggers]  # only the `contrib_loggers` dependencies

 Alternatively, you can install the latest or development version by directly pulling from github:

@@ -139,7 +142,7 @@ Cite
 ====

 If you find this project useful, we would be grateful if you cite the
 TensorLayer paper：
 TensorLayer papers.

 ::

@@ -151,6 +154,17 @@ TensorLayer paper：
        year    = {2017}
    }

 ::

    @inproceedings{tensorlayer2021,
        title={Tensorlayer 3.0: A Deep Learning Library Compatible With Multiple Backends},
        author={Lai, Cheng and Han, Jiarong and Dong, Hao},
        booktitle={2021 IEEE International Conference on Multimedia \& Expo Workshops (ICMEW)},
        pages={1--3},
        year={2021},
        organization={IEEE}
    }

 License
 =======

--- a/docs/index.rst
+++ b/docs/index.rst
@@ -57,12 +57,14 @@ method, this part of the documentation is for you.
  modules/activation
  modules/array_ops
  modules/cost
  modules/dataflow
  modules/prepro
  modules/files
  modules/iterate
  modules/layers
  modules/models
  modules/nlp
  modules/vision
  modules/initializers
  modules/rein
  modules/utils
--- a/docs/modules/activation.rst
+++ b/docs/modules/activation.rst
@@ -2,9 +2,7 @@ API - Activations
 =========================

 To make TensorLayer simple, we minimize the number of activation functions as much as
 we can. So we encourage you to use TensorFlow's function. TensorFlow provides
 ``tf.nn.relu``, ``tf.nn.relu6``, ``tf.nn.elu``, ``tf.nn.softplus``,
 ``tf.nn.softsign`` and so on.
 we can. So we encourage you to customize your own activation functions.
 For parametric activation, please read the layer APIs.

 The shortcut of ``tensorlayer.activation`` is ``tensorlayer.act``.
@@ -14,64 +12,71 @@ Your activation

 Customizing an activation function in TensorLayer is very easy.
 The following example implements an activation that multiplies its input by 2.
 For more complex activation, TensorFlow API will be required.
 For more complex activations, the TensorFlow (MindSpore/PaddlePaddle) API will be required.

 .. code-block:: python

  def double_activation(x):
      return x * 2
      
  double_activation = lambda x: x * 2
  class DoubleActivation(object):
    def __init__(self):
        pass
    def __call__(self, x):
        return x * 2
  double_activation = DoubleActivation()

 .. automodule:: tensorlayer.activation
 .. automodule:: tensorlayer.layers.activation

 .. autosummary::

   leaky_relu
   leaky_relu6
   leaky_twice_relu6
   ramp
   swish
   sign
   hard_tanh
   pixel_wise_softmax
   mish

 Ramp
   PRelu
   PRelu6
   PTRelu6
   LeakyReLU
   LeakyReLU6
   LeakyTwiceRelu6
   Ramp
   Swish
   HardTanh
   Mish

 PRelu
 ------
 .. autofunction:: ramp
 .. autofunction:: PRelu

 Leaky ReLU
 PRelu6
 ------------
 .. autofunction:: leaky_relu
 .. autofunction:: PRelu6

 Leaky ReLU6
 PTRelu6
 ------------
 .. autofunction:: leaky_relu6
 .. autofunction:: PTRelu6

 Twice Leaky ReLU6
 LeakyReLU
 -----------------
 .. autofunction:: leaky_twice_relu6
 .. autofunction:: LeakyReLU

 Swish
 LeakyReLU6
 ------------
 .. autofunction:: swish
 .. autofunction:: LeakyReLU6

 Sign
 LeakyTwiceRelu6
 ---------------------
 .. autofunction:: sign
 .. autofunction:: LeakyTwiceRelu6

 Hard Tanh
 Ramp
 ---------------------
 .. autofunction:: hard_tanh
 .. autofunction:: Ramp

 Pixel-wise softmax
 Swish
 --------------------
 .. autofunction:: pixel_wise_softmax
 .. autofunction:: Swish

 HardTanh
 ----------------
 .. autofunction:: HardTanh

 mish
 Mish
 ---------
 .. autofunction:: mish
 .. autofunction:: Mish

 Parametric activation
 ------------------------------
--- a/docs/modules/app.rst
+++ b/docs/modules/app.rst
@@ -1,10 +0,0 @@
 API - Application Library
 =========================

 Application library is an open source Deep learning applications based on TensorLayer.

 Supported Application:
 -------------------------



--- a/docs/modules/cost.rst
+++ b/docs/modules/cost.rst
@@ -11,7 +11,7 @@ we can. So we encourage you to use TensorFlow's function, , see `TensorFlow API

 .. autosummary::

   cross_entropy
   softmax_cross_entropy_with_logits
   sigmoid_cross_entropy
   binary_cross_entropy
   mean_squared_error
@@ -28,12 +28,11 @@ we can. So we encourage you to use TensorFlow's function, , see `TensorFlow API
   maxnorm_regularizer
   maxnorm_o_regularizer
   maxnorm_i_regularizer
   huber_loss


 Softmax cross entropy
 ----------------------
 .. autofunction:: cross_entropy
 .. autofunction:: softmax_cross_entropy_with_logits

 Sigmoid cross entropy
 ----------------------
@@ -94,7 +93,3 @@ Special
 .. autofunction:: lo_regularizer
 .. autofunction:: maxnorm_o_regularizer
 .. autofunction:: maxnorm_i_regularizer

 Huber Loss
 ^^^^^^^^^^
 .. autofunction:: huber_loss
--- a/docs/modules/dataflow.rst
+++ b/docs/modules/dataflow.rst
@@ -0,0 +1,79 @@
 API - Dataflow
 ==============

 .. automodule:: tensorlayer.dataflow

 .. -----------------------------------------------------------
 ..                        Dataflow List
 .. -----------------------------------------------------------

 Dataflow list
 ----------------------

 .. autosummary::

   Dataset
   IterableDataset
   FromGenerator
   FromSlices
   Dataloader

   Concat
   Zip
   Batch
   Map
   Repeat
   Shuffle

 .. -----------------------------------------------------------
 ..                        Dataflow
 .. -----------------------------------------------------------

 Dataflow
 -----------------

 Dataset
 ^^^^^^^^^^^^^^^^
 .. autoclass:: Dataset


 IterableDataset
 ^^^^^^^^^^^^^^^^
 .. autoclass:: IterableDataset

 FromGenerator
 ^^^^^^^^^^^^^^^^
 .. autoclass:: FromGenerator

 FromSlices
 ^^^^^^^^^^^^^^^^
 .. autoclass:: FromSlices

 Dataloader
 ^^^^^^^^^^^^^^^^
 .. autoclass:: Dataloader

 Concat
 ^^^^^^^^^^^^^^^^
 .. autoclass:: Concat

 Zip
 ^^^^^^^^^^^^^^^^
 .. autoclass:: Zip

 Batch
 ^^^^^^^^^^^^^^^^
 .. autoclass:: Batch

 Map
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: Map

 Repeat
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: Repeat

 Shuffle
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: Shuffle

--- a/docs/modules/initializers.rst
+++ b/docs/modules/initializers.rst
@@ -16,6 +16,7 @@ e.g. ``tf.initializers.he_normal``, please refer to TensorFlow provided initiali
   RandomUniform
   RandomNormal
   TruncatedNormal
   HeNormal
   deconv2d_bilinear_upsampling_initializer

 Initializer
@@ -46,6 +47,10 @@ TruncatedNormal
 ---------------------
 .. autoclass:: TruncatedNormal

 HeNormal
 ------------
 .. autoclass:: HeNormal

 deconv2d_bilinear_upsampling_initializer
 ------------------------------------------
 .. autofunction:: deconv2d_bilinear_upsampling_initializer
--- a/docs/modules/layers.rst
+++ b/docs/modules/layers.rst
@@ -12,10 +12,9 @@ Layer list

 .. autosummary::

   Layer
   Module
   
   ModelLayer
   LayerList
   SequentialLayer

   Input

@@ -73,14 +72,6 @@ Layer list
   BatchNorm1d
   BatchNorm2d
   BatchNorm3d
   LocalResponseNorm
   InstanceNorm
   InstanceNorm1d
   InstanceNorm2d
   InstanceNorm3d
   LayerNorm
   GroupNorm
   SwitchNorm

   RNN
   SimpleRNN
@@ -134,17 +125,13 @@ Layer list
 Base Layer
 -----------

 Base Layer
 ^^^^^^^^^^^^^^^^
 .. autoclass:: Layer

 Model Layer
 Module
 ^^^^^^^^^^^^^^^^
 .. autoclass:: ModelLayer
 .. autoclass:: Module

 Layer List
 Sequential Layer
 ^^^^^^^^^^^^^^^^
 .. autoclass:: LayerList
 .. autoclass:: SequentialLayer

 .. -----------------------------------------------------------
 ..                        Input Layer
@@ -399,38 +386,6 @@ Batch Normalization 3D
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: BatchNorm3d

 Local Response Normalization
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: LocalResponseNorm

 Instance Normalization
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: InstanceNorm

 Instance Normalization 1D
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: InstanceNorm1d

 Instance Normalization 2D
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: InstanceNorm2d

 Instance Normalization 3D
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: InstanceNorm3d

 Layer Normalization
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: LayerNorm

 Group Normalization
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: GroupNorm

 Switch Normalization
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: SwitchNorm

 .. -----------------------------------------------------------
 ..                     Padding Layers
 .. -----------------------------------------------------------
--- a/docs/modules/models.rst
+++ b/docs/modules/models.rst
@@ -1,59 +1,34 @@
 API - Models
 API - Pretrained Models
 ================================

 TensorLayer provides many pretrained models, you can easily use the whole or a part of the pretrained models via these APIs.

 .. automodule:: tensorlayer.models
 .. automodule:: examples.model_zoo

 .. autosummary::

    Model

    VGG16
    VGG19
    SqueezeNetV1
    MobileNetV1
    vgg16
    vgg19
    YOLOv4
    ResNet50
    Seq2seq
    Seq2seqLuongAttention


 Base Model
 -----------

 .. autoclass:: Model

 VGG16
 vgg16
 ----------------------

 .. autofunction:: VGG16
 .. autofunction:: vgg16

 VGG19
 vgg19
 ----------------------

 .. autofunction:: VGG19

 SqueezeNetV1
 ----------------
 .. autofunction:: SqueezeNetV1
 .. autofunction:: vgg19

 MobileNetV1
 YOLOv4
 ----------------

 .. autofunction:: MobileNetV1
 .. autofunction:: YOLOv4

 ResNet50
 ----------------

 .. autofunction:: ResNet50

 Seq2seq
 ------------------------

 .. autoclass:: Seq2seq


 Seq2seq Luong Attention
 ------------------------
 .. autofunction:: ResNet50

 .. autoclass:: Seq2seqLuongAttention
--- a/docs/modules/optimizers.rst
+++ b/docs/modules/optimizers.rst
@@ -5,6 +5,8 @@ API - Optimizers

 TensorLayer provides simple API and tools to ease research, development and reduce the time to production.
 Therefore, we provide the latest state of the art optimizers that work with Tensorflow.
 The optimizer functions provided by TensorFlow can be used in TensorLayer.
 We have also wrapped the optimizer functions for each framework, which can be found in tensorlayer.optimizers.

 Optimizers List
 ---------------
@@ -12,6 +14,17 @@ Optimizers List
 .. autosummary::

   AMSGrad
   Adadelta
   Adagrad
   Adam
   Adamax
   Ftrl
   Nadam
   RMSprop
   SGD
   Momentum
   Lamb
   LARS

 AMSGrad Optimizer
 -----------------
--- a/docs/modules/vision.rst
+++ b/docs/modules/vision.rst
@@ -0,0 +1,204 @@
 API - Vision
 ============

 .. automodule:: tensorlayer.vision.transforms

 .. -----------------------------------------------------------
 ..                        Vision Transforms List
 .. -----------------------------------------------------------

 Vision Transforms list
 ----------------------

 .. autosummary::

   ToTensor
   Compose

   Crop
   CentralCrop
   RandomCrop
   Pad
   PadToBoundingbox
   Resize
   RandomResizedCrop

   RgbToGray
   HsvToRgb
   RgbToHsv

   AdjustBrightness
   AdjustContrast
   AdjustHue
   AdjustSaturation
   RandomBrightness
   RandomContrast
   RandomHue
   RandomSaturation
   ColorJitter

   FlipHorizontal
   FlipVertical
   RandomFlipHorizontal
   RandomFlipVertical

   RandomRotation
   RandomShift
   RandomShear
   RandomZoom
   RandomAffine

   Transpose
   HWC2CHW
   CHW2HWC

   Normalize
   StandardizePerImage

 .. -----------------------------------------------------------
 ..                        Vision Transforms
 .. -----------------------------------------------------------

 Vision Transforms
 -----------------

 ToTensor
 ^^^^^^^^^^^^^^^^
 .. autoclass:: ToTensor


 Compose
 ^^^^^^^^^^^^^^^^
 .. autoclass:: Compose

 Crop
 ^^^^^^^^^^^^^^^^
 .. autoclass:: Crop

 CentralCrop
 ^^^^^^^^^^^^^^^^
 .. autoclass:: CentralCrop

 RandomCrop
 ^^^^^^^^^^^^^^^^
 .. autoclass:: RandomCrop

 Pad
 ^^^^^^^^^^^^^^^^
 .. autoclass:: Pad

 PadToBoundingbox
 ^^^^^^^^^^^^^^^^
 .. autoclass:: PadToBoundingbox

 Resize
 ^^^^^^^^^^^^^^^^
 .. autoclass:: Resize

 RandomResizedCrop
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RandomResizedCrop

 RgbToGray
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RgbToGray

 HsvToRgb
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: HsvToRgb

 RgbToHsv
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RgbToHsv

 AdjustBrightness
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: AdjustBrightness

 AdjustContrast
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: AdjustContrast

 AdjustHue
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: AdjustHue

 AdjustSaturation
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: AdjustSaturation

 RandomBrightness
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RandomBrightness

 RandomContrast
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RandomContrast

 RandomHue
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RandomHue

 RandomSaturation
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RandomSaturation

 ColorJitter
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: ColorJitter

 FlipHorizontal
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: FlipHorizontal

 FlipVertical
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: FlipVertical

 RandomFlipHorizontal
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RandomFlipHorizontal

 RandomFlipVertical
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RandomFlipVertical

 RandomRotation
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RandomRotation

 RandomShift
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RandomShift

 RandomShear
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RandomShear

 RandomZoom
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RandomZoom

 RandomAffine
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: RandomAffine

 Transpose
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: Transpose

 HWC2CHW
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: HWC2CHW

 CHW2HWC
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: CHW2HWC

 Normalize
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: Normalize

 StandardizePerImage
 ^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: StandardizePerImage
--- a/docs/user/contributing.rst
+++ b/docs/user/contributing.rst
@@ -4,8 +4,8 @@
 Contributing
 ===============

 TensorLayer 2.0 is a major ongoing research project in CFCS, Peking University, the first version was established at Imperial College London in 2016. The goal of the project is to develop a compositional language while complex learning systems
 can be built through composition of neural network modules.
 TensorLayer 3.0 is a major ongoing research project at Peking University and Pengcheng Laboratory; the first version was established at Imperial College London in 2016. The goal of the project is to develop a compositional language that is compatible with multiple deep learning frameworks,
 while complex learning systems can be built through composition of neural network modules.

 Numerous contributors come from various horizons, such as Imperial College London, Tsinghua University, Carnegie Mellon University, Stanford, University of Technology of Compiegne, Google, Microsoft, Bloomberg, etc.

@@ -25,6 +25,12 @@ Project Maintainers

 The TensorLayer project was started by `Hao Dong <https://zsdonghao.github.io>`_ at Imperial College London in June 2016. 

 For TensorLayer 3.x, it is now actively developed and maintained by the following people *(in alphabetical order)*:

 - **Cheng Lai** (`@Laicheng0830 <https://github.com/Laicheng0830>`_) - `<https://Laicheng0830.github.io>`_
 - **Hao Dong** (`@zsdonghao <https://github.com/zsdonghao>`_) - `<https://zsdonghao.github.io>`_
 - **Jiarong Han** (`@hanjr92 <https://github.com/hanjr92>`_) - `<https://hanjr92.github.io>`_

 For TensorLayer 2.x, it is now actively developed and maintained by the following people, who have more than 50 contributions:

 - **Hao Dong** (`@zsdonghao <https://github.com/zsdonghao>`_) - `<https://zsdonghao.github.io>`_
--- a/docs/user/examples.rst
+++ b/docs/user/examples.rst
@@ -6,13 +6,28 @@ Examples

 We list some examples here, but more tutorials and applications can be found in `Github examples <https://github.com/tensorlayer/tensorlayer/tree/master/examples>`__ and `Awesome-TensorLayer <https://github.com/tensorlayer/awesome-tensorlayer>`_.

 Commonly used dataset and pretrained models
 ===========================================

 - MNIST, see `MNIST <http://yann.lecun.com/exdb/mnist/>`__.
 - CIFAR10, see `CIFAR10 <http://www.cs.toronto.edu/~kriz/cifar.html>`__.

 - YOLOv4 Pretrained Model, see `YOLOv4 <https://pan.baidu.com/s/1MC1dmEwpxsdgHO1MZ8fYRQ>`__. password: idsz
 - VGG16 Pretrained Model, see `VGG16 <https://pan.baidu.com/s/1s7jlzXftZ07n1gIk1zOQOQ>`__. password: t36u
 - VGG19 Pretrained Model, see `VGG19 <https://pan.baidu.com/s/13XZ1LxqZf70qihxp5Uxhdg>`__. password: rb8w
 - ResNet50 Pretrained Model, see `ResNet50 <https://pan.baidu.com/s/1zgwzWXP4uhxljEPdJWWxQA>`__. password: 3nui

 Basics
 ============

 - Multi-layer perceptron (MNIST), simple usage. Classification task, see `tutorial_mnist_simple.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/basic_tutorials/tutorial_mnist_simple.py>`__.
 - Multi-layer perceptron (MNIST), dynamic model. Classification with dropout using iterator, see `tutorial_mnist_mlp_dynamic.py method2 <https://github.com/tensorlayer/tensorlayer/blob/master/examples/basic_tutorials/tutorial_mnist_mlp_dynamic.py>`__.
 - Multi-layer perceptron (MNIST), static model. Classification with dropout using iterator, see `tutorial_mnist_mlp_static.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/basic_tutorials/tutorial_mnist_mlp_static.py>`__.
 - Convolutional Network (CIFAR-10). Classification task, see `tutorial_cifar10_cnn_static.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/basic_tutorials/tutorial_cifar10_cnn_static.py>`_.
 - Multi-layer perceptron (MNIST), simple usage and supports multiple backends. Classification task, see `tutorial_mnist_simple.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/basic_tutorials/tutorial_mnist_simple.py>`__.
 - Multi-layer perceptron (MNIST), mix of tensorlayer and tensorflow. Classification with dropout using iterator, see `tutorial_mnist_mlp_tensorflow_backend.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/basic_tutorials/tutorial_mnist_mlp_tensorflow_backend.py>`__.
 - Multi-layer perceptron (MNIST), mix of tensorlayer and mindspore. Classification task, see `tutorial_mnist_mlp_mindspore_backend.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/basic_tutorials/tutorial_mnist_mlp_mindspore_backend.py>`__.
 - Multi-layer perceptron (MNIST), mix of tensorlayer and paddlepaddle. Classification task, see `tutorial_mnist_mlp_paddlepaddle_backend.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/basic_tutorials/tutorial_mnist_mlp_paddlepaddle_backend.py>`__.

 - Convolutional Network (CIFAR-10). mix of tensorlayer and tensorflow. Classification task, see `tutorial_cifar10_cnn_tensorflow_backend.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/basic_tutorials/tutorial_cifar10_cnn_tensorflow_backend.py>`_.
 - Convolutional Network (CIFAR-10). mix of tensorlayer and mindspore. Classification task, see `tutorial_cifar10_cnn_mindspore_backend.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/basic_tutorials/tutorial_cifar10_cnn_mindspore_backend.py>`_.

 - TensorFlow dataset API for object detection see `here <https://github.com/tensorlayer/tensorlayer/blob/master/examples/data_process/tutorial_tf_dataset_voc.py>`__.
 - Data augmentation with TFRecord. Effective way to load and pre-process data, see `tutorial_tfrecord*.py <https://github.com/tensorlayer/tensorlayer/tree/master/examples/data_process>`__ and `tutorial_cifar10_tfrecord.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/basic_tutorials/data_process/tutorial_tfrecord.py>`__.
 - Data augmentation with TensorLayer. See `tutorial_fast_affine_transform.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/data_process/tutorial_fast_affine_transform.py>`__ (for quick test only).
@@ -20,15 +35,16 @@ Basics
 Pretrained Models
 ==================

 - VGG 16 (ImageNet). Classification task, see `tutorial_models_vgg16 <https://github.com/tensorlayer/tensorlayer/blob/master/examples/pretrained_cnn/tutorial_models_vgg16.py>`__.
 - VGG 16 (ImageNet). Classification task, see `pretrained_vgg16 <https://github.com/tensorlayer/tensorlayer/blob/master/examples/model_zoo/pretrained_vgg16.py>`__.
 - VGG 19 (ImageNet). Classification task, see `tutorial_models_vgg19.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/pretrained_cnn/tutorial_vgg19.py>`__.
 - SqueezeNet (ImageNet). Model compression, see `tutorial_models_squeezenetv1.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/pretrained_cnn/tutorial_models_squeezenetv1.py>`__.
 - MobileNet (ImageNet). Model compression, see `tutorial_models_mobilenetv1.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/pretrained_cnn/tutorial_models_mobilenetv1.py>`__.
 - YOLOv4 (MS-COCO). Object Detection, see `pretrained_yolov4.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/model_zoo/pretrained_yolov4.py>`__.
 - SqueezeNet (ImageNet, based on TensorLayer 2.0). Model compression, see `tutorial_models_squeezenetv1.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/pretrained_cnn/tutorial_models_squeezenetv1.py>`__.
 - MobileNet (ImageNet, based on TensorLayer 2.0). Model compression, see `tutorial_models_mobilenetv1.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/pretrained_cnn/tutorial_models_mobilenetv1.py>`__.
 - All pretrained models in `pretrained-models <https://github.com/tensorlayer/pretrained-models>`__.

 Vision
 ==================

 Warning: The examples below only support TensorLayer 2.0. TensorLayer 3.0 is under development.

 - Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization, see `examples <https://github.com/tensorlayer/adaptive-style-transfer>`__.
 - ArcFace: Additive Angular Margin Loss for Deep Face Recognition, see `InsightFace <https://github.com/auroua/InsightFace_TF>`__.
 - BinaryNet. Model compression, see `mnist <https://github.com/tensorlayer/tensorlayer/blob/master/examples/quantized_net/tutorial_binarynet_mnist_cnn.py>`__ `cifar10 <https://github.com/tensorlayer/tensorlayer/blob/master/examples/quantized_net/tutorial_binarynet_cifar10_tfrecord.py>`__.
@@ -44,6 +60,7 @@ Vision

 Adversarial Learning
 ========================
 Warning: The examples below only support TensorLayer 2.0. TensorLayer 3.0 is under development.

 - DCGAN (CelebA). Generating images by `Deep Convolutional Generative Adversarial Networks <http://arxiv.org/abs/1511.06434>`__ by `zsdonghao <https://github.com/tensorlayer/dcgan>`__.
 - `Generative Adversarial Text to Image Synthesis <https://github.com/zsdonghao/text-to-image>`__ by `zsdonghao <https://github.com/zsdonghao/text-to-image>`__.
 - `Unsupervised Image to Image Translation with Generative Adversarial Networks <https://github.com/zsdonghao/Unsup-Im2Im>`__ by `zsdonghao <https://github.com/zsdonghao/Unsup-Im2Im>`__.
@@ -54,7 +71,7 @@ Adversarial Learning

 Natural Language Processing
 ==============================

 Warning: The examples below only support TensorLayer 2.0. TensorLayer 3.0 is under development.

 - Recurrent Neural Network (LSTM). Apply multiple LSTM to PTB dataset for language modeling, see `tutorial_ptb_lstm_state_is_tuple.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/text_ptb/tutorial_ptb_lstm_state_is_tuple.py>`__.
 - Word Embedding (Word2vec). Train a word embedding matrix, see `tutorial_word2vec_basic.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/text_word_embedding/tutorial\_word2vec_basic.py>`__.
 - Restore Embedding matrix. Restore a pre-train embedding matrix, see `tutorial_generate_text.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/text_generation/tutorial_generate_text.py>`__.
@@ -65,7 +82,7 @@ Natural Language Processing

 Reinforcement Learning
 ==============================

 Warning: The examples below only support TensorLayer 2.0. TensorLayer 3.0 is under development.

 - Policy Gradient / Network (Atari Ping Pong), see `tutorial_atari_pong.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/reinforcement_learning/tutorial_atari_pong.py>`__.
 - Deep Q-Network (Frozen lake), see `tutorial_frozenlake_dqn.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/reinforcement_learning/tutorial_frozenlake_dqn.py>`__.
 - Q-Table learning algorithm (Frozen lake), see `tutorial_frozenlake_q_table.py <https://github.com/tensorlayer/tensorlayer/blob/master/examples/reinforcement_learning/tutorial_frozenlake_q_table.py>`__.
@@ -77,6 +94,7 @@ Reinforcement Learning

 Miscellaneous
 =================
 Warning: The examples below only support TensorLayer 2.0. TensorLayer 3.0 is under development.

 - `Sipeed <https://github.com/sipeed/Maix-EMC>`__ : Run TensorLayer on AI Chips

--- a/docs/user/get_start_advance.rst
+++ b/docs/user/get_start_advance.rst
@@ -11,11 +11,13 @@ Customizing layer
 Layers with weights
 ----------------------

 The fully-connected layer is `a = f(x*W+b)`, the most simple implementation is as follow, which can only support static model.
 The fully-connected layer is `a = f(x*W+b)`; the simplest implementation is as follows.

 .. code-block:: python

  class Dense(Layer):
  from tensorlayer.layers import Module

  class Dense(Module):
    """The :class:`Dense` class is a fully connected layer.
    
    Parameters
@@ -33,12 +35,16 @@ The fully-connected layer is `a = f(x*W+b)`, the most simple implementation is a
            n_units,   # the number of units/channels of this layer
            act=None,  # None: no activation, tf.nn.relu or 'relu': ReLU ...
            name=None, # the name of this layer (optional)
            in_channels = None
    ):
        super(Dense, self).__init__(name, act=act) # auto naming, dense_1, dense_2 ...
        self.n_units = n_units
        self.in_channels = in_channels
        self.build()
        self._built = True
        
    def build(self, inputs_shape): # initialize the model weights here
        shape = [inputs_shape[1], self.n_units]
    def build(self): # initialize the model weights here
        shape = [self.in_channels, self.n_units]
        self.W = self._get_weights("weights", shape=tuple(shape), init=self.W_init)
        self.b = self._get_weights("biases", shape=(self.n_units, ), init=self.b_init)

@@ -48,13 +54,14 @@ The fully-connected layer is `a = f(x*W+b)`, the most simple implementation is a
            z = self.act(z)
        return z

 The full implementation is as follow, which supports both static and dynamic models and allows users to control whether to use the bias, how to initialize the weight values.
 The full implementation is as follows. It supports both automatic inference of the input shape and dynamic models, and allows users to control whether to use the bias and how to initialize the weight values.

 .. code-block:: python

  class Dense(Layer):

  class Dense(Module):
    """The :class:`Dense` class is a fully connected layer.
    

    Parameters
    ----------
    n_units : int
@@ -70,38 +77,53 @@ The full implementation is as follow, which supports both static and dynamic mod
        If None, it will be automatically detected when the layer is forwarded for the first time.
    name : None or str
        A unique layer name. If None, a unique name will be automatically generated.

    Examples
    --------
    With TensorLayer

    >>> net = tl.layers.Input([100, 50], name='input')
    >>> dense = tl.layers.Dense(n_units=800, act=tl.ReLU, in_channels=50, name='dense_1')
    >>> print(dense)
    Dense(n_units=800, relu, in_channels='50', name='dense_1')
    >>> tensor = tl.layers.Dense(n_units=800, act=tl.ReLU, name='dense_2')(net)
    >>> print(tensor)
    tf.Tensor([...], shape=(100, 800), dtype=float32)

    Notes
    -----
    If the layer input has more than two axes, it needs to be flatten by using :class:`Flatten`.

    """
    

    def __init__(
            self,
            n_units,
            act=None,
            W_init=tl.initializers.truncated_normal(stddev=0.1),
            b_init=tl.initializers.constant(value=0.0),
            in_channels=None,  # the number of units/channels of the previous layer
            name=None,
        self,
        n_units,
        act=None,
        W_init=tl.initializers.truncated_normal(stddev=0.05),
        b_init=tl.initializers.constant(value=0.0),
        in_channels=None,
        name=None,  # 'dense',
    ):
        # we feed activation function to the base layer, `None` denotes identity function
        # string (e.g., relu, sigmoid) will be converted into function.
        super(Dense, self).__init__(name, act=act) 

        super(Dense, self).__init__(name, act=act)

        self.n_units = n_units
        self.W_init = W_init
        self.b_init = b_init
        self.in_channels = in_channels

        # in dynamic model, the number of input channel is given, we initialize the weights here
        if self.in_channels is not None: 
        if self.in_channels is not None:
            self.build(self.in_channels)
            self._built = True

        logging.info(
            "Dense  %s: %d %s" %
            (self.name, self.n_units, self.act.__name__ if self.act is not None else 'No Activation')
            (self.name, self.n_units, self.act.__class__.__name__ if self.act is not None else 'No Activation')
        )

    def __repr__(self): # optional, for printing information
        actstr = self.act.__name__ if self.act is not None else 'No Activation'
    def __repr__(self):
        actstr = self.act.__class__.__name__ if self.act is not None else 'No Activation'
        s = ('{classname}(n_units={n_units}, ' + actstr)
        if self.in_channels is not None:
            s += ', in_channels=\'{in_channels}\''
@@ -110,21 +132,40 @@ The full implementation is as follow, which supports both static and dynamic mod
        s += ')'
        return s.format(classname=self.__class__.__name__, **self.__dict__)

    def build(self, inputs_shape): # initialize the model weights here
        if self.in_channels: # if the number of input channel is given, use it
    def build(self, inputs_shape):
        if self.in_channels is None and len(inputs_shape) != 2:
            raise AssertionError("The input dimension must be rank 2, please reshape or flatten it")
        if self.in_channels:
            shape = [self.in_channels, self.n_units]
        else:                # otherwise, get it from static model
        else:
            self.in_channels = inputs_shape[1]
            shape = [inputs_shape[1], self.n_units]

        self.W = self._get_weights("weights", shape=tuple(shape), init=self.W_init)
        if self.b_init:      # if b_init is None, no bias is applied
            self.b = self._get_weights("biases", shape=(self.n_units, ), init=self.b_init)

    def forward(self, inputs):
        z = tf.matmul(inputs, self.W)
        self.b_init_flag = False
        if self.b_init:
            z = tf.add(z, self.b)
            self.b = self._get_weights("biases", shape=(self.n_units, ), init=self.b_init)
            self.b_init_flag = True
            self.bias_add = tl.ops.BiasAdd()

        self.act_init_flag = False
        if self.act:
            self.act_init_flag = True

        self.matmul = tl.ops.MatMul()

    def forward(self, inputs):
        if self._forward_state == False:
            if self._built == False:
                self.build(tl.get_tensor_shape(inputs))
                self._built = True
            self._forward_state = True

        z = self.matmul(inputs, self.W)
        if self.b_init_flag:
            z = self.bias_add(z, self.b)
        if self.act_init_flag:
            z = self.act(z)
        return z

@@ -136,37 +177,54 @@ We use Dropout as an example here:

 .. code-block:: python
  
  class Dropout(Layer):
      """
      The :class:`Dropout` class is a noise layer which randomly set some
      activations to zero according to a keeping probability.
      Parameters
      ----------
      keep : float
          The keeping probability.
          The lower the probability it is, the more activations are set to zero.
      name : None or str
          A unique layer name.
      """

      def __init__(self, keep, name=None):
          super(Dropout, self).__init__(name)
          self.keep = keep

          self.build()
          self._built = True

          logging.info("Dropout %s: keep: %f " % (self.name, self.keep))

      def build(self, inputs_shape=None):
          pass   # no weights in dropout layer

      def forward(self, inputs):
          if self.is_train:  # this attribute is changed by Model.train() and Model.eval() described above
              outputs = tf.nn.dropout(inputs, rate=1 - (self.keep), name=self.name)
          else:
              outputs = inputs
          return outputs
  class Dropout(Module):
    """
    The :class:`Dropout` class is a noise layer which randomly set some
    activations to zero according to a keeping probability.

    Parameters
    ----------
    keep : float
        The keeping probability.
        The lower the probability it is, the more activations are set to zero.
    seed : int or None
        The seed for random dropout.
    name : None or str
        A unique layer name.

    Examples
    --------
    >>> net = tl.layers.Input([10, 200])
    >>> net = tl.layers.Dropout(keep=0.2)(net)

    """

    def __init__(self, keep, seed=0, name=None):  #"dropout"):
        super(Dropout, self).__init__(name)
        self.keep = keep
        self.seed = seed

        self.build()
        self._built = True

        logging.info("Dropout %s: keep: %f " % (self.name, self.keep))

    def __repr__(self):
        s = ('{classname}(keep={keep}')
        if self.name is not None:
            s += ', name=\'{name}\''
        s += ')'
        return s.format(classname=self.__class__.__name__, **self.__dict__)

    def build(self, inputs_shape=None):
        self.dropout = tl.ops.Dropout(keep=self.keep, seed=self.seed)

    def forward(self, inputs):
        if self.is_train:
            outputs = self.dropout(inputs)
        else:
            outputs = inputs
        return outputs

 Pre-trained CNN
 ================
@@ -176,42 +234,14 @@ Get entire CNN

 .. code-block:: python

  import tensorflow as tf

  import tensorlayer as tl
  import numpy as np
  from tensorlayer.models.imagenet_classes import class_names
  from examples.model_zoo import vgg16

  vgg = tl.models.vgg16(pretrained=True)
  vgg = vgg16(pretrained=True)
  img = tl.vis.read_image('data/tiger.jpeg')
  img = tl.prepro.imresize(img, (224, 224)).astype(np.float32) / 255
  img = tl.prepro.imresize(img, (224, 224)).astype(tl.float32) / 255
  output = vgg(img, is_train=False)

 Get a part of CNN
 ------------------

 .. code-block:: python

  # get VGG without the last layer
  cnn = tl.models.vgg16(end_with='fc2_relu', mode='static').as_layer()
  # add one more layer and build a new model
  ni = tl.layers.Input([None, 224, 224, 3], name="inputs")
  nn = cnn(ni)
  nn = tl.layers.Dense(n_units=100, name='out')(nn)
  model = tl.models.Model(inputs=ni, outputs=nn)
  # train your own classifier (only update the last layer)
  train_weights = model.get_layer('out').all_weights

 Reuse CNN
 ------------------

 .. code-block:: python

  # in dynamic model, we can directly use the same model
  # in static model
  vgg_layer = tl.models.vgg16().as_layer()
  ni_1 = tl.layers.Input([None, 224, 224, 3])
  ni_2 = tl.layers.Input([None, 224, 224, 3])
  a_1 = vgg_layer(ni_1)
  a_2 = vgg_layer(ni_2)
  M = Model(inputs=[ni_1, ni_2], outputs=[a_1, a_2])

--- a/docs/user/get_start_model.rst
+++ b/docs/user/get_start_model.rst
@@ -5,31 +5,26 @@ Define a model
 ===============

 TensorLayer provides two ways to define a model.
 Static model allows you to build model in a fluent way while dynamic model allows you to fully control the forward process.
 Sequential model allows you to build model in a fluent way while dynamic model allows you to fully control the forward process.

 Static model
 Sequential model
 ===============

 .. code-block:: python

  import tensorflow as tf
  from tensorlayer.layers import Input, Dropout, Dense
  from tensorlayer.models import Model

  def get_model(inputs_shape):
      ni = Input(inputs_shape)
      nn = Dropout(keep=0.8)(ni)
      nn = Dense(n_units=800, act=tf.nn.relu, name="dense1")(nn) # “name" is optional
      nn = Dropout(keep=0.8)(nn)
      nn = Dense(n_units=800, act=tf.nn.relu)(nn)
      nn = Dropout(keep=0.8)(nn)
      nn = Dense(n_units=10, act=None)(nn)
      M = Model(inputs=ni, outputs=nn, name="mlp") # “name" is optional
      return M

  MLP = get_model([None, 784])
  MLP.eval()
  outputs = MLP(data)
  from tensorlayer.layers import SequentialLayer
  from tensorlayer.layers import Dense
  import tensorlayer as tl

  def get_model():
      layer_list = []
      layer_list.append(Dense(n_units=800, act=tl.ReLU, in_channels=784, name='Dense1'))
      layer_list.append(Dense(n_units=800, act=tl.ReLU, in_channels=800, name='Dense2'))
      layer_list.append(Dense(n_units=10, act=tl.ReLU, in_channels=800, name='Dense3'))
      MLP = SequentialLayer(layer_list)
      return MLP



 Dynamic model
 =======================
@@ -39,15 +34,18 @@ In this case, you need to manually input the output shape of the previous layer

 .. code-block:: python

  class CustomModel(Model):
  import tensorlayer as tl
  from tensorlayer.layers import Module
  from tensorlayer.layers import Dropout, Dense
  class CustomModel(Module):

      def __init__(self):
          super(CustomModel, self).__init__()

          self.dropout1 = Dropout(keep=0.8)
          self.dense1 = Dense(n_units=800, act=tf.nn.relu, in_channels=784)
          self.dense1 = Dense(n_units=800, act=tl.ReLU, in_channels=784)
          self.dropout2 = Dropout(keep=0.8)
          self.dense2 = Dense(n_units=800, act=tf.nn.relu, in_channels=800)
          self.dense2 = Dense(n_units=800, act=tl.ReLU, in_channels=800)
          self.dropout3 = Dropout(keep=0.8)
          self.dense3 = Dense(n_units=10, act=None, in_channels=800)

@@ -63,73 +61,83 @@ In this case, you need to manually input the output shape of the previous layer
          return out

  MLP = CustomModel()
  MLP.eval()
  MLP.set_eval()
  outputs = MLP(data, foo=True) # controls the forward here
  outputs = MLP(data, foo=False)
  
  
 Dynamic model without manually inputting the output shape
 ============================================================


 In this case, you do not manually input the output shape of the previous layer to the new layer.

 .. code-block:: python

  import tensorlayer as tl
  from tensorlayer.layers import Module
  from tensorlayer.layers import Dropout, Dense
  class CustomModel(Module):

      def __init__(self):
          super(CustomModel, self).__init__()

          self.dropout1 = Dropout(keep=0.8)
          self.dense1 = Dense(n_units=800, act=tl.ReLU)
          self.dropout2 = Dropout(keep=0.8)
          self.dense2 = Dense(n_units=800, act=tl.ReLU)
          self.dropout3 = Dropout(keep=0.8)
          self.dense3 = Dense(n_units=10, act=None)

      def forward(self, x, foo=False):
          z = self.dropout1(x)
          z = self.dense1(z)
          z = self.dropout2(z)
          z = self.dense2(z)
          z = self.dropout3(z)
          out = self.dense3(z)
          if foo:
              out = tf.nn.softmax(out)
          return out

  MLP = CustomModel()
  MLP.init_build(tl.layers.Input(shape=(1, 784))) # init_build must be called to initialize the weights.
  MLP.set_eval()
  outputs = MLP(data, foo=True) # controls the forward here
  outputs = MLP(data, foo=False)

 Switching train/test modes
 =============================

 .. code-block:: python

  # method 1: switch before forward
  Model.train() # enable dropout, batch norm moving avg ...
  output = Model(train_data) 
  MLP.set_train() # enable dropout, batch norm moving avg ...
  output = MLP(train_data)
  ... # training code here
  Model.eval()  # disable dropout, batch norm moving avg ...
  output = Model(test_data) 
  Model.set_eval()  # disable dropout, batch norm moving avg ...
  output = MLP(test_data)
  ... # testing code here
  
  # method 2: switch while forward
  output = Model(train_data, is_train=True)
  output = Model(test_data, is_train=False)
  # method 2: Using packaged training modules
  model = tl.models.Model(network=MLP, loss_fn=tl.cost.softmax_cross_entropy_with_logits, optimizer=optimizer)
  model.train(n_epoch=n_epoch, train_dataset=train_ds)

 Reuse weights
 =======================

 For static model, call the layer multiple time in model creation

 .. code-block:: python

  # create siamese network

  def create_base_network(input_shape):
        '''Base network to be shared (eq. to feature extraction).
        '''
        input = Input(shape=input_shape)
        x = Flatten()(input)
        x = Dense(128, act=tf.nn.relu)(x)
        x = Dropout(0.9)(x)
        x = Dense(128, act=tf.nn.relu)(x)
        x = Dropout(0.9)(x)
        x = Dense(128, act=tf.nn.relu)(x)
        return Model(input, x)


  def get_siamese_network(input_shape):
        """Create siamese network with shared base network as layer
        """
        base_layer = create_base_network(input_shape).as_layer() # convert model as layer

        ni_1 = Input(input_shape)
        ni_2 = Input(input_shape)
        nn_1 = base_layer(ni_1) # call base_layer twice
        nn_2 = base_layer(ni_2)
        return Model(inputs=[ni_1, ni_2], outputs=[nn_1, nn_2])

  siamese_net = get_siamese_network([None, 784])

 For a dynamic model, call the layer multiple times in the forward function

 .. code-block:: python

  class MyModel(Model):
  import tensorlayer as tl
  from tensorlayer.layers import Module, Dense, Concat
  class MyModel(Module):
      def __init__(self):
          super(MyModel, self).__init__()
          self.dense_shared = Dense(n_units=800, act=tf.nn.relu, in_channels=784)
          self.dense1 = Dense(n_units=10, act=tf.nn.relu, in_channels=800)
          self.dense2 = Dense(n_units=10, act=tf.nn.relu, in_channels=800)
          self.dense_shared = Dense(n_units=800, act=tl.ReLU, in_channels=784)
          self.dense1 = Dense(n_units=10, act=tl.ReLU, in_channels=800)
          self.dense2 = Dense(n_units=10, act=tl.ReLU, in_channels=800)
          self.cat = Concat()

      def forward(self, x):
@@ -158,56 +166,6 @@ Print model information
  #   (dropout_2): Dropout(keep=0.8, name='dropout_2')
  #   (dense_2): Dense(n_units=10, None, in_channels='800', name='dense_2')
  # )
  
  import pprint
  pprint.pprint(MLP.config) # print the model architecture
  #   {'inputs': '_inputlayer_1_node_0',
  #  'model_architecture': [{'args': {'dtype': tf.float32,
  #                                   'layer_type': 'normal',
  #                                   'name': '_inputlayer_1',
  #                                   'shape': [None, 784]},
  #                          'class': '_InputLayer',
  #                          'prev_layer': None},
  #                         {'args': {'keep': 0.8,
  #                                   'layer_type': 'normal',
  #                                   'name': 'dropout_1'},
  #                          'class': 'Dropout',
  #                          'prev_layer': ['_inputlayer_1_node_0']},
  #                         {'args': {'act': 'relu',
  #                                   'layer_type': 'normal',
  #                                   'n_units': 800,
  #                                   'name': 'dense_1'},
  #                          'class': 'Dense',
  #                          'prev_layer': ['dropout_1_node_0']},
  #                         {'args': {'keep': 0.8,
  #                                   'layer_type': 'normal',
  #                                   'name': 'dropout_2'},
  #                          'class': 'Dropout',
  #                          'prev_layer': ['dense_1_node_0']},
  #                         {'args': {'act': 'relu',
  #                                   'layer_type': 'normal',
  #                                   'n_units': 800,
  #                                   'name': 'dense_2'},
  #                          'class': 'Dense',
  #                          'prev_layer': ['dropout_2_node_0']},
  #                         {'args': {'keep': 0.8,
  #                                   'layer_type': 'normal',
  #                                   'name': 'dropout_3'},
  #                          'class': 'Dropout',
  #                          'prev_layer': ['dense_2_node_0']},
  #                         {'args': {'act': None,
  #                                   'layer_type': 'normal',
  #                                   'n_units': 10,
  #                                   'name': 'dense_3'},
  #                          'class': 'Dense',
  #                          'prev_layer': ['dropout_3_node_0']}],
  #  'name': 'mlp',
  #  'outputs': 'dense_3_node_0',
  #  'version_info': {'backend': 'tensorflow',
  #                   'backend_version': '2.0.0-alpha0',
  #                   'save_date': None,
  #                   'tensorlayer_version': '2.1.0',
  #                   'training_device': 'gpu'}}

 Get specific weights
 =======================
@@ -220,10 +178,6 @@ We can get the specific weights by indexing or naming.
  all_weights = MLP.all_weights
  some_weights = MLP.all_weights[1:3]

  # naming
  some_weights = MLP.get_layer('dense1').all_weights


 Save and restore model
 =======================

@@ -235,15 +189,17 @@ Save weights only

 .. code-block:: python

  MLP.save_weights('model_weights.h5') # by default, file will be in hdf5 format
  MLP.load_weights('model_weights.h5')
  MLP.save_weights('./model_weights.npz') # the file format is inferred from the file extension (npz here)
  MLP.load_weights('./model_weights.npz')

 Save model architecture and weights (optional)
 Save model weights (optional)
 -----------------------------------------------

 .. code-block:: python

  # When using Model.load(), there is no need to reimplement or declare the architecture of the model explicitly in code
  MLP.save('model.h5', save_weights=True)
  MLP = Model.load('model.h5', load_weights=True)
  # When using the packaged training module, the model weights can be saved and loaded as follows
  model = tl.models.Model(network=MLP, loss_fn=tl.cost.softmax_cross_entropy_with_logits, optimizer=optimizer)
  model.train(n_epoch=n_epoch, train_dataset=train_ds)
  model.save_weights('./model.npz', format='npz_dict')
  model.load_weights('./model.npz', format='npz_dict')

--- a/docs/user/installation.rst
+++ b/docs/user/installation.rst
@@ -15,8 +15,9 @@ Mac OX, Linux and Windows, or ask for help on `tensorlayer@gmail.com <tensorlaye
 or `FAQ <http://tensorlayer.readthedocs.io/en/latest/user/more.html>`_.


 Install TensorFlow
 Install Backend
 =========================
 TensorLayer supports multiple deep learning backends: TensorFlow is the default backend, and MindSpore and PaddlePaddle are also supported.

 .. code-block:: bash

@@ -24,9 +25,24 @@ Install TensorFlow
  pip3 install tensorflow-gpu # GPU version
  pip3 install tensorflow # CPU version


 The installation instructions for TensorFlow are described in detail on the `TensorFlow`_ website.
 However, there are a few things to consider. For example, `TensorFlow`_ officially supports GPU acceleration for Linux, Mac OS X and Windows at present. For the ARM processor architecture, you need to install TensorFlow from source.

 If you want to use the MindSpore backend, you should install mindspore==1.2.1.

 .. code-block:: bash

  pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/1.2.1/MindSpore/gpu/ubuntu_x86/cuda-10.1/mindspore_gpu-1.2.1-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple


 If you want to use the PaddlePaddle backend, you should install paddlepaddle>=2.1.1.

 .. code-block:: bash

   python -m pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple


 Install TensorLayer
 =========================

@@ -34,23 +50,21 @@ For stable version:

 .. code-block:: bash

  pip3 install tensorlayer
  pip3 install tensorlayer3
  
  pip install tensorlayer -i https://pypi.tuna.tsinghua.edu.cn/simple  (faster in China)
  pip install tensorlayer3 -i https://pypi.tuna.tsinghua.edu.cn/simple  (faster in China)

 For latest version, please install from Github.
 For latest version, please install from OpenI.

 .. code-block:: bash

  pip3 install git+https://github.com/tensorlayer/tensorlayer.git
  or
  pip3 install https://github.com/tensorlayer/tensorlayer/archive/master.zip
  pip3 install git+https://git.openi.org.cn/TensorLayer/tensorlayer3.0.git

 For developers, you should clone the folder to your local machine and put it along with your project scripts.

 .. code-block:: bash

  git clone https://github.com/tensorlayer/tensorlayer.git
  git clone https://git.openi.org.cn/TensorLayer/tensorlayer3.0.git


 Alternatively, you can build from the source.
@@ -58,7 +72,7 @@ Alternatively, you can build from the source.
 .. code-block:: bash

  # First clone the repository and change the current directory to the newly cloned repository
  git clone https://github.com/tensorlayer/tensorlayer.git
  git clone https://git.openi.org.cn/TensorLayer/tensorlayer3.0.git
  cd tensorlayer

  # Install virtualenv if necessary
@@ -85,6 +99,12 @@ Alternatively, you can build from the source.
  # for a machine **with** an NVIDIA GPU
  pip3 install -e ".[all_gpu_dev]"

 If you want to install TensorLayer 2.X:

 .. code-block:: bash

  [stable version] pip3 install tensorlayer==2.x.x

 If you want to install TensorLayer 1.X, the simplest way is as follows. It will also install numpy and matplotlib automatically.

 .. code-block:: bash
@@ -190,17 +210,6 @@ The NVIDIA CUDA® Deep Neural Network library (cuDNN) is a GPU-accelerated libra

 After extracting cuDNN, you will get three folders (bin, lib, include). Then these folders should be copied to CUDA installation. (The default installation directory is `C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0`)

 Installing TensorLayer
 ------------------------
 For TensorLayer, please refer to the steps mentioned above.

 .. code-block:: bash

  pip3 install tensorflow        #CPU version
  pip3 install tensorflow-gpu    #GPU version (GPU version and CPU version just choose one)
  pip3 install tensorlayer       #Install tensorlayer



 Issue
 =======
--- a/examples/basic_tutorials/tutorial_LayerList.py
+++ b/examples/basic_tutorials/tutorial_LayerList.py
@@ -1,43 +1,38 @@
 #!/usr/bin/env python3
 #! /usr/bin/python
 # -*- coding: utf-8 -*-

 from tensorlayer.layers import LayerList
 from tensorlayer.layers import Dense
 from tensorlayer.layers import Module, LayerList, Dense
 import tensorlayer as tl
 import numpy as np

 layer_list = []
 layer_list.append(Dense(n_units=800, act=tl.ReLU, in_channels=784, name='Dense1'))
 layer_list.append(Dense(n_units=800, act=tl.ReLU, in_channels=800, name='Dense2'))
 layer_list.append(Dense(n_units=10, act=tl.ReLU, in_channels=800, name='Dense3'))
 MLP = LayerList(layer_list)

 X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 784))

 def generator_train():
    """Yield ``(sample, label)`` pairs from ``X_train`` / ``y_train`` one at a time.

    Each label is wrapped with ``np.array`` before it is yielded.

    Raises
    ------
    AssertionError
        If ``X_train`` and ``y_train`` do not have the same length.
    """
    if len(X_train) != len(y_train):
        raise AssertionError("The length of inputs and targets should be equal")
    for sample, label in zip(X_train, y_train):
        yield (sample, np.array(label))

 n_epoch = 50
 batch_size = 128
 print_freq = 2
 shuffle_buffer_size = 128

 # train_weights = MLP.trainable_weights
 # print(train_weights)
 optimizer = tl.optimizers.Momentum(0.05, 0.9)
 train_ds = tl.dataflow.FromGenerator(
    generator_train, output_types=(tl.float32, tl.int32) , column_names=['data', 'label']
 )
 train_ds = tl.dataflow.Shuffle(train_ds,shuffle_buffer_size)
 train_ds = tl.dataflow.Batch(train_ds,batch_size)


 model = tl.models.Model(network=MLP, loss_fn=tl.cost.cross_entropy, optimizer=optimizer)
 model.train(n_epoch=n_epoch, train_dataset=train_ds, print_freq=print_freq, print_train_batch=False)
 model.save_weights('./model.npz', format='npz_dict')
 model.load_weights('./model.npz', format='npz_dict')

 d1 = Dense(n_units=800, act=tl.ReLU, in_channels=784, name='Dense1')
 d2 = Dense(n_units=800, act=tl.ReLU, in_channels=800, name='Dense2')
 d3 = Dense(n_units=10, act=tl.ReLU, in_channels=800, name='Dense3')

 layer_list = LayerList([d1, d2])
 # Inserts a given d2 before a given index in the list
 layer_list.insert(1, d2)
 layer_list.insert(2, d2)
 # Appends d2 from a Python iterable to the end of the list.
 layer_list.extend([d2])
 # Appends a given d3 to the end of the list.
 layer_list.append(d3)

 print(layer_list)


 class model(Module):
    """Module that chains the entries of the module-level ``layer_list`` in index order."""

    def __init__(self):
        super(model, self).__init__()
        # Keep a reference to the shared list built above; it is not copied.
        self._list = layer_list

    def forward(self, inputs):
        """Pass ``inputs`` through ``self._list[0]``, then through every later entry, and return the result."""
        hidden = self._list[0](inputs)
        for position in range(1, len(self._list)):
            stage = self._list[position]
            hidden = stage(hidden)
        return hidden


 net = model()
 print(net)
 print(net(tl.layers.Input((10, 784))))
--- a/examples/basic_tutorials/tutorial_SequentialLayer.py
+++ b/examples/basic_tutorials/tutorial_SequentialLayer.py
@@ -0,0 +1,46 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 import os
 os.environ['TL_BACKEND'] = 'tensorflow'

 from tensorlayer.layers import SequentialLayer
 from tensorlayer.layers import Dense
 import tensorlayer as tl
 import numpy as np

 # Collect three Dense layers in a plain Python list. Each layer's in_channels
 # equals the n_units of the layer before it (784 -> 800 -> 800 -> 10).
 layer_list = []
 layer_list.append(Dense(n_units=800, act=tl.ReLU, in_channels=784, name='Dense1'))
 layer_list.append(Dense(n_units=800, act=tl.ReLU, in_channels=800, name='Dense2'))
 layer_list.append(Dense(n_units=10, act=tl.ReLU, in_channels=800, name='Dense3'))
 # Wrap the list in a SequentialLayer; this object is the network handed to tl.models.Model below.
 MLP = SequentialLayer(layer_list)

 # Load MNIST, requesting shape (-1, 784) so the images match Dense1's in_channels.
 X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 784))


 def generator_train():
    """Generate the training set as ``(sample, label)`` tuples.

    Reads the module-level ``X_train`` and ``y_train``; every label is converted
    with ``np.array`` before being yielded.

    Raises
    ------
    AssertionError
        If the number of samples and the number of labels differ.
    """
    if len(X_train) != len(y_train):
        raise AssertionError("The length of inputs and targets should be equal")
    for sample, label in zip(X_train, y_train):
        yield (sample, np.array(label))


 # Training hyper-parameters used by the pipeline and by model.train() below.
 n_epoch = 50
 batch_size = 128
 print_freq = 2
 shuffle_buffer_size = 128

 # train_weights = MLP.trainable_weights
 # print(train_weights)
 optimizer = tl.optimizers.Momentum(0.05, 0.9)
 # Input pipeline: wrap the generator, then shuffle, then batch.
 train_ds = tl.dataflow.FromGenerator(
    generator_train, output_types=(tl.float32, tl.int32), column_names=['data', 'label']
 )
 train_ds = tl.dataflow.Shuffle(train_ds, shuffle_buffer_size)
 train_ds = tl.dataflow.Batch(train_ds, batch_size)

 # Bundle network, loss and optimizer into a Model and train it on train_ds.
 model = tl.models.Model(network=MLP, loss_fn=tl.cost.softmax_cross_entropy_with_logits, optimizer=optimizer)
 model.train(n_epoch=n_epoch, train_dataset=train_ds, print_freq=print_freq, print_train_batch=False)
 # Write the weights to ./model.npz in 'npz_dict' format, then read the same file back.
 model.save_weights('./model.npz', format='npz_dict')
 model.load_weights('./model.npz', format='npz_dict')
--- a/examples/basic_tutorials/tutorial_automatic_inference_input
+++ b/examples/basic_tutorials/tutorial_automatic_inference_input
@@ -0,0 +1,95 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 import os
 os.environ['TL_BACKEND'] = 'tensorflow'

 import numpy as np
 import time
 import tensorflow as tf
 import tensorlayer as tl
 from tensorlayer.layers import Module
 from tensorlayer.layers import Dense, Dropout, BatchNorm1d

 X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 784))


 class CustomModel(Module):
    """MLP whose layers are declared without ``in_channels``.

    The layer order in ``forward`` is dropout -> dense -> batch norm -> dropout ->
    dense -> dropout -> dense (10 units).
    """

    def __init__(self):
        super(CustomModel, self).__init__()
        self.dropout1 = Dropout(keep=0.8)
        self.dense1 = Dense(n_units=800)
        self.batchnorm = BatchNorm1d(act=tl.ReLU)
        self.dropout2 = Dropout(keep=0.8)
        self.dense2 = Dense(n_units=800, act=tl.ReLU)
        self.dropout3 = Dropout(keep=0.8)
        self.dense3 = Dense(n_units=10, act=tl.ReLU)

    def forward(self, x, foo=None):
        """Run ``x`` through all layers; when ``foo`` is not None, apply one more ReLU to the output."""
        stages = (
            self.dropout1,
            self.dense1,
            self.batchnorm,
            self.dropout2,
            self.dense2,
            self.dropout3,
            self.dense3,
        )
        out = x
        for stage in stages:
            out = stage(out)
        if foo is None:
            return out
        return tl.ops.relu(out)


 MLP = CustomModel()
 # Automatic inference of the input shape.
 # The layers above were created without in_channels, so init_build(input) must be
 # called once with a sample input to initialize the weights.
 MLP.init_build(tl.layers.Input(shape=(1, 784)))

 # Training hyper-parameters.
 n_epoch = 50
 batch_size = 500
 print_freq = 5
 # Read the trainable weights after init_build so the list can be handed to the gradient tape below.
 train_weights = MLP.trainable_weights
 optimizer = tl.optimizers.Adam(lr=0.0001)

 for epoch in range(n_epoch):  ## iterate the dataset n_epoch times
    start_time = time.time()
    ## iterate over the entire training set once (shuffle the data via training)
    for X_batch, y_batch in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True):
        # enable dropout; done per batch because the evaluation below switches to eval mode
        MLP.set_train()
        with tf.GradientTape() as tape:
            ## compute outputs
            _logits = MLP(X_batch)
            ## compute loss and update model
            _loss = tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='train_loss')
        grad = tape.gradient(_loss, train_weights)
        optimizer.apply_gradients(zip(grad, train_weights))

    ## use training and evaluation sets to evaluate the model every print_freq epoch
    if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
        print("Epoch {} of {} took {}".format(epoch + 1, n_epoch, time.time() - start_time))
        # Switch to evaluation mode so dropout is disabled while the metrics are measured.
        MLP.set_eval()
        train_loss, train_acc, n_iter = 0, 0, 0
        for X_batch, y_batch in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=False):
            _logits = MLP(X_batch)
            train_loss += tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='eval_loss')
            train_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
            n_iter += 1
        print("   train loss: {}".format(train_loss / n_iter))
        print("   train acc:  {}".format(train_acc / n_iter))

        val_loss, val_acc, n_iter = 0, 0, 0
        for X_batch, y_batch in tl.iterate.minibatches(X_val, y_val, batch_size, shuffle=False):
            _logits = MLP(X_batch)  # evaluation mode was set above, so dropout is disabled
            val_loss += tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='eval_loss')
            val_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
            n_iter += 1
        print("   val loss: {}".format(val_loss / n_iter))
        print("   val acc:  {}".format(val_acc / n_iter))

 ## use testing data to evaluate the model
 MLP.set_eval()
 # Accumulate loss and accuracy per batch, then report the mean over batches.
 test_loss, test_acc, n_iter = 0, 0, 0
 for X_batch, y_batch in tl.iterate.minibatches(X_test, y_test, batch_size, shuffle=False):
    # foo=1 makes CustomModel.forward apply an additional ReLU to the output.
    _logits = MLP(X_batch, foo=1)
    test_loss += tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='test_loss')
    test_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
    n_iter += 1
 print("   test foo=1 loss: {}".format(test_loss / n_iter))
 print("   test foo=1 acc:  {}".format(test_acc / n_iter))
--- a/examples/basic_tutorials/tutorial_cifar10_cnn_dynamic_MS_backend.py
+++ b/examples/basic_tutorials/tutorial_cifar10_cnn_dynamic_MS_backend.py
@@ -1,6 +1,9 @@
 #!/usr/bin/env python3
 #! /usr/bin/python
 # -*- coding: utf-8 -*-

 import os
 os.environ['TL_BACKEND'] = 'mindspore'

 import time
 import numpy as np
 import multiprocessing
@@ -18,23 +21,23 @@ import mindspore.ops.operations as P

 # enable debug logging
 tl.logging.set_verbosity(tl.logging.DEBUG)
 tl.logging.set_verbosity(tl.logging.DEBUG)


 class CNN(Module):

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = Conv2d(64, (5, 5), (2, 2), padding='SAME', b_init=None, name='conv1', in_channels=3, act=tl.ReLU, data_format='channels_first')
        self.bn = BatchNorm2d(num_features=64, act=tl.ReLU, data_format='channels_first')
        self.maxpool1 = MaxPool2d((3, 3), (2, 2), padding='SAME', name='pool1', data_format='channels_first')
        self.conv2 = Conv2d(128, (5, 5), (2, 2), padding='SAME', act=tl.ReLU, b_init=None, name='conv2', in_channels=64, data_format='channels_first')
        self.maxpool2 = MaxPool2d((3, 3), (2, 2), padding='SAME', name='pool2', data_format='channels_first')
        self.conv1 = Conv2d(64, (5, 5), (2, 2), b_init=None, name='conv1', in_channels=3, act=tl.ReLU)
        self.bn = BatchNorm2d(num_features=64, act=tl.ReLU)
        self.maxpool1 = MaxPool2d((3, 3), (2, 2), name='pool1')
        self.conv2 = Conv2d(128, (5, 5), (2, 2), act=tl.ReLU, b_init=None, name='conv2', in_channels=64)
        self.maxpool2 = MaxPool2d((3, 3), (2, 2), name='pool2')

        self.flatten = Flatten(name='flatten')
        self.dense1 = Dense(120, act=tl.ReLU, name='dense1relu', in_channels=4608)
        self.dense1 = Dense(120, act=tl.ReLU, name='dense1relu', in_channels=512)
        self.dense2 = Dense(84, act=tl.ReLU, name='dense2relu', in_channels=120)
        self.dense3 = Dense(10, act=None, name='output', in_channels=84)


    def forward(self, x):
        z = self.conv1(x)
        z = self.bn(z)
@@ -47,14 +50,16 @@ class CNN(Module):
        z = self.dense3(z)
        return z


 # training settings
 batch_size = 128
 n_epoch = 500
 shuffle_buffer_size = 128


 # prepare cifar10 data
 X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)


 def generator_train():
    inputs = X_train
    targets = y_train
@@ -73,6 +78,7 @@ def generator_test():
        # yield _input.encode('utf-8'), _target.encode('utf-8')
        yield _input, _target


 def _map_fn_train(img, target):
    # 1. Randomly crop a [height, width] section of the image.
    img = tf.image.random_crop(img, [24, 24, 3])
@@ -127,8 +133,6 @@ for epoch in range(n_epoch):
    for X_batch, y_batch in train_ds:
        X_batch = ms.Tensor(X_batch.numpy(), dtype=ms.float32)
        y_batch = ms.Tensor(y_batch.numpy(), dtype=ms.int32)
        X_batch = tl.nhwc_to_nchw(X_batch)
        y_batch = tl.nhwc_to_nchw(y_batch)
        output = net(X_batch)
        loss_output = criterion(output, y_batch)
        grads = train_network(X_batch, y_batch)
@@ -141,26 +145,3 @@ for epoch in range(n_epoch):
        print("   train loss: {}".format(train_loss / n_iter))
        print("   train acc:  {}".format(train_acc / n_iter))
        print(" loss ", loss)

 #     start_time = time.time()

 #     train_loss, train_acc, n_iter = 0, 0, 0
 #     for X_batch, y_batch in train_ds:
 #         net.set_train()

 #         with tf.GradientTape() as tape:
 #             # compute outputs
 #             _logits = net(X_batch)
 #             # compute loss and update model
 #             _loss_ce = tl.cost.cross_entropy(_logits, y_batch, name='train_loss')

 #         grad = tape.gradient(_loss_ce, train_weights)
 #         optimizer.apply_gradients(zip(grad, train_weights))

 #         train_loss += _loss_ce
 #         train_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
 #         n_iter += 1

 #         print("Epoch {} of {} took {}".format(epoch + 1, n_epoch, time.time() - start_time))
 #         print("   train loss: {}".format(train_loss / n_iter))
 #         print("   train acc:  {}".format(train_acc / n_iter))
--- a/examples/basic_tutorials/tutorial_cifar10_cnn_paddle_backend.py
+++ b/examples/basic_tutorials/tutorial_cifar10_cnn_paddle_backend.py
@@ -0,0 +1,166 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 # The tensorlayer and tensorflow operators can be mixed
 import os
 os.environ['TL_BACKEND'] = 'paddle'

 import time
 import numpy as np
 import multiprocessing
 import tensorflow as tf
 import paddle as pd
 from tensorlayer.layers import Module
 import tensorlayer as tl
 from tensorlayer.layers import (Conv2d, Dense, Flatten, MaxPool2d, BatchNorm2d)

 # enable debug logging
 tl.logging.set_verbosity(tl.logging.DEBUG)

 # prepare cifar10 data
 X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)


 class CNN(Module):
    """CNN with two conv / batch-norm / max-pool stages followed by three dense layers (10 outputs)."""

    def __init__(self):
        super(CNN, self).__init__()
        # Initializers: one for the convolution kernels, a separate pair for the dense layers.
        conv_w_init = tl.initializers.truncated_normal(stddev=5e-2)
        dense_w_init = tl.initializers.truncated_normal(stddev=0.04)
        dense_b_init = tl.initializers.constant(value=0.1)

        # First stage; the ReLU is applied by the batch-norm layer.
        self.conv1 = Conv2d(
            64, (5, 5), (1, 1), padding='SAME', W_init=conv_w_init, b_init=None, name='conv1', in_channels=3
        )
        self.bn1 = BatchNorm2d(num_features=64, act=tl.ReLU)
        self.maxpool1 = MaxPool2d((3, 3), (2, 2), padding='SAME', name='pool1')

        # Second stage, same layout as the first.
        self.conv2 = Conv2d(
            64, (5, 5), (1, 1), padding='SAME', W_init=conv_w_init, b_init=None, name='conv2', in_channels=64
        )
        self.bn2 = BatchNorm2d(num_features=64, act=tl.ReLU)
        self.maxpool2 = MaxPool2d((3, 3), (2, 2), padding='SAME', name='pool2')

        # Classifier head.
        self.flatten = Flatten(name='flatten')
        self.dense1 = Dense(
            384, act=tl.ReLU, W_init=dense_w_init, b_init=dense_b_init, name='dense1relu', in_channels=2304
        )
        self.dense2 = Dense(
            192, act=tl.ReLU, W_init=dense_w_init, b_init=dense_b_init, name='dense2relu', in_channels=384
        )
        self.dense3 = Dense(10, act=None, W_init=dense_w_init, name='output', in_channels=192)

    def forward(self, x):
        """Return the output of ``dense3`` for the input batch ``x``."""
        stage1 = self.maxpool1(self.bn1(self.conv1(x)))
        stage2 = self.maxpool2(self.bn2(self.conv2(stage1)))
        flat = self.flatten(stage2)
        hidden = self.dense2(self.dense1(flat))
        return self.dense3(hidden)


 def generator_train():
    """Yield ``(image, label)`` pairs from the module-level ``X_train`` / ``y_train``.

    Raises
    ------
    AssertionError
        If the two arrays differ in length.
    """
    if len(X_train) != len(y_train):
        raise AssertionError("The length of inputs and targets should be equal")
    for image, label in zip(X_train, y_train):
        yield image, label


 def generator_test():
    """Yield ``(image, label)`` pairs from the module-level ``X_test`` / ``y_test``.

    Raises
    ------
    AssertionError
        If the two arrays differ in length.
    """
    if len(X_test) != len(y_test):
        raise AssertionError("The length of inputs and targets should be equal")
    for image, label in zip(X_test, y_test):
        yield image, label


 def _map_fn_train(img, target):
    """Augment one training image and reshape its label to a scalar.

    Returns the augmented image and ``target`` reshaped to shape ``()``.
    """
    # Random [24, 24, 3] crop.
    cropped = tf.image.random_crop(img, [24, 24, 3])
    # Random horizontal flip.
    flipped = tf.image.random_flip_left_right(cropped)
    # Random brightness change (max_delta=63).
    brightened = tf.image.random_brightness(flipped, max_delta=63)
    # Random contrast change with a factor between 0.2 and 1.8.
    contrasted = tf.image.random_contrast(brightened, lower=0.2, upper=1.8)
    # Per-image standardization: subtract the mean and divide by the (adjusted) standard deviation.
    standardized = tf.image.per_image_standardization(contrasted)
    scalar_target = tf.reshape(target, ())
    return standardized, scalar_target


 def _map_fn_test(img, target):
    """Prepare one test image (no random augmentation) and reshape its label to a scalar."""
    # Resize to 24x24 with tf.image.resize_with_pad (a resize with padding, not a crop).
    resized = tf.image.resize_with_pad(img, 24, 24)
    # Per-image standardization: subtract the mean and divide by the (adjusted) standard deviation.
    standardized = tf.image.per_image_standardization(resized)
    # Pin the image shape explicitly before returning.
    return tf.reshape(standardized, (24, 24, 3)), tf.reshape(target, ())


 # get the network
 net = CNN()

 # training settings
 batch_size = 128
 n_epoch = 500
 learning_rate = 0.0001
 print_freq = 5
 shuffle_buffer_size = 128
 # Accuracy metric object; the training loop calls update(), result() and reset() on it per batch.
 metrics = tl.metric.Accuracy()

 train_weights = net.trainable_weights
 optimizer = tl.optimizers.Adam(learning_rate)
 # looking for decay learning rate? see https://github.com/tensorlayer/srgan/blob/master/train.py

 # dataset API and augmentation
 # The input pipelines are built with tf.data even though TL_BACKEND is 'paddle';
 # the training loop converts every batch with pd.to_tensor.
 train_ds = tf.data.Dataset.from_generator(
    generator_train, output_types=(tf.float32, tf.int32)
 )  # , output_shapes=((24, 24, 3), (1)))
 train_ds = train_ds.map(_map_fn_train, num_parallel_calls=multiprocessing.cpu_count())
 # train_ds = train_ds.repeat(n_epoch)
 train_ds = train_ds.shuffle(shuffle_buffer_size)
 train_ds = train_ds.prefetch(buffer_size=4096)
 train_ds = train_ds.batch(batch_size)
 # value = train_ds.make_one_shot_iterator().get_next()

 # Test pipeline: same stages as above but with _map_fn_test and without shuffling.
 test_ds = tf.data.Dataset.from_generator(
    generator_test, output_types=(tf.float32, tf.int32)
 )  # , output_shapes=((24, 24, 3), (1)))
 # test_ds = test_ds.shuffle(shuffle_buffer_size)
 test_ds = test_ds.map(_map_fn_test, num_parallel_calls=multiprocessing.cpu_count())
 # test_ds = test_ds.repeat(n_epoch)
 test_ds = test_ds.prefetch(buffer_size=4096)
 test_ds = test_ds.batch(batch_size)
 # value_test = test_ds.make_one_shot_iterator().get_next()

 # Train the network; running loss and accuracy are printed after every batch.
 for epoch in range(n_epoch):
    train_loss, train_acc, n_iter = 0, 0, 0
    # Iterate the augmented *training* pipeline: the optimizer step below updates the
    # weights, so the held-out test_ds must not be used here.
    for X_batch, y_batch in train_ds:
        start_time = time.time()
        # Convert the tf.data batch to Paddle tensors via NumPy.
        X_batch = pd.to_tensor(X_batch.numpy(), dtype=tl.float32)
        y_batch = pd.to_tensor(y_batch.numpy(), dtype=tl.int64)
        net.set_train()

        output = net(X_batch)
        loss = pd.nn.functional.cross_entropy(output, y_batch)
        loss_ce = loss.numpy()
        params_grads = optimizer.gradient(loss, train_weights)
        optimizer.apply_gradients(params_grads)

        train_loss += loss_ce

        if metrics:
            # Measure accuracy for this batch only, then clear the metric state.
            metrics.update(output, y_batch)
            train_acc += metrics.result()
            metrics.reset()
        n_iter += 1

        # start_time is reset at the top of each batch, so this is the time of one step.
        print("Epoch {} of {} took {}".format(epoch + 1, n_epoch, time.time() - start_time))
        print("   train loss: {}".format(train_loss / n_iter))
        print("   train acc:  {}".format(train_acc / n_iter))
--- a/examples/basic_tutorials/tutorial_cifar10_cnn_tensorflow_backend.py
+++ b/examples/basic_tutorials/tutorial_cifar10_cnn_tensorflow_backend.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 # The tensorlayer and tensorflow operators can be mixed
 import os
 os.environ['TL_BACKEND'] = 'tensorflow'

 import time
 import numpy as np
@@ -12,7 +15,6 @@ from tensorlayer.layers import (Conv2d, Dense, Flatten, MaxPool2d, BatchNorm2d)

 # enable debug logging
 tl.logging.set_verbosity(tl.logging.DEBUG)
 tl.logging.set_verbosity(tl.logging.DEBUG)

 # prepare cifar10 data
 X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
@@ -148,7 +150,7 @@ for epoch in range(n_epoch):
            # compute outputs
            _logits = net(X_batch)
            # compute loss and update model
            _loss_ce = tl.cost.cross_entropy(_logits, y_batch, name='train_loss')
            _loss_ce = tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='train_loss')

        grad = tape.gradient(_loss_ce, train_weights)
        optimizer.apply_gradients(zip(grad, train_weights))
@@ -164,22 +166,22 @@ for epoch in range(n_epoch):
    # use training and evaluation sets to evaluate the model every print_freq epoch
    if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:

        net.eval()
        net.set_eval()
        val_loss, val_acc, n_iter = 0, 0, 0
        for X_batch, y_batch in test_ds:
            _logits = net(X_batch)  # is_train=False, disable dropout
            val_loss += tl.cost.cross_entropy(_logits, y_batch, name='eval_loss')
            val_loss += tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='eval_loss')
            val_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
            n_iter += 1
        print("   val loss: {}".format(val_loss / n_iter))
        print("   val acc:  {}".format(val_acc / n_iter))

 # use testing data to evaluate the model
 net.eval()
 net.set_eval()
 test_loss, test_acc, n_iter = 0, 0, 0
 for X_batch, y_batch in test_ds:
    _logits = net(X_batch)
    test_loss += tl.cost.cross_entropy(_logits, y_batch, name='test_loss')
    test_loss += tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='test_loss')
    test_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
    n_iter += 1
 print("   test loss: {}".format(test_loss / n_iter))
--- a/examples/basic_tutorials/tutorial_cifar10_cnn_tensorlayer.py
+++ b/examples/basic_tutorials/tutorial_cifar10_cnn_tensorlayer.py
@@ -0,0 +1,181 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-

 import os
 # os.environ['TL_BACKEND'] = 'paddle'
 os.environ['TL_BACKEND'] = 'tensorflow'
 # os.environ['TL_BACKEND'] = 'mindspore'

 import time
 import multiprocessing
 import tensorflow as tf

 from tensorlayer.models import TrainOneStep
 from tensorlayer.layers import Module
 import tensorlayer as tl
 from tensorlayer.layers import (Conv2d, Dense, Flatten, MaxPool2d, BatchNorm2d)

 # enable debug logging
 tl.logging.set_verbosity(tl.logging.DEBUG)

 # prepare cifar10 data
 X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)


 class CNN(Module):
    """CNN with two convolution / max-pool stages (batch norm after the first) and three dense layers."""

    def __init__(self):
        super(CNN, self).__init__()
        # Initializers: one for the convolution kernels, a separate pair for the dense layers.
        conv_w_init = tl.initializers.truncated_normal(stddev=5e-2)
        dense_w_init = tl.initializers.truncated_normal(stddev=0.04)
        dense_b_init = tl.initializers.constant(value=0.1)

        # First stage; the ReLU is applied by the batch-norm layer.
        self.conv1 = Conv2d(
            64, (5, 5), (1, 1), padding='SAME', W_init=conv_w_init, b_init=None, name='conv1', in_channels=3
        )
        self.bn = BatchNorm2d(num_features=64, act=tl.ReLU)
        self.maxpool1 = MaxPool2d((3, 3), (2, 2), padding='SAME', name='pool1')

        # Second stage; here the ReLU is part of the convolution layer itself.
        self.conv2 = Conv2d(
            64, (5, 5), (1, 1), padding='SAME', act=tl.ReLU, W_init=conv_w_init, b_init=None, name='conv2',
            in_channels=64
        )
        self.maxpool2 = MaxPool2d((3, 3), (2, 2), padding='SAME', name='pool2')

        # Classifier head.
        self.flatten = Flatten(name='flatten')
        self.dense1 = Dense(
            384, act=tl.ReLU, W_init=dense_w_init, b_init=dense_b_init, name='dense1relu', in_channels=2304
        )
        self.dense2 = Dense(
            192, act=tl.ReLU, W_init=dense_w_init, b_init=dense_b_init, name='dense2relu', in_channels=384
        )
        self.dense3 = Dense(10, act=None, W_init=dense_w_init, name='output', in_channels=192)

    def forward(self, x):
        """Return the output of ``dense3`` for the input batch ``x``."""
        stage1 = self.maxpool1(self.bn(self.conv1(x)))
        stage2 = self.maxpool2(self.conv2(stage1))
        flat = self.flatten(stage2)
        hidden = self.dense2(self.dense1(flat))
        return self.dense3(hidden)


 # get the network
 net = CNN()

 # training settings
 batch_size = 128
 n_epoch = 500
 learning_rate = 0.0001
 print_freq = 5
 # Number of whole batches per epoch and in total, derived from the training-set size.
 n_step_epoch = int(len(y_train) / batch_size)
 n_step = n_epoch * n_step_epoch
 shuffle_buffer_size = 128

 train_weights = net.trainable_weights
 optimizer = tl.optimizers.Adam(learning_rate)
 # looking for decay learning rate? see https://github.com/tensorlayer/srgan/blob/master/train.py
 # Accuracy metric object; the training loop calls update(), result() and reset() on it per batch.
 metrics = tl.metric.Accuracy()


 def generator_train():
    """Generate the training split as ``(image, label)`` pairs.

    Reads the module-level ``X_train`` and ``y_train``.

    Raises
    ------
    AssertionError
        If the number of images and the number of labels differ.
    """
    if len(X_train) != len(y_train):
        raise AssertionError("The length of inputs and targets should be equal")
    for image, label in zip(X_train, y_train):
        yield image, label


 def generator_test():
    """Generate the test split as ``(image, label)`` pairs.

    Reads the module-level ``X_test`` and ``y_test``.

    Raises
    ------
    AssertionError
        If the number of images and the number of labels differ.
    """
    if len(X_test) != len(y_test):
        raise AssertionError("The length of inputs and targets should be equal")
    for image, label in zip(X_test, y_test):
        yield image, label


 def _map_fn_train(img, target):
    """Apply the random training augmentations to ``img`` and reshape ``target`` to shape ``()``."""
    # Step 1: random [24, 24, 3] crop.
    augmented = tf.image.random_crop(img, [24, 24, 3])
    # Step 2: random horizontal flip.
    augmented = tf.image.random_flip_left_right(augmented)
    # Step 3: random brightness change (max_delta=63).
    augmented = tf.image.random_brightness(augmented, max_delta=63)
    # Step 4: random contrast change with a factor between 0.2 and 1.8.
    augmented = tf.image.random_contrast(augmented, lower=0.2, upper=1.8)
    # Step 5: subtract the per-image mean and divide by the (adjusted) standard deviation.
    augmented = tf.image.per_image_standardization(augmented)
    return augmented, tf.reshape(target, ())


 def _map_fn_test(img, target):
    """Resize and standardize one test image; reshape ``target`` to shape ``()``."""
    # Step 1: resize to 24x24 with tf.image.resize_with_pad (resize plus padding, not a crop).
    prepared = tf.image.resize_with_pad(img, 24, 24)
    # Step 2: subtract the per-image mean and divide by the (adjusted) standard deviation.
    prepared = tf.image.per_image_standardization(prepared)
    # Step 3: pin the image shape explicitly.
    prepared = tf.reshape(prepared, (24, 24, 3))
    scalar_target = tf.reshape(target, ())
    return prepared, scalar_target


 # dataset API and augmentation
 # Training pipeline: generator -> per-image augmentation -> shuffle -> prefetch -> batch.
 train_ds = tf.data.Dataset.from_generator(
    generator_train, output_types=(tf.float32, tf.int32)
 )  # , output_shapes=((24, 24, 3), (1)))
 train_ds = train_ds.map(_map_fn_train, num_parallel_calls=multiprocessing.cpu_count())
 # train_ds = train_ds.repeat(n_epoch)
 train_ds = train_ds.shuffle(shuffle_buffer_size)
 train_ds = train_ds.prefetch(buffer_size=4096)
 train_ds = train_ds.batch(batch_size)
 # value = train_ds.make_one_shot_iterator().get_next()

 # Test pipeline: same stages but with _map_fn_test and without shuffling.
 test_ds = tf.data.Dataset.from_generator(
    generator_test, output_types=(tf.float32, tf.int32)
 )  # , output_shapes=((24, 24, 3), (1)))
 # test_ds = test_ds.shuffle(shuffle_buffer_size)
 test_ds = test_ds.map(_map_fn_test, num_parallel_calls=multiprocessing.cpu_count())
 # test_ds = test_ds.repeat(n_epoch)
 test_ds = test_ds.prefetch(buffer_size=4096)
 test_ds = test_ds.batch(batch_size)
 # value_test = test_ds.make_one_shot_iterator().get_next()


 class WithLoss(Module):
    """Module that pairs a network with a loss function so one call returns the loss."""

    def __init__(self, net, loss_fn):
        super(WithLoss, self).__init__()
        self._net = net
        self._loss_fn = loss_fn

    def forward(self, data, label):
        """Return ``loss_fn(net(data), label)``."""
        prediction = self._net(data)
        return self._loss_fn(prediction, label)


 # Wrap the network with its loss, then wrap that with the optimizer and the trainable weights.
 net_with_loss = WithLoss(net, loss_fn=tl.cost.softmax_cross_entropy_with_logits)
 net_with_train = TrainOneStep(net_with_loss, optimizer, train_weights)

 for epoch in range(n_epoch):
    start_time = time.time()
    net.set_train()
    train_loss, train_acc, n_iter = 0, 0, 0
    for X_batch, y_batch in train_ds:

        # Convert the tf.data batch to backend tensors via NumPy.
        X_batch = tl.ops.convert_to_tensor(X_batch.numpy(), dtype=tl.float32)
        y_batch = tl.ops.convert_to_tensor(y_batch.numpy(), dtype=tl.int64)

        # NOTE(review): presumably runs one forward/backward/update step and returns the
        # loss value -- confirm against TrainOneStep.
        _loss_ce = net_with_train(X_batch, y_batch)
        train_loss += _loss_ce

        n_iter += 1
        # Second forward pass on the same batch, used only for the accuracy metric.
        _logits = net(X_batch)
        metrics.update(_logits, y_batch)
        train_acc += metrics.result()
        metrics.reset()
        # Printed after every batch; the elapsed time is measured from the start of the epoch.
        print("Epoch {} of {} took {}".format(epoch + 1, n_epoch, time.time() - start_time))
        print("   train loss: {}".format(train_loss / n_iter))
        print("   train acc:  {}".format(train_acc / n_iter))
--- a/examples/basic_tutorials/tutorial_dataflow.py
+++ b/examples/basic_tutorials/tutorial_dataflow.py
@@ -0,0 +1,84 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-

 import os
 os.environ['TL_BACKEND'] = 'tensorflow'
 # os.environ['TL_BACKEND'] = 'mindspore'
 # os.environ['TL_BACKEND'] = 'paddle'

 import tensorlayer as tl
 from tensorlayer.layers import Module
 from tensorlayer.layers import Dense, Flatten
 from tensorlayer.vision.transforms import Normalize, Compose
 from tensorlayer.dataflow import Dataset, IterableDataset

 # Per-sample transform: a single Normalize step with mean 127.5 and std 127.5 on HWC data.
 transform = Compose([Normalize(mean=[127.5], std=[127.5], data_format='HWC')])

 print('download training data and load training data')

 # Load MNIST shaped as (N, 28, 28, 1) images.
 X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))
 # Scale the training images by 255 before Normalize is applied.
 # NOTE(review): presumably the loader returns values in [0, 1], so this restores a
 # [0, 255] range that Normalize then maps to roughly [-1, 1] -- confirm against the loader.
 X_train = X_train * 255

 print('load finished')


 class mnistdataset(Dataset):
    """Index-addressable dataset yielding ``(transformed float32 sample, int64 label)`` pairs."""

    def __init__(self, data=X_train, label=y_train, transform=transform):
        self.data, self.label, self.transform = data, label, transform

    def __getitem__(self, index):
        """Return the transformed sample and its label stored at ``index``."""
        sample = self.transform(self.data[index].astype('float32'))
        target = self.label[index].astype('int64')
        return sample, target

    def __len__(self):
        """Number of samples, taken from the length of ``self.data``."""
        return len(self.data)


 class mnistdataset1(IterableDataset):
    """Iterable dataset that streams ``(transformed float32 sample, int64 label)`` pairs in order."""

    def __init__(self, data=X_train, label=y_train, transform=transform):
        self.data, self.label, self.transform = data, label, transform

    def __iter__(self):
        """Yield every sample once, from position 0 up to ``len(self.data) - 1``."""
        total = len(self.data)
        position = 0
        while position < total:
            sample = self.transform(self.data[position].astype('float32'))
            target = self.label[position].astype('int64')
            yield sample, target
            position += 1


 class MLP(Module):
    """Flatten followed by three Dense layers (784 -> 120 -> 84 -> 10); ReLU on the first two."""

    def __init__(self):
        super(MLP, self).__init__()
        self.linear1 = Dense(n_units=120, in_channels=784, act=tl.ReLU)
        self.linear2 = Dense(n_units=84, in_channels=120, act=tl.ReLU)
        self.linear3 = Dense(n_units=10, in_channels=84)
        self.flatten = Flatten()

    def forward(self, x):
        """Apply flatten, linear1, linear2 and linear3 in that order and return the result."""
        for stage in (self.flatten, self.linear1, self.linear2, self.linear3):
            x = stage(x)
        return x


 # Build the iterable dataset, wrap it with FromGenerator (declaring float32 data and
 # int64 labels under the column names 'data' and 'label'), then batch it by 128 without shuffling.
 train_dataset = mnistdataset1(data=X_train, label=y_train, transform=transform)
 train_dataset = tl.dataflow.FromGenerator(
    train_dataset, output_types=[tl.float32, tl.int64], column_names=['data', 'label']
 )
 train_loader = tl.dataflow.Dataloader(train_dataset, batch_size=128, shuffle=False)

 # Demonstration loop: print the shape of each data batch together with its labels.
 for i in train_loader:
    print(i[0].shape, i[1])
--- a/examples/basic_tutorials/tutorial_mnist_gan_tensorlayer.py
+++ b/examples/basic_tutorials/tutorial_mnist_gan_tensorlayer.py
@@ -0,0 +1,154 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-

 import os
 # os.environ['TL_BACKEND'] = 'paddle'
 os.environ['TL_BACKEND'] = 'tensorflow'
 # os.environ['TL_BACKEND'] = 'mindspore'

 import time
 import numpy as np
 import tensorlayer as tl
 from tensorlayer.layers import Module, Dense
 from tensorlayer.dataflow import Dataset
 from tensorlayer.models import TrainOneStep

 X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 784))


 class mnistdataset(Dataset):
    """Index-addressable dataset returning ``(float32 sample, int64 label)`` pairs."""

    def __init__(self, data=X_train, label=y_train):
        self.data, self.label = data, label

    def __getitem__(self, index):
        """Return the sample and label stored at ``index`` after dtype conversion."""
        sample = self.data[index].astype('float32')
        target = self.label[index].astype('int64')
        return sample, target

    def __len__(self):
        """Number of samples, taken from the length of ``self.data``."""
        return len(self.data)


 # Batch size shared by the data loader and by the noise sampled in the training loop.
 batch_size = 128
 # Wrap the MNIST training split with FromGenerator (float32 data, int64 labels) and
 # serve it in shuffled batches.
 train_dataset = mnistdataset(data=X_train, label=y_train)
 train_dataset = tl.dataflow.FromGenerator(
    train_dataset, output_types=[tl.float32, tl.int64], column_names=['data', 'label']
 )
 train_loader = tl.dataflow.Dataloader(train_dataset, batch_size=batch_size, shuffle=True)


 class generator(Module):
    """Three Dense layers (100 -> 256 -> 256 -> 784): ReLU on the first two, Tanh on the last."""

    def __init__(self):
        super(generator, self).__init__()
        self.g_fc1 = Dense(n_units=256, in_channels=100, act=tl.ReLU)
        self.g_fc2 = Dense(n_units=256, in_channels=256, act=tl.ReLU)
        self.g_fc3 = Dense(n_units=784, in_channels=256, act=tl.Tanh)

    def forward(self, x):
        """Pass ``x`` through g_fc1, g_fc2 and g_fc3 in sequence."""
        hidden = self.g_fc2(self.g_fc1(x))
        return self.g_fc3(hidden)


 class discriminator(Module):
    """Three Dense layers (784 -> 256 -> 256 -> 1): LeakyReLU on the first two, Sigmoid on the last."""

    def __init__(self):
        super(discriminator, self).__init__()
        self.d_fc1 = Dense(n_units=256, in_channels=784, act=tl.LeakyReLU)
        self.d_fc2 = Dense(n_units=256, in_channels=256, act=tl.LeakyReLU)
        self.d_fc3 = Dense(n_units=1, in_channels=256, act=tl.Sigmoid)

    def forward(self, x):
        """Pass ``x`` through d_fc1, d_fc2 and d_fc3 in sequence."""
        hidden = self.d_fc2(self.d_fc1(x))
        return self.d_fc3(hidden)


 # Single generator / discriminator pair shared by both loss wrappers defined below.
 G = generator()
 D = discriminator()


 class WithLossG(Module):
    """Generator loss: score the discriminator's output on generated samples against all-ones."""

    def __init__(self, G, D, loss_fn):
        super(WithLossG, self).__init__()
        self.g_net = G
        self.d_net = D
        self.loss_fn = loss_fn

    def forward(self, g_data, label):
        """Return ``loss_fn(D(G(g_data)), ones)``; ``label`` is accepted but never read."""
        logits_fake = self.d_net(self.g_net(g_data))
        # Target of ones, with the same shape as the discriminator output.
        ones_target = tl.convert_to_tensor(np.ones(logits_fake.shape), dtype=tl.float32)
        return self.loss_fn(logits_fake, ones_target)


 class WithLossD(Module):
    """Discriminator loss: real samples scored against ones plus generated samples against zeros."""

    def __init__(self, G, D, loss_fn):
        super(WithLossD, self).__init__()
        self.g_net = G
        self.d_net = D
        self.loss_fn = loss_fn

    def forward(self, real_data, g_data):
        """Return ``loss_fn(D(real_data), ones) + loss_fn(D(G(g_data)), zeros)``."""
        logits_real = self.d_net(real_data)
        logits_fake = self.d_net(self.g_net(g_data))

        # Targets are shaped after the corresponding discriminator outputs.
        ones_target = tl.convert_to_tensor(np.ones(logits_real.shape), dtype=tl.float32)
        zeros_target = tl.convert_to_tensor(np.zeros(logits_fake.shape), dtype=tl.float32)

        real_term = self.loss_fn(logits_real, ones_target)
        fake_term = self.loss_fn(logits_fake, zeros_target)
        return real_term + fake_term


 # Loss and optimisers. The commented lines are an alternative configuration left for reference.
 # loss_fn = tl.cost.sigmoid_cross_entropy
 # optimizer = tl.optimizers.Momentum(learning_rate=5e-4, momentum=0.5)
 loss_fn = tl.cost.mean_squared_error
 optimizer_g = tl.optimizers.Adam(learning_rate=3e-4, beta_1=0.5, beta_2=0.999)
 optimizer_d = tl.optimizers.Adam(learning_rate=3e-4)

 # Each trainer receives only its own network's weights: the generator step is given
 # g_weights and the discriminator step d_weights, although both losses call both networks.
 g_weights = G.trainable_weights
 d_weights = D.trainable_weights
 net_with_loss_G = WithLossG(G, D, loss_fn)
 net_with_loss_D = WithLossD(G, D, loss_fn)
 # NOTE: "setp" is a misspelling of "step"; the names are used by the training loop below.
 train_one_setp_g = TrainOneStep(net_with_loss_G, optimizer_g, g_weights)
 train_one_setp_d = TrainOneStep(net_with_loss_D, optimizer_d, d_weights)
 n_epoch = 50


 def plot_fake_image(fake_image, num):
    """Show ``num`` generated samples as 28x28 images in one matplotlib figure.

    Parameters
    ----------
    fake_image : tensor reshapeable to ``(num, 28, 28)``.
    num : int
        Number of images to draw. It no longer has to be a perfect square: the
        grid is the smallest near-square layout with at least ``num`` cells.
        For perfect squares the layout is unchanged (e.g. 36 -> 6 x 6).
    """
    import matplotlib.pylab as plt

    fake_image = tl.reshape(fake_image, shape=(num, 28, 28))
    fake_image = tl.convert_to_numpy(fake_image)

    # Compute the grid once. The previous int(sqrt(num)) ** 2 grid was too small for
    # non-square counts, so later subplot indices fell outside the grid.
    n_cols = max(1, int(np.ceil(np.sqrt(num))))
    n_rows = max(1, -(-num // n_cols))  # ceiling division
    for i in range(num):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(fake_image[i])
    plt.show()


 # Adversarial training: for every real batch, one discriminator step followed by one generator step.
 for epoch in range(n_epoch):
    d_loss, g_loss = 0.0, 0.0
    n_iter = 0
    start_time = time.time()
    for data, label in train_loader:
        # Noise drawn from np.random.random, i.e. uniform on [0, 1), with batch_size rows.
        # NOTE(review): the noise always has batch_size rows, even if the loader yields a
        # smaller final batch -- confirm that mismatch is acceptable.
        noise = tl.convert_to_tensor(np.random.random(size=(batch_size, 100)), dtype=tl.float32)

        # Discriminator sees (real data, noise); generator sees (noise, label), where
        # WithLossG.forward does not read `label`.
        _loss_d = train_one_setp_d(data, noise)
        _loss_g = train_one_setp_g(noise, label)
        d_loss += _loss_d
        g_loss += _loss_g

        n_iter += 1
        # NOTE(review): these prints run once per batch, reporting running averages.
        print("Epoch {} of {} took {}".format(epoch + 1, n_epoch, time.time() - start_time))
        print("   d loss: {}".format(d_loss / n_iter))
        print("   g loss:  {}".format(g_loss / n_iter))
    # End of epoch: generate 36 samples from fresh noise and display them.
    fake_image = G(tl.convert_to_tensor(np.random.random(size=(36, 100)), dtype=tl.float32))
    plot_fake_image(fake_image, 36)
--- a/examples/basic_tutorials/tutorial_mnist_mlp_dynamci_dragon.py
+++ b/examples/basic_tutorials/tutorial_mnist_mlp_dynamci_dragon.py
@@ -1,100 +0,0 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 import os
 os.environ['TL_BACKEND'] = 'dragon'

 from tensorlayer.layers import Module
 from tensorlayer.layers import Dense
 import tensorlayer as tl
 import dragon as dg
 import time
 import argparse
 import numpy as np

 X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 784))


 class CustomModel(Module):
    """Three Dense layers (784 -> 800 -> 800 -> 10), each constructed with ``act=tl.ReLU``."""

    def __init__(self):
        super(CustomModel, self).__init__()
        self.dense1 = Dense(n_units=800, act=tl.ReLU, in_channels=784)
        self.dense2 = Dense(n_units=800, act=tl.ReLU, in_channels=800)
        self.dense3 = Dense(n_units=10, act=tl.ReLU, in_channels=800)

    def forward(self, x, foo=None):
        """Pass ``x`` through dense1, dense2 and dense3; ``foo`` is accepted but never read."""
        hidden = self.dense2(self.dense1(x))
        return self.dense3(hidden)


 def parse_args():
    """Build the command-line parser and return the parsed namespace.

    Recognised options: ``--execution`` (str, default ``'EAGER_MODE'``),
    ``--seed`` (int, default 1337) and ``--cuda`` (int, default -1).
    """
    cli = argparse.ArgumentParser(description='Train a cifar10 resnet')
    # (flag, default, type, help) -- registered in this order.
    option_table = (
        ('--execution', 'EAGER_MODE', str, 'The execution mode'),
        ('--seed', 1337, int, 'The random seed'),
        ('--cuda', -1, int, 'The cuda device to use'),
    )
    for flag, fallback, caster, description in option_table:
        cli.add_argument(flag, default=fallback, type=caster, help=description)
    return cli.parse_args()


 class Classifier(object):
    """Pair a CustomModel with an optimizer and expose a single training step."""

    # TensorSpec for graph execution
    # NOTE(review): image_spec is [None, 3, 32, 32], while this script loads MNIST as
    # (-1, 784) and the first Dense layer takes in_channels=784 -- confirm before using GRAPH_MODE.
    image_spec = dg.Tensor([None, 3, 32, 32], 'float32')
    label_spec = dg.Tensor([None], 'int64')

    def __init__(self, optimizer):
        """Create the network and remember the optimizer and the network's trainable weights."""
        super(Classifier, self).__init__()
        self.net = CustomModel()
        self.optimizer = optimizer
        self.params = self.net.trainable_weights

    def step(self, image, label):
        """Run one forward/backward/update step.

        Returns the tuple ``(loss, accuracy, self.optimizer)``.
        """
        with dg.GradientTape() as tape:
            logit = self.net(image)
            # logit = dg.cast(logit, 'float64')
            # The network output is replaced by its argmax over the last axis, cast to int64.
            logit = dg.cast(dg.math.argmax(logit, -1), 'int64')
            label = dg.cast(label, 'int64')
            # print("logit :\n", logit, label)
            # loss = dg.losses.smooth_l1_loss([logit, label])
            # NOTE(review): the loss is the sum of (predicted class index - label); argmax is
            # normally not differentiable, so useful gradients may not reach self.params -- confirm.
            loss = dg.math.sum(logit - label)  # dg.losses.sparse_softmax_cross_entropy([logit, label])
        # Fraction of positions where the predicted index equals the label.
        accuracy = dg.math.mean(dg.math.equal([logit, label]).astype('float32'))
        grads = tape.gradient(loss, self.params)
        # Pairs are passed as (param, grad) -- TODO confirm this matches the optimizer's expected order.
        self.optimizer.apply_gradients(zip(self.params, grads))
        return loss, accuracy, self.optimizer


 # Script entry point: parse options, configure dragon, build the classifier and train it.
 if __name__ == '__main__':
    args = parse_args()
    dg.logging.info('Called with args:\n' + str(args))

    # Seed numpy and apply the requested execution mode and CUDA device.
    np.random.seed(args.seed)
    dg.autograph.set_execution(args.execution)
    dg.cuda.set_default_device(args.cuda)

    # Define the model
    model = Classifier(dg.optimizers.SGD(base_lr=0.01, momentum=0.9, weight_decay=1e-4))

    # Compile for graph execution if necessary
    if args.execution == 'GRAPH_MODE':
        model.step = dg.function(
            func=model.step,
            input_signature=[model.image_spec, model.label_spec],
        )

    # Main loop
    # NOTE(review): `tf` is imported here but not referenced in the visible loop.
    import tensorflow as tf
    batch_size = 200
    # `i` counts passes over the training set (50 in total), not individual batches.
    for i in range(50):
        for X_batch, y_batch in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True):
            image = dg.EagerTensor(X_batch, copy=False)
            label = dg.EagerTensor(y_batch, copy=False, dtype='float32')
            loss, accuracy, _ = model.step(image, label)
            # Logs for every batch of passes 0, 20 and 40; the message calls `i` an "Iteration".
            if i % 20 == 0:
                dg.logging.info(
                    'Iteration %d, lr = %s, loss = %.5f, accuracy = %.3f' %
                    (i, str(model.optimizer.base_lr), loss, accuracy)
                )
--- a/examples/basic_tutorials/tutorial_mnist_mlp_mindspore.py
+++ b/examples/basic_tutorials/tutorial_mnist_mlp_mindspore.py
@@ -1,117 +0,0 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 import numpy as np
 import mindspore.nn as nn
 import mindspore.ops.operations as P
 from mindspore.ops import composite as C
 from mindspore.common import dtype as mstype
 from mindspore import context, Tensor, ParameterTuple
 from mindspore.common.initializer import TruncatedNormal
 from mindspore.nn import Dense, WithLossCell, SoftmaxCrossEntropyWithLogits, Momentum
 import tensorlayer as tl
 import mindspore as ms
 import tensorflow as tf
 import time

 context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")


 def fc_with_initialize(input_channels, out_channels):
    """Return an ``nn.Dense`` layer whose weight and bias initializers both come from ``weight_variable()``."""
    # Two separate initializer objects are created: first for the weight, then for the bias.
    return nn.Dense(input_channels, out_channels, weight_variable(), weight_variable())


 def weight_variable():
    """Return a fresh ``TruncatedNormal(0.02)`` initializer."""
    initializer = TruncatedNormal(0.02)
    return initializer


 class MLP(nn.Cell):
    """
    Three-layer fully connected network (784 -> 800 -> 800 -> num_class),
    with ReLU applied after the first and second layers.

    Args:
        num_class (int): Num classes. Default: 10.

    Returns:
        Tensor, output tensor

    Examples:
        >>> MLP(num_class=10)
    """

    def __init__(self, num_class=10):
        super(MLP, self).__init__()
        self.num_class = num_class
        # Dense layers built by fc_with_initialize; the last one outputs num_class units.
        self.fc1 = fc_with_initialize(784, 800)
        self.fc2 = fc_with_initialize(800, 800)
        self.fc3 = fc_with_initialize(800, self.num_class)
        # One ReLU instance reused after fc1 and fc2.
        self.relu = nn.ReLU()

    def construct(self, x):
        # fc1 -> ReLU -> fc2 -> ReLU -> fc3; no activation is applied to the fc3 output.
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x


 class GradWrap(nn.Cell):
    """ GradWrap definition

    Wraps a network so that calling the cell applies ``C.GradOperation`` to it
    with respect to its parameters that have ``requires_grad`` set.
    """

    def __init__(self, network):
        super(GradWrap, self).__init__(auto_prefix=False)
        self.network = network
        # Keep only the parameters flagged as requiring gradients.
        self.weights = ParameterTuple(filter(lambda x: x.requires_grad, network.get_parameters()))

    def construct(self, x, label):
        weights = self.weights
        # get_by_list=True: presumably yields one gradient per entry of `weights` -- TODO confirm
        # against the MindSpore GradOperation documentation for the version in use.
        return C.GradOperation('get_by_list', get_by_list=True)(self.network, weights)(x, label)


 def generator_train():
    """Yield ``(input, target)`` pairs from the module-level ``X_train`` and ``y_train``.

    Raises
    ------
    AssertionError
        If ``X_train`` and ``y_train`` differ in length (raised on first iteration).
    """
    samples, labels = X_train, y_train
    if len(samples) != len(labels):
        raise AssertionError("The length of inputs and targets should be equal")
    for pair in zip(samples, labels):
        yield pair


 # Model, optimizer (Momentum, constructed with 0.1 and 0.9 as its second and third
 # positional arguments), loss, and the gradient-producing wrapper around network + loss.
 net = MLP()
 optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9)
 criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
 net_with_criterion = WithLossCell(net, criterion)
 train_network = GradWrap(net_with_criterion)
 train_network.set_train()

 # X_train / y_train are assigned here, after generator_train was defined; that works
 # because the generator only reads them when it is iterated.
 X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 784))
 train_ds = tf.data.Dataset.from_generator(generator_train, output_types=(tf.float32, tf.int32))
 shuffle_buffer_size = 128
 batch_size = 128
 train_ds = train_ds.shuffle(shuffle_buffer_size)
 train_ds = train_ds.batch(batch_size)
 n_epoch = 50

 for epoch in range(n_epoch):
    start_time = time.time()
    train_network.set_train()
    train_loss, train_acc, n_iter = 0, 0, 0
    for X_batch, y_batch in train_ds:
        # Convert the tf.data batch (via numpy) into MindSpore tensors.
        X_batch = ms.Tensor(X_batch.numpy(), dtype=ms.float32)
        y_batch = ms.Tensor(y_batch.numpy(), dtype=ms.int32)
        # Forward pass and loss computed here only for reporting; train_network is called
        # separately on the same batch to obtain the gradients.
        output = net(X_batch)
        loss_output = criterion(output, y_batch)
        grads = train_network(X_batch, y_batch)
        # Apply the gradients; the returned value is stored but not used afterwards.
        success = optimizer(grads)
        loss = loss_output.asnumpy()
        train_loss += loss
        n_iter += 1
        # train_acc += np.mean((P.Equal()(P.Argmax(axis=1)(output), y_batch).asnumpy()))
        # NOTE(review): the prints below run once per batch, not once per epoch.
        print("Epoch {} of {} took {}".format(epoch + 1, n_epoch, time.time() - start_time))
        print("   train loss: {}".format(train_loss / n_iter))
        # print("   train acc:  {}".format(train_acc / n_iter))
        # Prints the data of the first trainable parameter ("triain" is a typo in the emitted text).
        print(" triain weights ", train_network.trainable_params()[0].data)
--- a/examples/basic_tutorials/tutorial_mnist_mlp_dynamic_MS_backend.py
+++ b/examples/basic_tutorials/tutorial_mnist_mlp_dynamic_MS_backend.py
@@ -1,12 +1,12 @@
 #!/usr/bin/env python3
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 import mindspore.nn as nn
 import os
 os.environ['TL_BACKEND'] = 'mindspore'

 import mindspore.ops.operations as P
 from mindspore.ops import composite as C
 from mindspore.common import dtype as mstype
 from mindspore import context, Tensor, ParameterTuple
 from mindspore.common.initializer import TruncatedNormal
 from mindspore.nn import SoftmaxCrossEntropyWithLogits, Momentum, WithLossCell
 from mindspore import ParameterTuple
 from mindspore.nn import Momentum, WithLossCell

 import numpy as np
 import tensorlayer as tl
--- a/examples/basic_tutorials/tutorial_mnist_mlp_paddlepaddle_backend.py
+++ b/examples/basic_tutorials/tutorial_mnist_mlp_paddlepaddle_backend.py
@@ -1,19 +1,23 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 # The tensorlayer and Paddle operators can be mixed

 import os
 os.environ['TL_BACKEND'] = 'paddle'
 # os.environ['TL_BACKEND'] = 'tensorflow'

 import tensorlayer as tl
 from tensorlayer.layers import Module
 from tensorlayer.layers import Dense, Flatten
 import paddle
 from paddle.io import TensorDataset

 print('download training data and load training data')

 X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 784))

 print('load finished')
 X_train = paddle.to_tensor(X_train.astype('float32'))
 y_train = paddle.to_tensor(y_train.astype('int64'))


 class MLP(Module):
@@ -33,11 +37,16 @@ class MLP(Module):
        return x


 traindataset = tl.dataflow.FromSlices((X_train, y_train))
 train_loader = tl.dataflow.Dataloader(traindataset, batch_size=64, shuffle=True)
 traindataset = paddle.io.TensorDataset([X_train, y_train])
 train_loader = paddle.io.DataLoader(traindataset, batch_size=64, shuffle=True)
 net = MLP()

 optimizer = tl.optimizers.Adam(learning_rate=0.001)
 metric = tl.metric.Accuracy()
 model = tl.models.Model(network=net, loss_fn=tl.cost.cross_entropy, optimizer=optimizer, metrics=metric)
 model.train(n_epoch=20, train_dataset=train_loader, print_freq=5, print_train_batch=True)
 model = tl.models.Model(
    network=net, loss_fn=tl.cost.softmax_cross_entropy_with_logits, optimizer=optimizer, metrics=metric
 )
 model.train(n_epoch=2, train_dataset=train_loader, print_freq=5, print_train_batch=True)
 model.save_weights('./model_mlp.npz', format='npz_dict')
 model.load_weights('./model_mlp.npz', format='npz_dict')
 # model.eval(train_loader)
--- a/examples/basic_tutorials/tutorial_mnist_mlp_tensorflow_backend.py
+++ b/examples/basic_tutorials/tutorial_mnist_mlp_tensorflow_backend.py
@@ -1,5 +1,8 @@
 #!/usr/bin/env python3
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 # The tensorlayer and tensorflow operators can be mixed
 import os
 os.environ['TL_BACKEND'] = 'tensorflow'

 import numpy as np
 import time
@@ -53,18 +56,17 @@ for epoch in range(n_epoch):  ## iterate the dataset n_epoch times
            ## compute outputs
            _logits = MLP(X_batch)
            ## compute loss and update model
            _loss = tl.cost.cross_entropy(_logits, y_batch, name='train_loss')
            _loss = tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='train_loss')
        grad = tape.gradient(_loss, train_weights)
        optimizer.apply_gradients(zip(grad, train_weights))

    ## use training and evaluation sets to evaluate the model every print_freq epoch
    if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
        MLP.set_train()
        print("Epoch {} of {} took {}".format(epoch + 1, n_epoch, time.time() - start_time))
        train_loss, train_acc, n_iter = 0, 0, 0
        for X_batch, y_batch in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=False):
            _logits = MLP(X_batch)
            train_loss += tl.cost.cross_entropy(_logits, y_batch, name='eval_loss')
            train_loss += tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='eval_loss')
            train_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
            n_iter += 1
        print("   train loss: {}".format(train_loss / n_iter))
@@ -73,19 +75,19 @@ for epoch in range(n_epoch):  ## iterate the dataset n_epoch times
        val_loss, val_acc, n_iter = 0, 0, 0
        for X_batch, y_batch in tl.iterate.minibatches(X_val, y_val, batch_size, shuffle=False):
            _logits = MLP(X_batch)  # is_train=False, disable dropout
            val_loss += tl.cost.cross_entropy(_logits, y_batch, name='eval_loss')
            val_loss += tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='eval_loss')
            val_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
            n_iter += 1
        print("   val loss: {}".format(val_loss / n_iter))
        print("   val acc:  {}".format(val_acc / n_iter))

 ## use testing data to evaluate the model
 MLP.eval()
 MLP.set_eval()
 test_loss, test_acc, n_iter = 0, 0, 0
 for X_batch, y_batch in tl.iterate.minibatches(X_test, y_test, batch_size, shuffle=False):
    _logits = MLP(X_batch, foo=1)
    test_loss += tl.cost.cross_entropy(_logits, y_batch, name='test_loss')
    test_loss += tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='test_loss')
    test_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
    n_iter += 1
 print("   test foo=1 loss: {}".format(val_loss / n_iter))
 print("   test foo=1 acc:  {}".format(val_acc / n_iter))
 print("   test foo=1 loss: {}".format(test_loss / n_iter))
 print("   test foo=1 acc:  {}".format(test_acc / n_iter))
--- a/examples/basic_tutorials/tutorial_mnist_simple.py
+++ b/examples/basic_tutorials/tutorial_mnist_simple.py
@@ -1,18 +1,37 @@
 #!/usr/bin/env python3
 #! /usr/bin/python
 # -*- coding: utf-8 -*-

 import numpy as np
 # The same set of code can switch the backend with one line
 import os
 os.environ['TL_BACKEND'] = 'tensorflow'
 # os.environ['TL_BACKEND'] = 'tensorflow'
 # os.environ['TL_BACKEND'] = 'mindspore'
 os.environ['TL_BACKEND'] = 'paddle'

 import tensorlayer as tl
 from tensorlayer.layers import Module
 from tensorlayer.layers import Dense, Dropout
 from tensorlayer.dataflow import Dataset

 X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 784))


 class mnistdataset(Dataset):
    """Index-addressable dataset returning ``(float32 sample, int64 label)`` pairs."""

    def __init__(self, data=X_train, label=y_train):
        self.data, self.label = data, label

    def __getitem__(self, index):
        """Return the sample and label stored at ``index`` after dtype conversion."""
        sample = self.data[index].astype('float32')
        target = self.label[index].astype('int64')
        return sample, target

    def __len__(self):
        """Number of samples, taken from the length of ``self.data``."""
        return len(self.data)


 class CustomModel(Module):

    def __init__(self):
@@ -27,7 +46,6 @@ class CustomModel(Module):
    def forward(self, x, foo=None):
        z = self.dropout1(x)
        z = self.dense1(z)
        # z = self.bn(z)
        z = self.dropout2(z)
        z = self.dense2(z)
        z = self.dropout3(z)
@@ -37,32 +55,23 @@ class CustomModel(Module):
        return out


 def generator_train():
    """Yield ``(input, np.array(target))`` pairs from the module-level ``X_train`` and ``y_train``.

    Raises
    ------
    AssertionError
        If ``X_train`` and ``y_train`` differ in length (raised on first iteration).
    """
    samples, labels = X_train, y_train
    if len(samples) != len(labels):
        raise AssertionError("The length of inputs and targets should be equal")
    for sample, target in zip(samples, labels):
        # Each target is wrapped in a numpy array before being yielded.
        yield (sample, np.array(target))


 MLP = CustomModel()

 n_epoch = 50
 batch_size = 128
 print_freq = 2
 shuffle_buffer_size = 128

 train_weights = MLP.trainable_weights
 optimizer = tl.optimizers.Momentum(0.05, 0.9)
 train_ds = tl.dataflow.FromGenerator(
    generator_train, output_types=(tl.float32, tl.int32) , column_names=['data', 'label']
 metric = tl.metric.Accuracy()
 loss_fn = tl.cost.softmax_cross_entropy_with_logits
 train_dataset = mnistdataset(data=X_train, label=y_train)
 train_dataset = tl.dataflow.FromGenerator(
    train_dataset, output_types=[tl.float32, tl.int64], column_names=['data', 'label']
 )
 train_ds = tl.dataflow.Shuffle(train_ds,shuffle_buffer_size)
 train_ds = tl.dataflow.Batch(train_ds,batch_size)

 train_loader = tl.dataflow.Dataloader(train_dataset, batch_size=batch_size, shuffle=True)

 model = tl.models.Model(network=MLP, loss_fn=tl.cost.cross_entropy, optimizer=optimizer)
 model.train(n_epoch=n_epoch, train_dataset=train_ds, print_freq=print_freq, print_train_batch=False)
 model = tl.models.Model(network=MLP, loss_fn=loss_fn, optimizer=optimizer, metrics=metric)
 model.train(n_epoch=n_epoch, train_dataset=train_loader, print_freq=print_freq, print_train_batch=False)
 model.save_weights('./model.npz', format='npz_dict')
 model.load_weights('./model.npz', format='npz_dict')
--- a/examples/basic_tutorials/tutorial_nested_usage_of_Layer.py
+++ b/examples/basic_tutorials/tutorial_nested_usage_of_Layer.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 import os
 os.environ['TL_BACKEND'] = 'tensorflow'

 import time
 import numpy as np
@@ -12,7 +14,9 @@ from tensorlayer.layers import (Conv2d, Dense, Flatten, MaxPool2d, BatchNorm2d,

 X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)


 class Block(Module):

    def __init__(self, in_channels):
        super(Block, self).__init__()
        self.dense1 = Dense(in_channels=in_channels, n_units=256)
@@ -83,6 +87,7 @@ class CNN(Module):

 # get the network
 net = CNN()
 print(net)
 # training settings
 batch_size = 128
 n_epoch = 500
@@ -173,7 +178,7 @@ for epoch in range(n_epoch):
            # compute outputs
            _logits = net(X_batch)
            # compute loss and update model
            _loss_ce = tl.cost.cross_entropy(_logits, y_batch, name='train_loss')
            _loss_ce = tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='train_loss')

        grad = tape.gradient(_loss_ce, train_weights)
        optimizer.apply_gradients(zip(grad, train_weights))
@@ -189,23 +194,23 @@ for epoch in range(n_epoch):
    # use training and evaluation sets to evaluate the model every print_freq epoch
    if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:

        net.eval()
        net.set_eval()
        val_loss, val_acc, n_iter = 0, 0, 0
        for X_batch, y_batch in test_ds:
            _logits = net(X_batch)  # is_train=False, disable dropout
            val_loss += tl.cost.cross_entropy(_logits, y_batch, name='eval_loss')
            val_loss += tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='eval_loss')
            val_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
            n_iter += 1
        print("   val loss: {}".format(val_loss / n_iter))
        print("   val acc:  {}".format(val_acc / n_iter))

 # use testing data to evaluate the model
 net.eval()
 net.set_eval()
 test_loss, test_acc, n_iter = 0, 0, 0
 for X_batch, y_batch in test_ds:
    _logits = net(X_batch)
    test_loss += tl.cost.cross_entropy(_logits, y_batch, name='test_loss')
    test_loss += tl.cost.softmax_cross_entropy_with_logits(_logits, y_batch, name='test_loss')
    test_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
    n_iter += 1
 print("   test loss: {}".format(test_loss / n_iter))
 print("   test acc:  {}".format(test_acc / n_iter))
 print("   test acc:  {}".format(test_acc / n_iter))
--- a/examples/model_zoo/init.py
+++ b/examples/model_zoo/init.py
@@ -0,0 +1,6 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-

 from .vgg import vgg16, vgg19
 from .yolo import YOLOv4
 from .resnet import ResNet50
--- a/examples/model_zoo/common.py
+++ b/examples/model_zoo/common.py
@@ -6,6 +6,7 @@ import colorsys, random, cv2
 import numpy as np
 from tensorlayer.visualize import save_image


 def decode_tf(conv_output, output_size, NUM_CLASS, STRIDES, ANCHORS, i=0, XYSCALE=[1, 1, 1]):
    batch_size = tf.shape(conv_output)[0]
    conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
@@ -284,4 +285,4 @@ def draw_boxes_and_labels_to_image_with_json(image, json_result, class_list, sav
    if save_name is not None:
        save_image(image, save_name)

    return image
    return image
--- a/examples/model_zoo/pretrained_resnet50.py
+++ b/examples/model_zoo/pretrained_resnet50.py
@@ -14,7 +14,7 @@ from examples.model_zoo.resnet import ResNet50
 tl.logging.set_verbosity(tl.logging.DEBUG)

 # get the whole model
 resnet = ResNet50(pretrained=False)
 resnet = ResNet50(pretrained=True)
 resnet.set_eval()

 img1 = tl.vis.read_image('data/tiger.jpeg')
--- a/examples/model_zoo/pretrained_yolov4.py
+++ b/examples/model_zoo/pretrained_yolov4.py
@@ -1,3 +1,6 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-

 import numpy as np
 import cv2
 from PIL import Image
@@ -25,4 +28,4 @@ json_result = result_to_json(image, pred_bbox)

 image = draw_boxes_and_labels_to_image_with_json(image, json_result, class_names)
 image = Image.fromarray(image.astype(np.uint8))
 image.show()
 image.show()
--- a/examples/model_zoo/resnet.py
+++ b/examples/model_zoo/resnet.py
@@ -28,6 +28,7 @@ in_channels_conv = [64, 256, 512, 1024]
 in_channels_identity = [256, 512, 1024, 2048]
 henorm = tl.initializers.he_normal()


 class identity_block(Module):
    """The identity block where there is no conv layer at shortcut.

@@ -49,10 +50,11 @@ class identity_block(Module):
        Output tensor of this block.

    """

    def __init__(self, kernel_size, n_filters, stage, block):
        super(identity_block, self).__init__()
        filters1, filters2, filters3 = n_filters
        _in_channels = in_channels_identity[stage-2]
        _in_channels = in_channels_identity[stage - 2]
        conv_name_base = 'res' + str(stage) + block + '_branch'
        bn_name_base = 'bn' + str(stage) + block + '_branch'

@@ -60,7 +62,9 @@ class identity_block(Module):
        self.bn1 = BatchNorm(name=bn_name_base + '2a', act='relu', num_features=filters1)

        ks = (kernel_size, kernel_size)
        self.conv2 = Conv2d(filters2, ks, padding='SAME', W_init=henorm, name=conv_name_base + '2b', in_channels=filters1)
        self.conv2 = Conv2d(
            filters2, ks, padding='SAME', W_init=henorm, name=conv_name_base + '2b', in_channels=filters1
        )
        self.bn2 = BatchNorm(name=bn_name_base + '2b', act='relu', num_features=filters2)

        self.conv3 = Conv2d(filters3, (1, 1), W_init=henorm, name=conv_name_base + '2c', in_channels=filters2)
@@ -80,23 +84,30 @@ class identity_block(Module):


 class conv_block(Module):

    def __init__(self, kernel_size, n_filters, stage, block, strides=(2, 2)):
        super(conv_block, self).__init__()
        filters1, filters2, filters3 = n_filters
        _in_channels = in_channels_conv[stage-2]
        _in_channels = in_channels_conv[stage - 2]
        conv_name_base = 'res' + str(stage) + block + '_branch'
        bn_name_base = 'bn' + str(stage) + block + '_branch'
        self.conv1 = Conv2d(filters1, (1, 1), strides=strides, W_init=henorm, name=conv_name_base + '2a', in_channels=_in_channels)
        self.conv1 = Conv2d(
            filters1, (1, 1), strides=strides, W_init=henorm, name=conv_name_base + '2a', in_channels=_in_channels
        )
        self.bn1 = BatchNorm(name=bn_name_base + '2a', act='relu', num_features=filters1)

        ks = (kernel_size, kernel_size)
        self.conv2 = Conv2d(filters2, ks, padding='SAME', W_init=henorm, name=conv_name_base + '2b', in_channels=filters1)
        self.conv2 = Conv2d(
            filters2, ks, padding='SAME', W_init=henorm, name=conv_name_base + '2b', in_channels=filters1
        )
        self.bn2 = BatchNorm(name=bn_name_base + '2b', act='relu', num_features=filters2)

        self.conv3 = Conv2d(filters3, (1, 1), W_init=henorm, name=conv_name_base + '2c', in_channels=filters2)
        self.bn3 = BatchNorm(name=bn_name_base + '2c', num_features=filters3)

        self.shortcut_conv = Conv2d(filters3, (1, 1), strides=strides, W_init=henorm, name=conv_name_base + '1', in_channels=_in_channels)
        self.shortcut_conv = Conv2d(
            filters3, (1, 1), strides=strides, W_init=henorm, name=conv_name_base + '1', in_channels=_in_channels
        )
        self.shortcut_bn = BatchNorm(name=bn_name_base + '1', num_features=filters3)

        self.add = Elementwise(tl.add, act='relu')
@@ -117,6 +128,7 @@ class conv_block(Module):


 class ResNet50_model(Module):

    def __init__(self, end_with='fc1000', n_classes=1000):
        super(ResNet50_model, self).__init__()
        self.end_with = end_with
@@ -141,7 +153,9 @@ class ResNet50_model(Module):
                block = block_name[1]
                if block == 'a':
                    strides = (1, 1) if stage == 2 else (2, 2)
                    layer_list.append(conv_block(3, block_filters[stage - 2], stage=stage, block=block, strides=strides))
                    layer_list.append(
                        conv_block(3, block_filters[stage - 2], stage=stage, block=block, strides=strides)
                    )
                else:
                    layer_list.append(identity_block(3, block_filters[stage - 2], stage=stage, block=block))
            elif block_name == 'avg_pool':
@@ -155,7 +169,8 @@ class ResNet50_model(Module):


 def ResNet50(pretrained=False, end_with='fc1000', n_classes=1000):
    """Pre-trained MobileNetV1 model (static mode). Input shape [?, 224, 224, 3].
    """Pre-trained ResNet50 model. Input shape [?, 224, 224, 3].

    To use pretrained model, input should be in BGR format and subtracted from ImageNet mean [103.939, 116.779, 123.68].

    Parameters
@@ -175,14 +190,14 @@ def ResNet50(pretrained=False, end_with='fc1000', n_classes=1000):
    Classify ImageNet classes, see `tutorial_models_resnet50.py`
    TODO Modify the usage example according to the model storage location
    >>> # get the whole model with pretrained weights
    >>> resnet = tl.models.ResNet50(pretrained=True)
    >>> resnet = ResNet50(pretrained=True)
    >>> # use for inferencing
    >>> output = resnet(img1, is_train=False)
    >>> prob = tf.nn.softmax(output)[0].numpy()
    >>> output = resnet(img1)
    >>> prob = tl.ops.softmax(output)[0].numpy()

    Extract the features before fc layer
    >>> resnet = tl.models.ResNet50(pretrained=True, end_with='5c')
    >>> output = resnet(img1, is_train=False)
    >>> resnet = ResNet50(pretrained=True, end_with='5c')
    >>> output = resnet(img1)

    Returns
    -------
@@ -212,14 +227,15 @@ def restore_params(network, path='models'):

    f = h5py.File(os.path.join(path, 'resnet50_weights_tf_dim_ordering_tf_kernels.h5'), 'r')

    for layer in network.all_layers:
        if len(layer.all_weights) == 0:
            continue
        w_names = list(f[layer.name])
        params = [f[layer.name][n][:] for n in w_names]
        # if 'bn' in layer.name:
        #     params = [x.reshape(1, 1, 1, -1) for x in params]
        assign_weights(params, layer)
        del params
    # TODO Update parameter loading
    # for layer in network.all_layers:
    #     if len(layer.all_weights) == 0:
    #         continue
    #     w_names = list(f[layer.name])
    #     params = [f[layer.name][n][:] for n in w_names]
    #     # if 'bn' in layer.name:
    #     #     params = [x.reshape(1, 1, 1, -1) for x in params]
    #     assign_weights(params, layer)
    #     del params

    f.close()
--- a/examples/model_zoo/vgg.py
+++ b/examples/model_zoo/vgg.py
@@ -151,6 +151,7 @@ def make_layers(config, batch_norm=False, end_with='outputs'):
            break
    return SequentialLayer(layer_list)


 def restore_model(model, layer_type):
    logging.info("Restore pre-trained weights")
    # download weights
@@ -177,6 +178,7 @@ def restore_model(model, layer_type):
    assign_weights(weights, model)
    del weights


 def vgg16(pretrained=False, end_with='outputs', mode='dynamic', name=None):
    """Pre-trained VGG16 model.

@@ -196,38 +198,17 @@ def vgg16(pretrained=False, end_with='outputs', mode='dynamic', name=None):
    Classify ImageNet classes with VGG16, see `tutorial_models_vgg.py <https://github.com/tensorlayer/tensorlayer/blob/master/example/tutorial_models_vgg.py>`__
    With TensorLayer
    TODO Modify the usage example according to the model storage location

    >>> # get the whole model, without pre-trained VGG parameters
    >>> vgg = tl.models.vgg16()
    >>> vgg = vgg16()
    >>> # get the whole model, restore pre-trained VGG parameters
    >>> vgg = tl.models.vgg16(pretrained=True)
    >>> vgg = vgg16(pretrained=True)
    >>> # use for inferencing
    >>> output = vgg(img, is_train=False)
    >>> probs = tf.nn.softmax(output)[0].numpy()

    Extract features with VGG16 and Train a classifier with 100 classes

    >>> # get VGG without the last layer
    >>> cnn = tl.models.vgg16(end_with='fc2_relu', mode='static').as_layer()
    >>> # add one more layer and build a new model
    >>> ni = Input([None, 224, 224, 3], name="inputs")
    >>> nn = cnn(ni)
    >>> nn = tl.layers.Dense(n_units=100, name='out')(nn)
    >>> model = tl.models.Model(inputs=ni, outputs=nn)
    >>> # train your own classifier (only update the last layer)
    >>> train_params = model.get_layer('out').trainable_weights

    Reuse model

    >>> # in dynamic model, we can directly use the same model
    >>> # in static model
    >>> vgg_layer = tl.models.vgg16().as_layer()
    >>> ni_1 = tl.layers.Input([None, 224, 244, 3])
    >>> ni_2 = tl.layers.Input([None, 224, 244, 3])
    >>> a_1 = vgg_layer(ni_1)
    >>> a_2 = vgg_layer(ni_2)
    >>> M = Model(inputs=[ni_1, ni_2], outputs=[a_1, a_2])
    >>> output = vgg(img)
    >>> probs = tl.ops.softmax(output)[0].numpy()

    """

    if mode == 'dynamic':
        model = VGG(layer_type='vgg16', batch_norm=False, end_with=end_with, name=name)
    elif mode == 'static':
@@ -259,35 +240,12 @@ def vgg19(pretrained=False, end_with='outputs', mode='dynamic', name=None):
    With TensorLayer

    >>> # get the whole model, without pre-trained VGG parameters
    >>> vgg = tl.models.vgg19()
    >>> vgg = vgg19()
    >>> # get the whole model, restore pre-trained VGG parameters
    >>> vgg = tl.models.vgg19(pretrained=True)
    >>> vgg = vgg19(pretrained=True)
    >>> # use for inferencing
    >>> output = vgg(img, is_train=False)
    >>> probs = tf.nn.softmax(output)[0].numpy()

    Extract features with VGG19 and Train a classifier with 100 classes

    >>> # get VGG without the last layer
    >>> cnn = tl.models.vgg19(end_with='fc2_relu', mode='static').as_layer()
    >>> # add one more layer and build a new model
    >>> ni = Input([None, 224, 224, 3], name="inputs")
    >>> nn = cnn(ni)
    >>> nn = tl.layers.Dense(n_units=100, name='out')(nn)
    >>> model = tl.models.Model(inputs=ni, outputs=nn)
    >>> # train your own classifier (only update the last layer)
    >>> train_params = model.get_layer('out').trainable_weights

    Reuse model

    >>> # in dynamic model, we can directly use the same model
    >>> # in static model
    >>> vgg_layer = tl.models.vgg19().as_layer()
    >>> ni_1 = tl.layers.Input([None, 224, 244, 3])
    >>> ni_2 = tl.layers.Input([None, 224, 244, 3])
    >>> a_1 = vgg_layer(ni_1)
    >>> a_2 = vgg_layer(ni_2)
    >>> M = Model(inputs=[ni_1, ni_2], outputs=[a_1, a_2])
    >>> output = vgg(img)
    >>> probs = tl.ops.softmax(output)[0].numpy()

    """
    if mode == 'dynamic':
--- a/examples/model_zoo/yolo.py
+++ b/examples/model_zoo/yolo.py
@@ -15,6 +15,8 @@ from tensorlayer.layers import Conv2d, MaxPool2d, BatchNorm2d, ZeroPad2d, UpSamp
 from tensorlayer.layers import Module, SequentialLayer
 from tensorlayer import logging

 __all__ = ['YOLOv4']

 INPUT_SIZE = 416
 weights_url = {'link': 'https://pan.baidu.com/s/1MC1dmEwpxsdgHO1MZ8fYRQ', 'password': 'idsz'}

@@ -24,7 +26,8 @@ class Convolutional(Module):
    Create Convolution layer
    Because it is only a stack of reference layers, there is no build, so self._built=True
    """
    def __init__(self, filters_shape, downsample=False, activate=True, bn=True, activate_type='leaky',name=None):

    def __init__(self, filters_shape, downsample=False, activate=True, bn=True, activate_type='leaky', name=None):
        super(Convolutional, self).__init__()
        self.act = activate
        self.act_type = activate_type
@@ -44,11 +47,13 @@ class Convolutional(Module):
            b_init = tl.initializers.constant(value=0.0)

        self.zeropad = ZeroPad2d(((1, 0), (1, 0)))
        self.conv = Conv2d(n_filter=filters_shape[-1], in_channels=filters_shape[2], filter_size=(filters_shape[0], filters_shape[1]),
                           strides=(strides, strides),padding=padding, b_init=b_init, name=name)
        self.conv = Conv2d(
            n_filter=filters_shape[-1], in_channels=filters_shape[2], filter_size=(filters_shape[0], filters_shape[1]),
            strides=(strides, strides), padding=padding, b_init=b_init, name=name
        )

        if bn:
            if activate == True:
            if activate ==True:
                if activate_type == 'leaky':
                    self.batchnorm2d = BatchNorm2d(act='leaky_relu0.1', num_features=filters_shape[-1])
                elif activate_type == 'mish':
@@ -66,7 +71,9 @@ class Convolutional(Module):
            output = self.batchnorm2d(output)
        return output


 class residual_block(Module):

    def __init__(self, input_channel, filter_num1, filter_num2, activate_type='leaky'):
        super(residual_block, self).__init__()
        self.conv1 = Convolutional(filters_shape=(1, 1, input_channel, filter_num1), activate_type=activate_type)
@@ -79,13 +86,16 @@ class residual_block(Module):
        output = self.add([inputs, output])
        return output


 def residual_block_num(num, input_channel, filter_num1, filter_num2, activate_type='leaky'):
    """Stack ``num`` identically configured ``residual_block`` modules.

    Every block is built with the same ``input_channel``, ``filter_num1``,
    ``filter_num2`` and ``activate_type``; the resulting list is wrapped in a
    ``SequentialLayer`` and returned.
    """
    blocks = [
        residual_block(input_channel, filter_num1, filter_num2, activate_type=activate_type) for _ in range(num)
    ]
    return SequentialLayer(blocks)


 class cspdarknet53(Module):

    def __init__(self):
        super(cspdarknet53, self).__init__()
        self._built = True
@@ -124,7 +134,6 @@ class cspdarknet53(Module):
        self.conv5_5 = Convolutional((1, 1, 1024, 512), activate_type='mish')
        self.residual_5 = residual_block_num(4, 512, 512, 512, activate_type="mish")


        self.conv6_1 = Convolutional((1, 1, 512, 512), activate_type='mish')
        self.conv6_2 = Convolutional((1, 1, 1024, 1024), activate_type='mish')
        self.conv6_3 = Convolutional((1, 1, 1024, 512))
@@ -206,6 +215,7 @@ class cspdarknet53(Module):


 class YOLOv4_model(Module):

    def __init__(self, NUM_CLASS):
        super(YOLOv4_model, self).__init__()
        self.cspdarnnet = cspdarknet53()
@@ -310,6 +320,7 @@ class YOLOv4_model(Module):

        return conv_sbbox, conv_mbbox, conv_lbbox


 def YOLOv4(NUM_CLASS, pretrained=False):
    """Pre-trained YOLOv4 model.

@@ -327,11 +338,11 @@ def YOLOv4(NUM_CLASS, pretrained=False):
    With TensorLayer

    >>> # get the whole model, without pre-trained YOLOv4 parameters
    >>> yolov4 = tl.app.YOLOv4(NUM_CLASS=80, pretrained=False)
    >>> yolov4 = YOLOv4(NUM_CLASS=80, pretrained=False)
    >>> # get the whole model, restore pre-trained YOLOv4 parameters
    >>> yolov4 = tl.app.YOLOv4(NUM_CLASS=80, pretrained=True)
    >>> yolov4 = YOLOv4(NUM_CLASS=80, pretrained=True)
    >>> # use for inferencing
    >>> output = yolov4(img, is_train=False)
    >>> output = yolov4(img)

    """

@@ -359,7 +370,11 @@ def restore_params(network, model_path='models.npz'):
        network.all_weights[i].assign(npz[line[i].strip()])
        logging.info("  Loading weights %s in %s" % (network.all_weights[i].shape, network.all_weights[i].name))

 def tl2_weights_to_tl3_weights(weights_2_path='model/weights_2.txt', weights_3_path='model/weights_3.txt', txt_path='model/yolov4_weights_config.txt'):

 def tl2_weights_to_tl3_weights(
    weights_2_path='model/weights_2.txt', weights_3_path='model/weights_3.txt',
    txt_path='model/yolov4_weights_config.txt'
 ):
    weights_2_path = weights_2_path
    weights_3_path = weights_3_path
    txt_path = txt_path
--- a/img/tensorlayer_v.png
+++ b/img/tensorlayer_v.png
--- a/requirements/requirements_paddle.txt
+++ b/requirements/requirements_paddle.txt
@@ -0,0 +1 @@
 paddlepaddle>=2.0.2
--- a/run_compile.py
+++ b/run_compile.py
@@ -1,74 +0,0 @@
 import tensorlayer as T
 from dragon.vm.tensorlayer.layers import Dense
 from dragon.vm.tensorlayer.models import Model
 import dragon.vm.tensorlayer as tl

 import dragon as dg
 import argparse
 import numpy as np

 # Runs at import time: load MNIST through T.files.load_mnist_dataset with shape=(-1, 784)
 # and unpack the six returned values into train / validation / test inputs and labels.
 X_train, y_train, X_val, y_val, X_test, y_test = T.files.load_mnist_dataset(shape=(-1, 784))


 class MLP(Model):
    """Three ``Dense`` layers applied in sequence (784 -> 800 -> 800 -> 10), each using ``tl.act.relu``."""

    def __init__(self):
        super(MLP, self).__init__()
        relu = tl.act.relu
        self.dense1 = Dense(n_units=800, act=relu, in_channels=784)
        self.dense2 = Dense(n_units=800, act=relu, in_channels=800)
        self.dense3 = Dense(n_units=10, act=relu, in_channels=800)

    def forward(self, x):
        """Feed ``x`` through dense1, dense2 and dense3 in that order and return the last output."""
        hidden = self.dense2(self.dense1(x))
        return self.dense3(hidden)


 class Classifier(object):
    """Bundles an ``MLP`` with an optimizer and exposes a single training step."""

    # Tensor specs declared for graph execution; nothing inside this class reads them.
    # NOTE(review): image_spec is [None, 3, 32, 32] while the script loads data with
    # shape (-1, 784) -- looks inconsistent, confirm whether these specs are still needed.
    image_spec = dg.Tensor([None, 3, 32, 32], 'float32')
    label_spec = dg.Tensor([None], 'int64')

    def __init__(self, optimizer):
        """Create the network and remember ``optimizer`` and the network's trainable weights."""
        super(Classifier, self).__init__()
        self.net = MLP()
        self.optimizer = optimizer
        self.params = self.net.trainable_weights

    def step(self, image, label):
        """Run one forward/backward pass and apply the gradients.

        Returns
        -------
            tuple ``(loss, accuracy, self.optimizer)``.
        """
        with dg.GradientTape() as tape:
            logit = self.net(image)
            # The network output is reduced to a predicted class index (argmax over the
            # last axis, cast to int32) before the loss is computed.
            # NOTE(review): argmax is generally not differentiable, so the gradients of
            # this loss w.r.t. self.params may be zero/None -- confirm this is intended.
            logit = dg.cast(dg.math.argmax(logit, -1), 'int32')
            # Earlier experiments kept for reference:
            #   dg.losses.smooth_l1_loss([logit, label])
            #   tl.losses.sparse_softmax_crossentropy(logit, label)
            #   dg.losses.sparse_softmax_cross_entropy([logit, label])
            # Current loss: sum of squared differences between predicted index and label.
            loss = dg.math.sum(
                (logit - label) * (logit - label)
            )
        # Accuracy is the mean of the element-wise equality between predictions and labels.
        accuracy = dg.math.mean(dg.math.equal([logit, label]).astype('float32'))
        grads = tape.gradient(loss, self.params)
        # NOTE(review): pairs are built as (param, grad); verify this is the order the
        # optimizer's apply_gradients expects.
        self.optimizer.apply_gradients(zip(self.params, grads))
        return loss, accuracy, self.optimizer


 if __name__ == '__main__':
    dg.autograph.set_execution('EAGER_MODE')

    # Build the SGD optimizer first, then hand it to the classifier.
    sgd = dg.optimizers.SGD(base_lr=0.001, momentum=0.9, weight_decay=1e-4)
    model = Classifier(sgd)

    batch_size = 200
    for epoch in range(50):
        batches = T.iterate.minibatches(X_train, y_train, batch_size, shuffle=True)
        for X_batch, y_batch in batches:
            loss, accuracy, _ = model.step(
                dg.EagerTensor(X_batch, copy=False),
                dg.EagerTensor(y_batch, copy=False, dtype='float32'),
            )
            # Only passes 0, 20 and 40 report; every batch of such a pass is logged.
            if epoch % 20 != 0:
                continue
            report = (epoch, str(model.optimizer.base_lr), loss, accuracy)
            dg.logging.info('Iteration %d, lr = %s, loss = %.5f, accuracy = %.3f' % report)
--- a/setup.cfg
+++ b/setup.cfg
@@ -23,7 +23,7 @@ based_on_style=google
 # The number of columns to use for indentation.
 indent_width = 4

 # The column limit.
 # The column limit. (larger than usual)
 column_limit=120

 # Place each dictionary entry onto its own line.
@@ -76,4 +76,4 @@ no_spaces_around_selected_binary_operators = True
 allow_multiline_lambdas = True

 SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT = 10
 SPLIT_PENALTY_AFTER_OPENING_BRACKET = 500
 SPLIT_PENALTY_AFTER_OPENING_BRACKET = 500
--- a/setup.py
+++ b/setup.py
@@ -5,39 +5,24 @@ import sys

 os.environ['TENSORLAYER_PACKAGE_BUILDING'] = 'True'


 try:
    from setuptools import find_packages, setup, Extension
    from setuptools.command.build_ext import build_ext

 except ImportError:
    from distutils.core import (
        setup,
        find_packages
    )

    from distutils.core import (setup, find_packages)

 from tensorlayer import (
    __contact_emails__,
    __contact_names__,
    __description__,
    __download_url__,
    __homepage__,
    __keywords__,
    __license__,
    __package_name__,
    __repository_url__,
    __version__
    __contact_emails__, __contact_names__, __description__, __download_url__, __homepage__, __keywords__, __license__,
    __package_name__, __repository_url__, __version__
 )


 # =================== Reading Readme file as TXT files ===================

 if os.path.exists('README.rst'):
    # codec is used for consistent encoding
    long_description = codecs.open(
        os.path.join(os.path.abspath(os.path.dirname(__file__)), 'README.rst'),
        'r', 'utf-8'
        os.path.join(os.path.abspath(os.path.dirname(__file__)), 'README.rst'), 'r', 'utf-8'
    ).read()

 else:
@@ -53,8 +38,8 @@ def req_file(filename, folder="requirements"):
    # Example: `\n` at the end of each line
    return [x.strip() for x in content]

 # ======================= Defining the requirements var =======================

 # ======================= Defining the requirements var =======================

 install_requires = req_file("requirements.txt")

@@ -83,11 +68,9 @@ extras_require['all_dev'] = sum([extras_require.get(key) for key in ['all', 'db'
 extras_require['all_cpu_dev'] = sum([extras_require.get(key) for key in ['all_dev', 'tf_cpu']], list())
 extras_require['all_gpu_dev'] = sum([extras_require.get(key) for key in ['all_dev', 'tf_gpu']], list())


 cmdclass = dict()
 ext_modules = []


 # Readthedocs requires TF 1.5.0 to build properly
 if 'READTHEDOCS' in os.environ:
    ext_modules = [
@@ -95,16 +78,14 @@ if 'READTHEDOCS' in os.environ:
    ]

    class custom_build_ext(build_ext):

        def build_extensions(self):
            os.system('./scripts/install-requirements-for-rtd.sh %s' %
                      os.path.dirname(sys.executable))
            os.system('./scripts/install-requirements-for-rtd.sh %s' % os.path.dirname(sys.executable))

    cmdclass = {'build_ext': custom_build_ext}


 # ======================= Define the package setup =======================


 setup(
    name=__package_name__,

@@ -112,7 +93,6 @@ setup(
    # the version across setup.py and the project code, see
    # https://packaging.python.org/en/latest/single_source_version.html
    version=__version__,

    description=__description__,
    long_description=long_description,

@@ -130,7 +110,6 @@ setup(

    # The licence under which the project is released
    license=__license__,

    classifiers=[
        # How mature is this project? Common values are
        #  1 - Planning
@@ -170,7 +149,6 @@ setup(
        'Natural Language :: English',
        'Operating System :: OS Independent',
    ],

    keywords=__keywords__,
    packages=find_packages(),

@@ -179,7 +157,6 @@ setup(
    # requirements files see:
    # https://packaging.python.org/en/latest/requirements.html
    install_requires=install_requires,

    cmdclass=cmdclass,

    # List additional groups of dependencies here (e.g. development
@@ -187,7 +164,6 @@ setup(
    # $ pip install -e .[test]
    extras_require=extras_require,
    ext_modules=ext_modules,

    scripts=[
        'tl',
    ],
--- a/tensorlayer/init.py
+++ b/tensorlayer/init.py
@@ -51,6 +51,7 @@ if 'TENSORLAYER_PACKAGE_BUILDING' not in os.environ:
    from tensorlayer import utils
    from tensorlayer import dataflow
    from tensorlayer import metric
    from tensorlayer import vision

    from tensorlayer.lazy_imports import LazyImport

--- a/tensorlayer/backend/init.py
+++ b/tensorlayer/backend/init.py
@@ -3,4 +3,4 @@

 # load ops
 from .ops import *
 from tensorlayer.backend import ops
 from tensorlayer.backend import ops
--- a/tensorlayer/backend/ops/init.py
+++ b/tensorlayer/backend/ops/init.py
@@ -45,8 +45,12 @@ from .load_backend import Conv1D
 from .load_backend import Conv2D
 from .load_backend import Conv3D
 from .load_backend import BiasAdd
 from .load_backend import MaxPool1d
 from .load_backend import MaxPool
 from .load_backend import MaxPool3d
 from .load_backend import AvgPool1d
 from .load_backend import AvgPool
 from .load_backend import AvgPool3d
 from .load_backend import Dropout
 from .load_backend import BatchNorm
 from .load_backend import DepthwiseConv2d
@@ -58,6 +62,8 @@ from .load_backend import AdaptiveMeanPool3D
 from .load_backend import AdaptiveMaxPool1D
 from .load_backend import AdaptiveMaxPool2D
 from .load_backend import AdaptiveMaxPool3D
 from .load_backend import Floor
 from .load_backend import Ceil

 # load ops
 from .load_backend import Variable
@@ -68,6 +74,7 @@ from .load_backend import minimum
 from .load_backend import reshape
 from .load_backend import concat
 from .load_backend import convert_to_tensor
 from .load_backend import convert_to_numpy
 from .load_backend import sqrt
 from .load_backend import reduce_mean
 from .load_backend import reduce_min
@@ -112,10 +119,10 @@ from .load_backend import OneHot
 from .load_backend import L2Normalize
 from .load_backend import EmbeddingLookup
 from .load_backend import NCELoss
 from .load_backend import Not_equal
 from .load_backend import NotEqual
 from .load_backend import Cast
 from .load_backend import ExpandDims
 from .load_backend import Count_nonzero
 from .load_backend import CountNonzero
 from .load_backend import FlattenReshape
 from .load_backend import Transpose
 from .load_backend import MatMul
--- a/tensorlayer/backend/ops/dragon_backend.py
+++ b/tensorlayer/backend/ops/dragon_backend.py
--- a/tensorlayer/backend/ops/dragon_nn.py
+++ b/tensorlayer/backend/ops/dragon_nn.py
@@ -1,910 +0,0 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 import dragon as D
 from dragon.core.ops import vision_ops
 from dragon.core.ops import activation_ops


 def padding_format(padding):
    """
    Normalise a padding name to its canonical upper-case form.

    Parameters
    ----------
    padding : str or None
        Must be one of the following: "same", "SAME", "VALID", "valid", or None.

    Returns
    -------
        str "SAME" or "VALID"; None is passed through unchanged.

    Raises
    ------
    ValueError
        If ``padding`` is none of the accepted values. ``ValueError`` is an
        ``Exception``, so callers catching ``Exception`` keep working.
    """

    if padding in ("SAME", "same"):
        return "SAME"
    if padding in ("VALID", "valid"):
        return "VALID"
    # Identity test: ``== None`` can be hijacked by objects that overload ``__eq__``.
    if padding is None:
        return None
    raise ValueError("Unsupported padding: " + str(padding))


 def preprocess_1d_format(data_format, padding):
    """
    Normalise a 1-D data-format name together with a padding name.

    Parameters
    ----------
    data_format : str or None
        "channels_last" or "NWC" map to "NWC"; "channels_first" or "NCW" map to "NCW";
        None is passed through unchanged.
    padding : str or None
        Forwarded to ``padding_format`` for normalisation.

    Returns
    -------
        tuple ``(data_format, padding)`` with both entries normalised.

    Raises
    ------
    ValueError
        If ``data_format`` is none of the accepted values.
    """

    if data_format in ("channels_last", "NWC"):
        data_format = "NWC"
    elif data_format in ("channels_first", "NCW"):
        data_format = "NCW"
    elif data_format is not None:
        # Only None may fall through untouched; anything else is a caller error.
        raise ValueError("Unsupported data format: " + str(data_format))
    padding = padding_format(padding)
    return data_format, padding


 def preprocess_2d_format(data_format, padding):
    """
    Normalise a 2-D data-format name together with a padding name.

    Parameters
    ----------
    data_format : str or None
        "channels_last", "NHWC" or "nhwc" map to "NHWC"; "channels_first", "NCHW" or
        "nchw" map to "NCHW"; None is passed through unchanged.
    padding : str or None
        Forwarded to ``padding_format`` for normalisation.

    Returns
    -------
        tuple ``(data_format, padding)`` with both entries normalised.

    Raises
    ------
    ValueError
        If ``data_format`` is none of the accepted values.
    """

    if data_format in ("channels_last", "NHWC", "nhwc"):
        data_format = "NHWC"
    elif data_format in ("channels_first", "NCHW", "nchw"):
        data_format = "NCHW"
    elif data_format is not None:
        # Only None may fall through untouched; anything else is a caller error.
        raise ValueError("Unsupported data format: " + str(data_format))
    padding = padding_format(padding)
    return data_format, padding


 def preprocess_3d_format(data_format, padding):
    """
    Normalise a 3-D data-format name together with a padding name.

    Parameters
    ----------
    data_format : str or None
        "channels_last" or "NDHWC" map to "NDHWC"; "channels_first" or "NCDHW" map to
        "NCDHW"; None is passed through unchanged.
    padding : str or None
        Forwarded to ``padding_format`` for normalisation.

    Returns
    -------
        tuple ``(data_format, padding)`` with both entries normalised.

    Raises
    ------
    ValueError
        If ``data_format`` is none of the accepted values.
    """

    if data_format in ('channels_last', 'NDHWC'):
        data_format = 'NDHWC'
    elif data_format in ('channels_first', 'NCDHW'):
        data_format = 'NCDHW'
    elif data_format is not None:
        # Only None may fall through untouched; anything else is a caller error.
        raise ValueError("Unsupported data format: " + str(data_format))
    padding = padding_format(padding)
    return data_format, padding


 def nchw_to_nhwc(x):
    """
    Channels first to channels last (intended behaviour).

    Not implemented for this backend: the body is empty, so the call returns None
    and ``x`` is left untouched.

    Parameters
    ----------
    x : tensor
        channels first tensor data

    Returns
    -------
        Intended: channels last tensor data. Currently: None.
    """

    pass


 def nhwc_to_nchw(x):
    """
    Channels last to channels first (intended behaviour).

    Not implemented for this backend: the body is empty, so the call returns None
    and ``x`` is left untouched.

    Parameters
    ----------
    x : tensor
        channels last tensor data

    Returns
    -------
        Intended: channels first tensor data. Currently: None.
    """

    pass


 class ReLU(object):
    """Callable object that applies ``D.nn.relu`` to its input."""

    def __init__(self):
        # Stateless: there is nothing to configure.
        pass

    def __call__(self, x):
        """Return ``D.nn.relu(x)``."""
        return D.nn.relu(x)


 def relu(x):
    """
    Computes rectified linear, max(x, 0), by delegating to ``D.nn.relu``.

    Parameters
    ----------
    x : tensor
        Input tensor. NOTE(review): which dtypes the dragon backend accepts is not
        visible from this file -- confirm before relying on a specific dtype.

    Returns
    -------
        The result of ``D.nn.relu(x)``.
    """
    return D.nn.relu(x)


 class ReLU6(object):
    """Callable object that applies ``D.nn.relu6`` to its input."""

    def __init__(self):
        # Stateless: there is nothing to configure.
        pass

    def __call__(self, x):
        """Return ``D.nn.relu6(x)``."""
        return D.nn.relu6(x)


 def relu6(x):
    """
    Computes Rectified Linear 6, min(max(x, 0), 6), by delegating to ``D.nn.relu6``.

    Parameters
    ----------
    x : tensor
        Input tensor. NOTE(review): which dtypes the dragon backend accepts is not
        visible from this file -- confirm before relying on a specific dtype.

    Returns
    -------
        The result of ``D.nn.relu6(x)``.
    """
    return D.nn.relu6(x)


 class LeakyReLU(object):
    """Callable object that applies ``D.nn.leaky_relu`` with a fixed ``alpha``.

    Parameters
    ----------
    alpha : float
        Value forwarded to the backend as ``alpha`` on every call. Defaults to 0.2.
    """

    def __init__(self, alpha=0.2):
        self.alpha = alpha

    def __call__(self, x):
        """Return ``D.nn.leaky_relu(x, alpha=self.alpha)``."""
        return D.nn.leaky_relu(x, alpha=self.alpha)


 def leaky_relu(x):
    """
    Compute the Leaky ReLU activation function by delegating to ``D.nn.leaky_relu``.

    No ``alpha`` is passed, so the backend's own default slope applies
    (NOTE(review): confirm it matches the 0.2 default of the ``LeakyReLU`` class).

    Parameters
    ----------
    x : tensor
        representing preactivation values.

    Returns
    -------
        The activation value.
    """

    return D.nn.leaky_relu(x)


 class Softplus(object):
    """Placeholder for the softplus operator; calling an instance raises ``NotImplementedError``."""

    def __init__(self):
        # Stateless: there is nothing to configure.
        pass

    def __call__(self, x):
        """Always raises ``NotImplementedError``."""
        raise NotImplementedError


 def softplus(x):
    """
    Computes softplus: log(exp(x) + 1) (intended behaviour).

    Not implemented for this backend: the call always raises.

    Parameters
    ----------
    x : tensor
        Input tensor.

    Returns
    -------
        Nothing; see Raises.

    Raises
    ------
    NotImplementedError
        Always.
    """

    raise NotImplementedError


 class Tanh(object):
    """Callable object that applies ``activation_ops.tanh`` to its input."""

    def __init__(self):
        # Stateless: there is nothing to configure.
        pass

    def __call__(self, x):
        """Return ``activation_ops.tanh(x)``."""
        return activation_ops.tanh(x)


 def tanh(x):
    """
    Computes hyperbolic tangent of x element-wise by delegating to ``activation_ops.tanh``.

    Parameters
    ----------
    x : tensor
        Input tensor. NOTE(review): which dtypes the dragon backend accepts is not
        visible from this file -- confirm before relying on a specific dtype.

    Returns
    -------
        The result of ``activation_ops.tanh(x)``.
    """

    return activation_ops.tanh(x)


 class Sigmoid(object):
    """Callable object that applies ``activation_ops.sigmoid`` to its input."""

    def __init__(self):
        # Stateless: there is nothing to configure.
        pass

    def __call__(self, x):
        """Return ``activation_ops.sigmoid(x)``."""
        return activation_ops.sigmoid(x)


 def sigmoid(x):
    """
    Computes sigmoid of x element-wise by delegating to ``activation_ops.sigmoid``.

    Parameters
    ----------
    x : tensor
        Input tensor. NOTE(review): which dtypes the dragon backend accepts is not
        visible from this file -- confirm before relying on a specific dtype.

    Returns
    -------
        The result of ``activation_ops.sigmoid(x)``.
    """
    return activation_ops.sigmoid(x)


 class Softmax(object):
    """Callable object that applies ``D.nn.softmax`` to its input.

    No axis is passed to the backend, so its default axis applies.
    """

    def __init__(self):
        # Stateless: there is nothing to configure.
        pass

    def __call__(self, x):
        """Return ``D.nn.softmax(x)``."""
        return D.nn.softmax(x)


 def softmax(logits, axis=None):
    """
    Computes softmax activations by delegating to ``D.nn.softmax``.

    Parameters
    ----------
    logits : tensor
        Input tensor.
    axis : int or None
        The dimension softmax is performed on. When None (the default) no axis is
        passed and the backend's own default applies.

    Returns
    -------
        The result of ``D.nn.softmax``.
    """
    if axis is None:
        return D.nn.softmax(logits)
    # Forward an explicit axis; it used to be accepted and then silently dropped.
    return D.nn.softmax(logits, axis=axis)


 class Dropout(object):
    """Callable wrapper around ``D.nn.dropout``.

    Parameters
    ----------
    keep : float
        Presumably the keep probability (verify against callers); ``1 - keep`` is what
        gets forwarded to the backend as ``prob``.
    seed : int
        Stored on the instance; ``__call__`` does not pass it to ``D.nn.dropout``.
    """

    def __init__(self, keep, seed=1):
        # Despite its name, ``self.keep`` holds 1 - keep (the complement of the argument).
        self.keep = 1 - keep
        self.seed = seed

    def __call__(self, inputs):
        """Return ``D.nn.dropout(inputs, prob=self.keep)``."""
        return D.nn.dropout(inputs, prob=self.keep)


 class BiasAdd(object):
    """
    Adds a bias to a value through ``vision_ops.bias_add``.

    Parameters
    ----------
    data_format : str
        Stored at construction and forwarded to the backend on every call.
        Defaults to 'NHWC'.

    Calling an instance with ``(x, bias)`` returns
    ``vision_ops.bias_add([x, bias], data_format=self.data_format)``.
    """

    def __init__(self, data_format='NHWC'):
        self.data_format = data_format

    def __call__(self, x, bias):
        layout = self.data_format
        return vision_ops.bias_add([x, bias], data_format=layout)


 def bias_add(x, bias, data_format='NHWC'):
    """
    Adds bias to value through ``vision_ops.bias_add``.

    Parameters
    ----------
    x : tensor
        The value the bias is added to.
    bias : tensor
        The bias to add.
    data_format : str
        Forwarded unchanged to the backend. Defaults to 'NHWC', which is the layout
        this function always used before the argument existed.

    Returns
    -------
        The result of ``vision_ops.bias_add([x, bias], data_format=data_format)``.
    """
    return vision_ops.bias_add([x, bias], data_format=data_format)


 class Conv1D(object):
    """Placeholder for the 1-D convolution operator; it defines no behaviour of its own."""
    pass
    # A ``raise NotImplementedError`` was left disabled here, so instantiation succeeds.


 def conv1d(input, filters, stride, padding, data_format='NWC', dilations=None, name=None):
    """
    Computes a 1-D convolution given 3-D input and filter tensors (intended behaviour).

    Not implemented for this backend: the body is empty, so the call returns None.
    The parameter descriptions below document the intended interface.

    Parameters
    ----------
    input : tensor
        A 3D Tensor. Must be of type float16, float32, or float64
    filters : tensor
        A 3D Tensor. Must have the same type as input.
    stride : int or list
         An int or list of ints that has length 1 or 3. The number of entries by which the filter is moved right at each step.
    padding : string
         'SAME' or 'VALID'
    data_format : string
        An optional string from "NWC", "NCW". Defaults to "NWC", the data is stored in the order of
        [batch, in_width, in_channels]. The "NCW" format stores data as [batch, in_channels, in_width].
    dilations : int or list
        An int or list of ints that has length 1 or 3 which defaults to 1.
        The dilation factor for each dimension of input. If set to k > 1,
        there will be k-1 skipped cells between each filter element on that dimension.
        Dilations in the batch and depth dimensions must be 1.
    name : string
        A name for the operation (optional).
    Returns
    -------
        Intended: a Tensor with the same type as input. Currently: None.
    """

    pass


 class Conv2D(object):
    """Callable wrapper around ``vision_ops.conv2d`` with a fixed configuration.

    Parameters
    ----------
    strides : sequence of int
        Strides laid out according to ``data_format``; only the entry at index 1
        ('NHWC') or index 2 ('NCHW') is forwarded to the backend.
    padding : str
        Normalised together with ``data_format`` by ``preprocess_2d_format``.
    data_format : str
        Defaults to 'NHWC'.
    dilations : sequence of int or None
        Same layout and indexing as ``strides``. None is treated as a dilation of 1.
    out_channel : int
        Accepted for interface compatibility; not used by this class.
    k_size : sequence of int
        Only ``k_size[0]`` is used, as the backend ``kernel_shape``. Although the
        default is None, a value is required in practice.
    """

    def __init__(self, strides, padding, data_format='NHWC', dilations=None, out_channel=None, k_size=None):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.ksize = k_size[0]
        # Compare by value: ``is`` on a str literal tests object identity, which only
        # works by accident of interning and triggers a SyntaxWarning on Python >= 3.8.
        if self.data_format == 'NHWC':
            spatial_axis = 1
        elif self.data_format == 'NCHW':
            spatial_axis = 2
        else:
            # data_format=None passes normalisation; as before, no stride or dilation
            # is recorded for it.
            return
        self.dg_stride = strides[spatial_axis]
        # The signature defaults dilations to None, which used to crash on indexing.
        self.dg_dilation = 1 if dilations is None else dilations[spatial_axis]

    def __call__(self, inputs, filters):
        """Return ``vision_ops.conv2d`` applied to ``[inputs, filters]`` with the stored settings."""
        outputs = vision_ops.conv2d(
            [inputs, filters],
            kernel_shape=self.ksize,
            strides=self.dg_stride,
            padding=self.padding,
            dilations=self.dg_dilation,
            data_format=self.data_format,
        )
        return outputs


 def conv2d(input, filters, strides, padding, data_format='NCHW', dilations=None):
    """
    Computes a 2-D convolution given 4-D input and filters tensors (intended behaviour).

    Not implemented for this backend: the call always raises. The parameter
    descriptions below document the intended interface.

    Parameters
    ----------
    input : tensor
        Must be one of the following types: half, bfloat16, float32, float64. A 4-D tensor.
        The dimension order is interpreted according to the value of data_format, see below for details.
    filters : tensor
         Must have the same type as input. A 4-D tensor of shape [filter_height, filter_width, in_channels, out_channels]
    strides : int or list
        The stride of the sliding window for each dimension of input. If a single value is given it is replicated in the H and W dimension.
        By default the N and C dimensions are set to 1. The dimension order is determined by the value of data_format, see below for details.
    padding : string
        "SAME" or "VALID"
    data_format : string
        "NHWC", "NCHW". Defaults to "NCHW".
    dilations : list or ints
        list of ints that has length 1, 2 or 4, defaults to 1. The dilation factor for each dimension of input.

    Returns
    -------
        Intended: a Tensor with the same type as input. Currently nothing; see Raises.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError


 class Conv3D(object):
    """Placeholder for a 3-D convolution operator; it defines no attributes or methods."""


 def conv3d(input, filters, strides, padding, data_format='NDHWC', dilations=None, name=None):
    """
    Functional 3-D convolution over a 5-D input; not implemented in this module.

    Parameters
    ----------
    input : tensor
        5-D tensor of type half, bfloat16, float32 or float64,
        shaped [batch, in_depth, in_height, in_width, in_channels] for "NDHWC".
    filters : tensor
        Same type as input, shaped
        [filter_depth, filter_height, filter_width, in_channels, out_channels].
        in_channels must agree with input.
    strides : list of ints
        Length-5 list with the stride for each input dimension;
        strides[0] and strides[4] must be 1.
    padding : string
        "SAME" or "VALID".
    data_format : string
        "NDHWC" (default; [batch, in_depth, in_height, in_width, in_channels]) or
        "NCDHW" ([batch, in_channels, in_depth, in_height, in_width]).
    dilations : list of ints
        Length-5 list, defaults to [1, 1, 1, 1, 1]. A value k > 1 skips k-1 cells between
        filter elements on that dimension. Ordering follows data_format;
        batch and depth dilations must be 1.
    name : string
        Optional name for the operation.

    Returns
    -------
        A Tensor of the same type as input (intended contract).

    Raises
    ------
    NotImplementedError
        Always; the body contains no implementation.
    """
    raise NotImplementedError()


 def lrn(inputs, depth_radius, bias, alpha, beta):
    """
    Local Response Normalization (placeholder: nothing is computed, None is returned).

    Parameters
    ----------
    inputs : tensor
        4-D tensor of type half, bfloat16 or float32.
    depth_radius : int
        Half-width of the 1-D normalization window (documented default: 5).
    bias : float
        Offset, usually positive so the division never hits 0 (documented default: 1).
    alpha : float
        Scale factor, usually positive (documented default: 1).
    beta : float
        Exponent (documented default: 0.5).

    Returns
    -------
        Intended: a Tensor of the same type as inputs. Currently None.
    """
    # Placeholder body: no kernel is wired up here.
    return None


 def moments(x, axes, shift=None, keepdims=False):
    """
    Mean and variance of x (placeholder: nothing is computed, None is returned).

    Parameters
    ----------
    x : tensor
        Input tensor.
    axes : ints
        Axes over which mean and variance are taken.
    shift : int
        Unused.
    keepdims : bool
        If True, the moments keep the dimensionality of the input.

    Returns
    -------
        Intended: two Tensors, mean and variance. Currently None.
    """
    # Placeholder body: no kernel is wired up here.
    return None


 class MaxPool(object):
    """
    Callable 2-D max pooling that forwards to ``vision_ops.pool2d`` with mode 'MAX'.

    Parameters
    ----------
    ksize : passed through as ``kernel_shape``.
    strides : passed through as ``strides``.
    padding : normalised by ``preprocess_2d_format`` before use.
    data_format : normalised by ``preprocess_2d_format`` before use.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        layout, pad_mode = preprocess_2d_format(data_format, padding)
        self.data_format = layout
        self.padding = pad_mode
        self.ksize = ksize
        self.strides = strides

    def __call__(self, inputs):
        """Pool inputs with the settings captured at construction time."""
        pool_kwargs = dict(
            kernel_shape=self.ksize,
            strides=self.strides,
            padding=self.padding,
            mode='MAX',
            global_pooling=False,
            data_format=self.data_format,
        )
        return vision_ops.pool2d(inputs, **pool_kwargs)


 def max_pool(input, ksize, strides, padding, data_format=None):
    """
    Max pooling over the spatial dimensions (placeholder: returns None).

    Parameters
    ----------
    input : tensor
        Rank N+2 tensor. Shape is [batch_size] + input_spatial_shape + [num_channels]
        unless data_format starts with "NC", in which case it is
        [batch_size, num_channels] + input_spatial_shape.
    ksize : int or list of ints
        Window size per input dimension; an int or a list of length 1, N or N+2.
    strides : int or list of ints
        Window stride per input dimension; an int or a list of length 1, N or N+2.
    padding : string
        'VALID' or 'SAME'. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        Layout of input; a value starting with "NC" means channels come second.

    Returns
    -------
        Intended: the max pooled Tensor in the layout given by data_format. Currently None.
    """
    # Placeholder body: no kernel is wired up here.
    return None


 class AvgPool(object):
    """
    Callable 2-D average pooling that forwards to ``vision_ops.pool2d`` with mode 'AVG'.

    Parameters
    ----------
    ksize : stored as ``filter_size`` and passed through as ``kernel_shape``.
    strides : passed through as ``strides``.
    padding : normalised by ``preprocess_2d_format`` before use.
    data_format : normalised by ``preprocess_2d_format`` before use.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        layout, pad_mode = preprocess_2d_format(data_format, padding)
        self.data_format = layout
        self.padding = pad_mode
        self.filter_size = ksize
        self.strides = strides

    def __call__(self, inputs):
        """Pool inputs with the settings captured at construction time."""
        pool_kwargs = dict(
            kernel_shape=self.filter_size,
            strides=self.strides,
            padding=self.padding,
            mode='AVG',
            global_pooling=False,
            data_format=self.data_format,
        )
        return vision_ops.pool2d(inputs, **pool_kwargs)


 def avg_pool(input, ksize, strides, padding):
    """
    Average pooling over the spatial dimensions (placeholder: returns None).

    Parameters
    ----------
    input : tensor
        Rank N+2 tensor. Shape is [batch_size] + input_spatial_shape + [num_channels]
        unless data_format starts with "NC", in which case it is
        [batch_size, num_channels] + input_spatial_shape.
    ksize : int or list of ints
        Window size per input dimension; an int or a list of length 1, N or N+2.
    strides : int or list of ints
        Window stride per input dimension; an int or a list of length 1, N or N+2.
    padding : string
        'VALID' or 'SAME'. See the "returns" section of tf.ops.convolution for details.

    Returns
    -------
        Intended: the average pooled Tensor. Currently None.
    """
    # Placeholder body: no kernel is wired up here.
    return None


 def max_pool3d(input, ksize, strides, padding, data_format=None, name=None):
    """
    3-D max pooling (placeholder: returns None).

    Parameters
    ----------
    input : tensor
        5-D tensor laid out as given by data_format.
    ksize : int or list of ints
        Window size per input dimension; an int or a list of length 1, 3 or 5.
    strides : int or list of ints
        Window stride per input dimension; an int or a list of length 1, 3 or 5.
    padding : string
        'VALID' or 'SAME'. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        "NDHWC" (documented default; [batch, in_depth, in_height, in_width, in_channels]) or
        "NCDHW" ([batch, in_channels, in_depth, in_height, in_width]).
    name : string
        Optional name for the operation.

    Returns
    -------
        Intended: the max pooled Tensor in the layout given by data_format. Currently None.
    """
    # Placeholder body: no kernel is wired up here.
    return None


 def avg_pool3d(input, ksize, strides, padding, data_format=None, name=None):
    """
    3-D average pooling (placeholder: returns None).

    Parameters
    ----------
    input : tensor
        5-D tensor of type float32, float64, qint8, quint8 or qint32,
        laid out as given by data_format.
    ksize : int or list of ints
        Window size per input dimension; an int or a list of length 1, 3 or 5.
    strides : int or list of ints
        Window stride per input dimension; an int or a list of length 1, 3 or 5.
    padding : string
        'VALID' or 'SAME'. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        'NDHWC' or 'NCDHW'.
    name : string
        Optional name for the operation.

    Returns
    -------
        Intended: the average pooled Tensor, same type as input. Currently None.
    """
    # Placeholder body: no kernel is wired up here.
    return None


 def pool(input, window_shape, pooling_type, strides=None, padding='VALID', data_format=None, dilations=None, name=None):
    """
    N-D pooling (placeholder: returns None).

    Parameters
    ----------
    input : tensor
        Rank N+2 tensor. Shape is [batch_size] + input_spatial_shape + [num_channels]
        unless data_format starts with "NC", in which case it is
        [batch_size, num_channels] + input_spatial_shape.
    window_shape : ints
        Sequence of N ints >= 1.
    pooling_type : string
        "AVG" or "MAX".
    strides : ints
        Sequence of N ints >= 1, defaults to [1]*N. If any stride is > 1,
        every dilation must be 1.
    padding : string
        "SAME" or "VALID". Defaults to "VALID".
        See the "returns" section of tf.ops.convolution for details.
    data_format : string
        Channels-last unless the value starts with "NC" (channels second).
        N=1: "NWC" (default) or "NCW". N=2: "NHWC" (default) or "NCHW".
        N=3: "NDHWC" (default) or "NCDHW".
    dilations : list of ints
        N ints >= 1, defaults to [1]*N. If any dilation is > 1, every stride must be 1.
    name : string
        Optional name of the op.

    Returns
    -------
        Intended: a rank N+2 Tensor shaped
        [batch_size] + output_spatial_shape + [num_channels]. Currently None.
    """
    # Placeholder body: no kernel is wired up here.
    return None


 class DepthwiseConv2d(object):
    """
    Depthwise 2-D convolution holder; calling an instance always raises.

    The constructor stores the normalised layout and padding plus the raw strides
    and dilations. ``ksize`` and ``channel_multiplier`` are accepted but not stored.
    """

    def __init__(self, strides, padding, data_format=None, dilations=None, ksize=None, channel_multiplier=1):
        layout, pad_mode = preprocess_2d_format(data_format, padding)
        self.data_format = layout
        self.padding = pad_mode
        self.stride = strides
        self.dilations = dilations

    def __call__(self, input, filter):
        """Raise NotImplementedError; the operation has no implementation here."""
        message = "Not implemented depthwiseconv2d"
        raise NotImplementedError(message)


 def depthwise_conv2d(input, filter, strides, padding, data_format=None, dilations=None, name=None):
    """
    Depthwise 2-D convolution (placeholder: returns None).

    Parameters
    ----------
    input : tensor
        4-D tensor laid out as given by data_format.
    filter : tensor
        4-D tensor shaped [filter_height, filter_width, in_channels, channel_multiplier].
    strides : list
        Length-4 list with the window stride for each input dimension.
    padding : string
        'VALID' or 'SAME'. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        "NHWC" (default) or "NCHW".
    dilations : list
        Length-2 list with the height/width sampling rate for atrous convolution.
        If any value is > 1, every stride must be 1.
    name : string
        Optional name for the operation.

    Returns
    -------
        Intended: a 4-D Tensor laid out as given by data_format, e.g. for "NHWC"
        [batch, out_height, out_width, in_channels * channel_multiplier]. Currently None.
    """
    # Placeholder body: no kernel is wired up here.
    return None


 def conv1d_transpose(
    input, filters, output_shape, strides, padding='SAME', data_format='NWC', dilations=None, name=None
 ):
    """
    Transpose of conv1d (placeholder: returns None).

    Parameters
    ----------
    input : tensor
        3-D float tensor shaped [batch, in_width, in_channels] for NWC,
        or [batch, in_channels, in_width] for NCW.
    filters : tensor
        3-D tensor of the same type as input, shaped [filter_width, output_channels, in_channels].
        Its in_channels dimension must match that of input.
    output_shape : tensor
        1-D tensor of three elements giving the output shape of the deconvolution.
    strides : int or list of ints
        An int or a list of length 1 or 3: how far the filter moves right at each step.
    padding : string
        'VALID' or 'SAME'. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        'NWC' or 'NCW'.
    dilations : int or list of ints
        An int or a list of length 1 or 3, defaults to 1. A value k > 1 skips k-1 cells
        between filter elements on that dimension. Batch and depth dilations must be 1.
    name : string
        Optional name for the returned tensor.

    Returns
    -------
        Intended: a Tensor of the same type as input. Currently None.
    """
    # Placeholder body: no kernel is wired up here.
    return None


 def conv2d_transpose(
    input, filters, output_shape, strides, padding='SAME', data_format='NHWC', dilations=None, name=None
 ):
    """
    Transpose of conv2d (placeholder: returns None).

    Parameters
    ----------
    input : tensor
        4-D float tensor shaped [batch, height, width, in_channels] for NHWC,
        or [batch, in_channels, height, width] for NCHW.
    filters : tensor
        4-D tensor of the same type as input, shaped [height, width, output_channels, in_channels].
        Its in_channels dimension must match that of input.
    output_shape : tensor
        1-D tensor giving the output shape of the deconvolution.
    strides : int or list of ints
        An int or a list of length 1, 2 or 4: window stride per input dimension.
        A single value is applied to H and W; N and C default to 0.
        Ordering follows data_format.
    padding : string
        'VALID' or 'SAME'. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        'NHWC' or 'NCHW'.
    dilations : int or list of ints
        An int or a list of length 1, 2 or 4, defaults to 1.
    name : string
        Optional name for the returned tensor.

    Returns
    -------
        Intended: a Tensor of the same type as input. Currently None.
    """
    # Placeholder body: no kernel is wired up here.
    return None


 def conv3d_transpose(
    input, filters, output_shape, strides, padding='SAME', data_format='NDHWC', dilations=None, name=None
 ):
    """
    Transpose of conv3d (placeholder: returns None).

    Parameters
    ----------
    input : tensor
        5-D float tensor laid out as given by data_format.
    filters : tensor
        5-D tensor of the same type as input whose trailing axes are
        [..., output_channels, in_channels]. Its in_channels dimension must match that of input.
    output_shape : tensor
        1-D tensor giving the output shape of the deconvolution.
    strides : int or list of ints
        An int or a list of length 1, 3 or 5.
    padding : string
        'VALID' or 'SAME'. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        'NDHWC' or 'NCDHW'.
    dilations : int or list of ints
        An int or a list of length 1, 3 or 5, defaults to 1.
    name : string
        Optional name for the returned tensor.

    Returns
    -------
        Intended: a Tensor of the same type as input. Currently None.
    """
    # Placeholder body: no kernel is wired up here.
    return None


 class BatchNorm(object):
    """Placeholder batch-normalisation operator: stores nothing and computes nothing."""

    def __init__(self):
        """Create the placeholder; no state is initialised."""
        return None

    def __call__(self, *args, **kwargs):
        """Accept any arguments, ignore them, and return None."""
        return None
--- a/tensorlayer/backend/ops/load_backend.py
+++ b/tensorlayer/backend/ops/load_backend.py
@@ -7,7 +7,7 @@ import sys

 BACKEND = 'tensorflow'
 # BACKEND = 'mindspore'
 # BACKEND = 'dragon'
 # BACKEND = 'paddle'

 # Check for backend.json files
 tl_backend_dir = os.path.expanduser('~')
@@ -34,7 +34,7 @@ else:
    else:
        BACKEND = load_dict['backend']

 # Set backend based on TL_BACKEND flag.
 # Set backend based on TL_BACKEND.
 if 'TL_BACKEND' in os.environ:
    backend = os.environ['TL_BACKEND']
    if backend:
@@ -57,20 +57,13 @@ elif BACKEND == 'mindspore':
    import mindspore.context as context
    import os
    os.environ['DEVICE_ID'] = '0'
    #context.set_context(mode=context.PYNATIVE_MODE,device_target='GPU'),
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU'),
    context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU'),
    # context.set_context(mode=context.GRAPH_MODE, device_target='CPU'),
    # enable_task_sink=True, enable_loop_sink=True)
    # context.set_context(mode=context.GRAPH_MODE, backend_policy='ms',
    #                     device_target='Ascend', enable_task_sink=True, enable_loop_sink=True)
    sys.stderr.write('Using MindSpore backend.\n')

 elif BACKEND == 'dragon':
    from .dragon_backend import *
    from .dragon_nn import *
    import dragon as dg
    BACKEND_VERSION = dg.__version__
    sys.stderr.write('Using Dragon backend.\n')

 elif BACKEND == 'paddle':
    from .paddle_backend import *
    from .paddle_nn import *
--- a/tensorlayer/backend/ops/mindspore_backend.py
+++ b/tensorlayer/backend/ops/mindspore_backend.py
@@ -11,12 +11,13 @@ from mindspore.common.initializer import (
    initializer, Constant, Normal, TruncatedNormal, Initializer, _assignment, _calculate_in_and_out, One, Zero
 )
 from mindspore.common.tensor import Tensor
 from mindspore._c_expression import Tensor as Tensor_
 from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore.ops import composite as C
 import mindspore.context as context
 from mindspore.nn import Cell
 from mindspore.ops import count_nonzero
 import mindspore.numpy as msnp

 import numpy as np
 from scipy.stats import truncnorm
@@ -588,6 +589,10 @@ def convert_to_tensor(value, dtype=None):
    return Tensor(value, dtype=dtype)


 def convert_to_numpy(value):
    """Return whatever ``value.asnumpy()`` produces."""
    as_array = value.asnumpy()
    return as_array


 def sqrt(x):
    """
    Computes square root of x element-wise.
@@ -611,7 +616,7 @@ class ReduceSum(Cell):
    def __init__(self, axis):
        super(ReduceSum, self).__init__()
        self.axis = axis
        self.reduce_sum = P.ReduceSum(keep_dims=True)
        self.reduce_sum = P.ReduceSum(keep_dims=False)

    def construct(self, input):
        return self.reduce_sum(input, self.axis)
@@ -919,7 +924,7 @@ class Cast(Cell):
        self.cast = P.Cast()

    def construct(self, input):
        return self.cast(input, dtype=self.dtype)
        return self.cast(input, self.dtype)


 def cast(x, dtype):
@@ -1047,6 +1052,12 @@ def split(value, num_or_size_splits, axis=0, num=None):
    pass


 class Floor(Cell):
    """Floor operator placeholder; calling an instance always raises NotImplementedError."""

    # NOTE(review): this overrides __call__ directly, whereas the other Cell
    # subclasses visible nearby define construct(); confirm that is intended.
    def __call__(self, *args, **kwargs):
        raise NotImplementedError


 def floor(x):
    """
    Element-wise floor of x; not implemented.

    Raises
    ------
    NotImplementedError
        Always. Previously the exception class was returned as a value, so
        callers silently received ``NotImplementedError`` instead of a tensor.
    """
    raise NotImplementedError

@@ -1087,44 +1098,79 @@ class L2Normalize(Cell):
        super(L2Normalize, self).__init__()
        pass

    def __call__(self, input, *args, **kwargs):
    def construct(self, input, *args, **kwargs):
        pass


 class EmbeddingLookup(Cell):

    def __init__(self, max_norm=None):
    def __init__(self, max_norm=0):
        super(EmbeddingLookup, self).__init__()
        self.max_norm = max_norm
        self.embedding_lookup = P.EmbeddingLookup()

    def __call__(self, params, ids, *args, **kwargs):
        pass
    def construct(self, params, ids, *args, **kwargs):
        return self.embedding_lookup(params, ids, self.max_norm)


 class NCELoss(object):
 class NCELoss(Cell):

    def __init__(self, num_true=1, sampled_values=None, remove_accidental_hits=False):
        super(NCELoss, self).__init__()

    def __call__(self, weights, biases, labels, inputs, num_sampled, num_classes):
        pass

    def construct(self, weights, biases, labels, inputs, num_sampled, num_classes):
        raise NotImplementedError


 class Not_equal(object):
 class NotEqual(Cell):

    def __init__(self):
        pass
        super(NotEqual, self).__init__()
        self.not_equal = P.NotEqual()

    def __call__(self, x, y):
        pass
    def construct(self, x, y):
        outputs = self.not_equal(x, y)
        return outputs


 class Count_nonzero(object):
 class CountNonzero(object):

    def __init__(self, keepdims=None, dtype=int64):
        pass
        self.keepdims = keepdims
        self.dtype = dtype

    def __call__(self, *args, **kwargs):
        pass
    def __call__(self, input, axis=None):
        input = self.convert_dtype(input)
        return count_nonzero(x=input, axis=axis, keep_dims=self.keepdims, dtype=self.dtype)

    def bool_convert_to_tensor(self, x):
        """
        Turn a boolean tensor into a float32 Tensor of ones and zeros.

        Parameters
        ----------
        x : object exposing ``asnumpy()``
            Its array form is compared element-wise against True.

        Returns
        -------
            A float32 Tensor with the same shape as x: 1 where the element equals
            True and 0 elsewhere.
        """
        mask = x.asnumpy()
        # One vectorised pass covers every element of any rank. The previous
        # index loops raised TypeError for 1-D input (tuple minus int) and
        # never visited the last row/column of 2-D input.
        ones_and_zeros = np.where(np.equal(mask, True), 1.0, 0.0)
        return Tensor(ones_and_zeros, dtype=float32)

    def convert_dtype(self, input):
        """
        Route boolean input through ``bool_convert_to_tensor``; pass anything else through.

        Returns the converted tensor when one of the two guarded branches matches,
        otherwise returns input unchanged.
        """
        # NOTE(review): `input.shape == 1` compares the whole shape with the int 1.
        # If shape is a tuple this is never true, so the conversion branch would be
        # unreachable; `len(input.shape) == 1` may have been intended. Confirm.
        # NOTE(review): `type(input[0]) is bool` only matches a Python bool element;
        # verify what indexing the incoming object actually yields.
        if input.shape == 1 and type(input[0]) is bool:
            output = self.bool_convert_to_tensor(input)
        # Same concerns as above, for the presumed 2-D case.
        elif input.shape == 2 and type(input[0][0]) is bool:
            output = self.bool_convert_to_tensor(input)
        else:
            output = input
        return output


 class Resize(Cell):
@@ -1208,6 +1254,16 @@ class Sign(Cell):
        return self.sign(x)


 class Ceil(Cell):
    """Cell wrapper that applies a stored ``P.Ceil`` primitive to its input."""

    def __init__(self):
        super(Ceil, self).__init__()
        # Build the primitive once so construct() only has to invoke it.
        self.ceil = P.Ceil()

    def construct(self, x):
        """Apply the stored primitive to x and return its result."""
        result = self.ceil(x)
        return result


 def ceil(x):
    """Apply a freshly constructed ``P.Ceil`` primitive to x and return the result."""
    return P.Ceil()(x)
@@ -1218,7 +1274,7 @@ def multiply(x, y):


 def divide(x, y):
    raise NotImplementedError
    return msnp.divide(x, y)


 def identity(x):
--- a/tensorlayer/backend/ops/mindspore_nn.py
+++ b/tensorlayer/backend/ops/mindspore_nn.py
@@ -1,18 +1,20 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-

 from __future__ import absolute_import, division, print_function

 from mindspore.nn.cell import Cell
 from mindspore import context
 import itertools
 import mindspore as ms
 import mindspore.ops as P
 from mindspore import context
 from mindspore.nn.cell import Cell
 from mindspore._checkparam import Rel
 from mindspore.ops import functional as F
 from mindspore.communication.management import get_group_size, get_rank
 from mindspore.communication import management
 from mindspore._checkparam import check_int_positive
 from mindspore.ops.operations import _inner_ops as inner
 from mindspore._extends import cell_attr_register
 from mindspore.ops._grad.grad_base import bprop_getters
 from mindspore._checkparam import Validator as validator
 from mindspore.communication.management import get_group_size, get_rank


 def padding_format(padding):
@@ -537,25 +539,17 @@ class Conv2D(Cell):
        if self.data_format is 'NHWC':
            self.ms_stride = strides[1]
            self.ms_dilation = dilations[1]
            # self.transpose = P.Transpose()
        elif self.data_format is 'NCHW':
            self.ms_stride = strides[2]
            self.ms_dilation = dilations[2]

        # print(out_channel, k_size, self.padding, self.ms_stride, self.ms_dilation)
        self.conv2d = P.Conv2D(
            out_channel=out_channel, kernel_size=k_size, pad_mode=self.padding, stride=self.ms_stride,
            dilation=self.ms_dilation, mode=1, group=1
            dilation=self.ms_dilation, mode=1, group=1, data_format=self.data_format
        )

    def construct(self, inputs, filters):
        if self.data_format == 'NHWC':
            inputs = nhwc_to_nchw(inputs)

        outputs = self.conv2d(inputs, filters)

        if self.data_format == 'NHWC':
            outputs = nchw_to_nhwc(outputs)
        return outputs


@@ -588,8 +582,27 @@ def conv2d(input, filters, strides, padding, data_format='NCHW', dilations=None)


 class Conv3D(Cell):
    """
    3-D convolution Cell built on ``P.Conv3D``.

    Parameters
    ----------
    strides : list of ints
        Per-dimension strides; ``strides[2]`` is used for 'NCDHW'.
    padding : string
        Padding spec, normalised by ``preprocess_3d_format``.
    data_format : string
        Layout name, normalised by ``preprocess_3d_format``. Only 'NCDHW' is
        supported; 'NDHWC' raises NotImplementedError.
    dilations : list of ints
        Per-dimension dilations; ``dilations[2]`` is used for 'NCDHW'.
        Must be supplied: the default None cannot be indexed.
    out_channel : int
        Forwarded to ``P.Conv3D``.
    k_size : int or tuple of ints
        Forwarded to ``P.Conv3D`` as ``kernel_size``.
    """

    def __init__(self, strides, padding, data_format='NDHWC', dilations=None, out_channel=None, k_size=None):
        super(Conv3D, self).__init__()
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)

        # Compare by value; ``is`` against a string literal tests identity and
        # is only true when both strings happen to be interned.
        if self.data_format == 'NDHWC':
            # Fail before touching strides/dilations so the caller sees this
            # message rather than an unrelated indexing error.
            raise NotImplementedError("The optional value for data format. Currently only support “NCDHW”.")
        elif self.data_format == 'NCDHW':
            self.ms_stride = strides[2]
            self.ms_dilation = dilations[2]
        else:
            raise ValueError("Unsupported data_format: {}".format(self.data_format))

        # Pass the normalised layout, matching how Conv2D in this file
        # forwards self.data_format to its primitive.
        self.conv3d = P.Conv3D(
            out_channel=out_channel, kernel_size=k_size, pad_mode=self.padding, stride=self.ms_stride,
            dilation=self.ms_dilation, data_format=self.data_format
        )

    def construct(self, input, filters):
        """Apply the stored ``P.Conv3D`` primitive to (input, filters)."""
        outputs = self.conv3d(input, filters)
        return outputs


 def conv3d(input, filters, strides, padding, data_format='NDHWC', dilations=None, name=None):
@@ -677,23 +690,48 @@ def moments(x, axes, shift=None, keepdims=False):
    pass


 class MaxPool1d(Cell):
    """
    1-D max pooling expressed through the 2-D ``P.MaxPool`` primitive.

    The input gains a length-1 axis, is pooled with a (1, ksize[0]) window and
    (1, strides[0]) stride, and the extra axis is squeezed away again.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        super(MaxPool1d, self).__init__()
        self.data_format, padding = preprocess_1d_format(data_format=data_format, padding=padding)
        self.expand = P.ExpandDims()
        # Only the first entry of strides / ksize is used; the inserted axis gets 1.
        _strides = (1, strides[0])
        _ksize = (1, ksize[0])
        # Map the 1-D layout to its 2-D counterpart and remember which axis to drop.
        # NOTE(review): if data_format is neither 'NWC' nor 'NCW', _data_format
        # stays unbound and the P.MaxPool call below fails; presumably
        # preprocess_1d_format guarantees one of the two. Confirm.
        if self.data_format == 'NWC':
            self.squeeze = P.Squeeze(1)
            _data_format = 'NHWC'
        if self.data_format == 'NCW':
            self.squeeze = P.Squeeze(2)
            _data_format = 'NCHW'

        self.max_pool = P.MaxPool(kernel_size=_ksize, strides=_strides, pad_mode=padding, data_format=_data_format)

    def construct(self, inputs):
        # Insert the dummy axis at the same position the squeeze will remove.
        if self.data_format == 'NWC':
            x = self.expand(inputs, 1)
        if self.data_format == 'NCW':
            x = self.expand(inputs, 2)
        output = self.max_pool(x)
        output = self.squeeze(output)
        return output


 class MaxPool(Cell):

    def __init__(self, ksize, strides, padding, data_format=None):
        super(MaxPool, self).__init__()
        self.data_format, self.padding = preprocess_2d_format(data_format=data_format, padding=padding)
        ms_ksize = ksize[1]
        ms_strides = strides[1]
        self.maxpool = P.MaxPool(ksize=ms_ksize, strides=ms_strides, padding=self.padding)
        data_format, padding = preprocess_2d_format(data_format=data_format, padding=padding)

    def construct(self, inputs):
        if self.data_format == 'NHWC':
            inputs = nhwc_to_nchw(inputs)
        if data_format == 'NHWC':
            _strides = (strides[1], strides[2])
        if data_format == 'NCHW':
            _strides = (strides[2], strides[3])

        outputs = self.maxpool(inputs)
        self.maxpool = P.MaxPool(kernel_size=ksize, strides=_strides, pad_mode=padding, data_format=data_format)

        if self.data_format == 'NHWC':
            outputs = nchw_to_nhwc(outputs)
    def construct(self, inputs):
        outputs = self.maxpool(inputs)
        return outputs


@@ -710,7 +748,7 @@ def max_pool(input, ksize, strides, padding, data_format=None):
    ksize : int or list of ints
        An int or list of ints that has length 1, N or N+2.
        The size of the window for each dimension of the input tensor.
    strides : int or list of ints
    strides : list or list of ints
        An int or list of ints that has length 1, N or N+2.
        The stride of the sliding window for each dimension of the input tensor.
    padding : string
@@ -722,17 +760,54 @@ def max_pool(input, ksize, strides, padding, data_format=None):
    """
    data_format, padding = preprocess_2d_format(data_format=data_format, padding=padding)
    if data_format == 'NHWC':
        input = nhwc_to_nchw(input)

    ms_ksize = ksize[1]
    ms_strides = strides[2]
    outputs = P.MaxPool(ksize=ms_ksize, strides=ms_strides, padding=padding)(input)
    # channel first to channel last
    if data_format == 'NHWC':
        outputs = nchw_to_nhwc(outputs)
        _strides = (strides[1], strides[2])
    if data_format == 'NCHW':
        _strides = (strides[2], strides[3])
    outputs = P.MaxPool(kernel_size=ksize, strides=_strides, pad_mode=padding, data_format=data_format)(input)
    return outputs


 class AvgPool1d(Cell):
    """
    1-D average pooling expressed through ``P.AvgPool`` plus two short-input shortcuts.

    When the third input dimension equals the kernel width, or is too short for a
    second window, the result is a mean over axis 2. Otherwise the input gains a
    length-1 axis, is pooled in 2-D, and the extra axis is squeezed away.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        super(AvgPool1d, self).__init__()
        self.data_format, self.padding = preprocess_1d_format(data_format=data_format, padding=padding)
        # Only the first entry of ksize / strides is used; the inserted axis gets 1.
        self.kernel_size = (1, ksize[0])
        self.stride = (1, strides[0])

        # Map the 1-D layout to its 2-D counterpart and remember which axis to drop.
        # NOTE(review): _data_format stays unbound if data_format is neither
        # 'NWC' nor 'NCW'; presumably preprocess_1d_format rules that out. Confirm.
        if self.data_format == 'NWC':
            _data_format = 'NHWC'
            self.squeeze = P.Squeeze(1)
        if self.data_format == 'NCW':
            _data_format = 'NCHW'
            self.squeeze = P.Squeeze(2)

        self.avg_pool = P.AvgPool(
            kernel_size=self.kernel_size, strides=self.stride, pad_mode=self.padding, data_format=_data_format
        )
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.slice = P.Slice()
        self.expand = P.ExpandDims()
        self.shape = P.Shape()

    def construct(self, inputs):
        x = inputs
        # NOTE(review): the names below treat the shape as (batch, channel, width)
        # and the shortcuts reduce over axis 2 regardless of self.data_format;
        # verify this is correct for 'NWC' input.
        batch, channel, width = self.shape(inputs)
        if width == self.kernel_size[1]:
            # Exactly one window: a plain mean over axis 2.
            x = self.reduce_mean(x, 2)
        elif width - self.kernel_size[1] < self.stride[1]:
            # No room for a second window: keep the first kernel-width slice and average it.
            x = self.slice(x, (0, 0, 0), (batch, channel, self.kernel_size[1]))
            x = self.reduce_mean(x, 2)
        else:
            # General case: add a dummy axis, pool in 2-D, drop the axis again.
            if self.data_format == 'NCW':
                x = self.expand(x, 2)
            if self.data_format == 'NWC':
                x = self.expand(x, 1)
            x = self.avg_pool(x)
            x = self.squeeze(x)
        return x


 class AvgPool(Cell):

    def __init__(self, ksize, strides, padding, data_format=None):
@@ -740,16 +815,10 @@ class AvgPool(Cell):
        self.data_format, self.padding = preprocess_2d_format(data_format=data_format, padding=padding)
        ms_ksize = ksize[1]
        ms_strides = strides[1]
        self.avgpool = P.AvgPool(ksize=ms_ksize, strides=ms_strides, padding=padding)
        self.avgpool = P.AvgPool(ksize=ms_ksize, strides=ms_strides, padding=padding, data_format=self.data_format)

    def construct(self, inputs):
        if self.data_format == 'NHWC':
            inputs = nhwc_to_nchw(inputs)

        outputs = self.avgpool(inputs)

        if self.data_format == 'NHWC':
            outputs = nchw_to_nhwc(outputs)
        return outputs


@@ -783,6 +852,24 @@ def avg_pool(input, ksize, strides, padding):
    return outputs(input)


 class MaxPool3d(Cell):
    """3-D max pooling cell wrapping the ``P.MaxPool3D`` primitive.

    Parameters
    ----------
    ksize :
        Kernel size, forwarded unchanged as ``kernel_size``.
    strides : sequence
        Per-dimension strides of the 5-D input; only the three spatial entries
        are forwarded to the primitive.
    padding :
        Padding argument, normalized by ``preprocess_3d_format`` and stored on
        ``self.padding``.
    data_format : str
        Input layout; must resolve to ``'NDHWC'`` or ``'NCDHW'``.

    Raises
    ------
    ValueError
        If the layout resolves to neither ``'NDHWC'`` nor ``'NCDHW'``.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        super(MaxPool3d, self).__init__()
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)
        # Accept both the raw argument and its normalized form, so aliases that
        # preprocess_3d_format maps onto a layout no longer leave `_strides`
        # unbound, while every call that worked before still works.
        layouts = (data_format, self.data_format)
        if 'NDHWC' in layouts:
            _strides = (strides[1], strides[2], strides[3])
        elif 'NCDHW' in layouts:
            _strides = (strides[2], strides[3], strides[4])
        else:
            raise ValueError(
                "MaxPool3d needs data_format 'NDHWC' or 'NCDHW', but got {!r}".format(data_format)
            )
        # NOTE(review): the raw `padding` is forwarded here although the
        # normalized value is kept on self.padding — confirm which is intended.
        self.max_pool3d = P.MaxPool3D(
            kernel_size=ksize, strides=_strides, padding=padding, data_format=self.data_format
        )

    def __call__(self, inputs):
        """Apply the pooling primitive to ``inputs`` and return its result."""
        outputs = self.max_pool3d(inputs)
        return outputs


 def max_pool3d(input, ksize, strides, padding, data_format=None, name=None):
    """
    Performs the max pooling on the input.
@@ -813,6 +900,21 @@ def max_pool3d(input, ksize, strides, padding, data_format=None, name=None):
    pass


 class AvgPool3d(Cell):
    """Stub for 3-D average pooling: constructing it always ends in NotImplementedError."""

    def __init__(self, ksize, strides, padding, data_format=None):
        """Normalize the arguments, pick the spatial strides, then raise NotImplementedError."""
        super(AvgPool3d, self).__init__()
        normalized = preprocess_3d_format(data_format, padding)
        self.data_format, self.padding = normalized
        # The spatial strides are selected but nothing consumes them yet.
        if data_format == 'NDHWC':
            _strides = tuple(strides[axis] for axis in (1, 2, 3))
        elif data_format == 'NCDHW':
            _strides = tuple(strides[axis] for axis in (2, 3, 4))
        raise NotImplementedError

    def __call__(self, inputs):
        """No-op placeholder; returns None."""
        return None


 def avg_pool3d(input, ksize, strides, padding, data_format=None, name=None):
    """
    Performs the average pooling on the input.
@@ -929,22 +1031,27 @@ def depthwise_conv2d(input, filter, strides, padding, data_format=None, dilation

 class Conv1d_transpose(Cell):

    def __init__(self, strides, padding, data_format, dilations=None, out_channel=None, k_size=None, in_channels=None):
    def __init__(self, stride, padding, data_format, dilations=None, out_channel=None, k_size=None, in_channels=None):
        super(Conv1d_transpose, self).__init__()
        self.data_format, self.padding = preprocess_1d_format(data_format, padding)
        self.in_channels = in_channels
        self.out_channel = out_channel
        self.strides = (1, strides)
        self.stride = (1, stride)
        self.dilations = (1, dilations)
        self.k_size = (1, k_size)

        if self.data_format == 'NWC':
            self.data_format = 'NHWC'
            self.h_axis = 1
        else:
            self.data_format = 'NCHW'
            self.h_axis = 2
        self.conv2d_transpose = P.Conv2DBackpropInput(
            out_channel=self.in_channels, kernel_size=self.k_size, pad_mode=self.padding, stride=self.strides,
            dilation=self.dilations, mode=1, group=1
            out_channel=self.in_channels, kernel_size=self.k_size, pad_mode=self.padding, stride=self.stride,
            dilation=self.dilations, mode=1, group=1, data_format=self.data_format
        )
        self.shape = P.Shape()
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)
        self.squeeze = P.Squeeze(self.h_axis)

    def _deconv_output_length(self, input_length, filter_size, stride_size, dilation_size):
        length = 0
@@ -958,19 +1065,22 @@ class Conv1d_transpose(Cell):
        return length

    def construct(self, x, filters):
        if self.data_format == 'NWC':
            x = nhwc_to_nchw(x)
        x = self.expand_dims(x, 2)
        filters = self.expand_dims(filters, 2)
        n, _, h, w = self.shape(x)

        h_out = self._deconv_output_length(h, self.k_size[0], self.strides[0], self.dilations[0])
        w_out = self._deconv_output_length(w, self.k_size[1], self.strides[1], self.dilations[1])
        output = self.conv2d_transpose(x, filters, (n, self.out_channel, h_out, w_out))
        x = self.expand_dims(x, self.h_axis)
        filters = self.expand_dims(filters, self.h_axis)
        if self.data_format == 'NCHW':
            n, _, h, w = self.shape(x)
        else:
            n, h, w, _ = self.shape(x)
        h_out = self._deconv_output_length(h, self.k_size[0], self.stride[0], self.dilations[0])
        w_out = self._deconv_output_length(w, self.k_size[1], self.stride[1], self.dilations[1])
        if self.data_format == 'NCHW':
            output_size = (n, self.out_channel, h_out, w_out)
        else:
            output_size = (n, h_out, w_out, self.out_channel)
        output = self.conv2d_transpose(x, filters, output_size)
        output = self.squeeze(output)

        if self.data_format == 'NWC':
            output = nchw_to_nhwc(output)
        return output


@@ -1018,18 +1128,13 @@ class Conv2d_transpose(Cell):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.in_channels = in_channels
        self.out_channel = out_channel

        self.k_size = k_size
        if self.data_format == 'NHWC':
            self.strides = (strides[1], strides[2])
            self.dilations = (dilations[1], dilations[2])
        elif self.data_format == 'NCHW':
            self.strides = (strides[2], strides[3])
            self.dilations = (dilations[2], dilations[3])
        self.strides = strides
        self.dilations = dilations

        self.conv2d_transpose = P.Conv2DBackpropInput(
            out_channel=self.in_channels, kernel_size=self.k_size, pad_mode=self.padding, stride=self.strides,
            dilation=self.dilations, mode=1, group=1
            dilation=self.dilations, mode=1, group=1, data_format=self.data_format
        )
        self.shape = P.Shape()

@@ -1046,17 +1151,45 @@ class Conv2d_transpose(Cell):

    def construct(self, x, filters):
        if self.data_format == 'NHWC':
            x = nhwc_to_nchw(x)

        n, _, h, w = self.shape(x)

        h_out = self._deconv_output_length(h, self.k_size[0], self.strides[0], self.dilations[0])
        w_out = self._deconv_output_length(w, self.k_size[1], self.strides[1], self.dilations[1])

        output = self.conv2d_transpose(x, filters, (n, self.out_channel, h_out, w_out))
            h_axis, w_axis = 1, 2
            n, h, w, _ = self.shape(x)
        else:
            h_axis, w_axis = 2, 3
            n, _, h, w = self.shape(x)

        if self.data_format == 'NHWC':
            output = nchw_to_nhwc(x)
        if isinstance(self.strides, int):
            strides_h = self.strides
            strides_w = self.strides
        else:
            strides_list = list(self.strides)
            if len(strides_list) == 2:
                strides_h = strides_list[0]
                strides_w = strides_list[1]
            elif len(strides_list) == 4:
                strides_h = strides_list[h_axis]
                strides_w = strides_list[w_axis]

        if self.dilations is not None:
            if isinstance(self.dilations, int):
                dilations_h = self.dilations
                dilations_w = self.dilations
            else:
                dilations_list = list(self.dilations)
                if len(dilations_list) == 2:
                    dilations_h = dilations_list[0]
                    dilations_w = dilations_list[1]
                elif len(dilations_list) == 4:
                    dilations_h = dilations_list[h_axis]
                    dilations_w = dilations_list[w_axis]

        h_out = self._deconv_output_length(h, self.k_size[0], strides_h, dilations_h)
        w_out = self._deconv_output_length(w, self.k_size[1], strides_w, dilations_w)

        if self.data_format == 'NCHW':
            output_size = (n, self.out_channel, h_out, w_out)
        else:
            output_size = (n, h_out, w_out, self.out_channel)
        output = self.conv2d_transpose(x, filters, output_size)

        return output

@@ -1099,7 +1232,22 @@ def conv2d_transpose(


 class Conv3d_transpose(Cell):
    """Cell that forwards to the ``P.Conv3DTranspose`` primitive."""

    def __init__(
        self, strides, padding, data_format='NDHWC', dilations=None, name=None, out_channel=None, k_size=None,
        in_channels=None
    ):
        """Normalize layout/padding and build the primitive; ``name`` is accepted but not used."""
        super(Conv3d_transpose, self).__init__()
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)

        primitive_args = dict(
            in_channel=in_channels,
            out_channel=out_channel,
            kernel_size=k_size,
            mode=1,
            pad_mode=self.padding,
            stride=strides,
            dilation=dilations,
            data_format=self.data_format,
        )
        self.conv3d_transpose = P.Conv3DTranspose(**primitive_args)

    def construct(self, input, filters):
        """Run the transposed convolution primitive on ``input`` with ``filters``."""
        return self.conv3d_transpose(input, filters)


 def conv3d_transpose(
@@ -1143,16 +1291,22 @@ class BatchNorm(Cell):
    @cell_attr_register
    def __init__(
        self, num_features, epsilon=1e-5, decay=0.9, gamma=None, beta=None, moving_mean=None, moving_var=None,
        is_train=None, device_num_each_group=1, data_format='channels_last'
        is_train=None, device_num_each_group=1, process_groups=0, data_format='NCHW'
    ):
        super(BatchNorm, self).__init__()
        if data_format in ["channels_last", "NHWC", "nhwc"]:
            data_format = "NHWC"
        elif data_format in ["channels_first", "NCHW", "nchw"]:
            data_format = "NCHW"
        validator.check_value_type('num_features', num_features, [int], self.cls_name)
        if num_features < 1:
            raise ValueError("num_features must be at least 1")

        if decay < 0 or decay > 1:
            raise ValueError("momentum should be a number in range [0, 1], but got {}".format(decay))

        self.data_format = data_format
        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError("NHWC format only support in GPU target.")
        self.use_batch_statistics = is_train
        self.num_features = num_features
        self.eps = epsilon
@@ -1160,19 +1314,47 @@ class BatchNorm(Cell):
        self.moving_variance = moving_var
        self.gamma = gamma
        self.beta = beta
        self.group = check_int_positive(device_num_each_group)
        self.group_device_num = validator.check_positive_int(device_num_each_group)
        self.process_groups = process_groups
        self.is_global = False
        if self.group != 1:
        self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
        global SYNC_BN_GROUP_NAME
        # for GlobalBatchNorm
        if self.group_device_num != 1:
            self.rank_id = get_rank()
            self.rank_size = get_group_size()
            self.device_list = [i for i in range(0, self.rank_size)]
            self.rank_list = self.list_group(self.device_list, self.group)
            self.rank_list = self.list_group(self.device_list, self.group_device_num)
            self.rank_list_idx = len(self.rank_list)
            for i in range(self.rank_list_idx):
                if self.rank_id in self.rank_list[i] and self.group != 1:
                if self.rank_id in self.rank_list[i]:
                    self.is_global = True
                    management.create_group('group' + str(i), self.rank_list[i])
                    self.all_reduce = P.AllReduce(P.ReduceOp.SUM, 'group' + str(i)).add_prim_attr('fusion', 1)
                    if SYNC_BN_GROUP_NAME == "":
                        SYNC_BN_GROUP_NAME = "sync_bn_group" + str(i)
                        management.create_group(SYNC_BN_GROUP_NAME, self.rank_list[i])
        # for SyncBatchNorm
        if self.process_groups != 0:
            self.rank_id = get_rank()
            self.rank_size = get_group_size()
            if self.process_groups is not None:
                validator.check_isinstance("process_groups", self.process_groups, list)
                self._check_rank_ids(self.process_groups, self.rank_size)
                for i in range(len(self.process_groups)):
                    validator.check_isinstance("process_groups[" + str(i) + "]", self.process_groups[i], list)
                    self.group_device_num = len(self.process_groups[i])
                    if self.rank_id in self.process_groups[i] and self.group_device_num > 1:
                        self.is_global = True
                        if SYNC_BN_GROUP_NAME == "":
                            SYNC_BN_GROUP_NAME = "sync_bn_group" + str(i)
                            management.create_group(SYNC_BN_GROUP_NAME, self.process_groups[i])
            elif self.rank_size > 1:
                self.is_global = True
                self.group_device_num = self.rank_size
                self.device_list = [i for i in range(0, self.rank_size)]
                if SYNC_BN_GROUP_NAME == "":
                    SYNC_BN_GROUP_NAME = "sync_bn_group0"
                    management.create_group(SYNC_BN_GROUP_NAME, self.device_list)

        self.shape = P.Shape()
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.square = P.Square()
@@ -1180,8 +1362,7 @@ class BatchNorm(Cell):
        self.cast = P.Cast()
        self.dtype = P.DType()
        self.reshape = P.Reshape()
        self.is_ascend = context.get_context("device_target") == "Ascend"
        self.is_gpu = context.get_context("device_target") == "GPU"
        self._target = context.get_context("device_target")
        self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
        self.momentum = 1.0 - decay
        if context.get_context("enable_ge"):
@@ -1189,16 +1370,13 @@ class BatchNorm(Cell):
        else:
            self.is_ge_backend = False

        if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
            self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
        elif self.is_gpu:
            self.bn_train = P.FusedBatchNormEx(mode=1, epsilon=self.eps, momentum=self.momentum)
        else:
            self.bn_train = P.FusedBatchNorm(mode=1, epsilon=self.eps, momentum=self.momentum)
        self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps)
        self.enable_global_sync = self.is_global and (self.is_ge_backend or (self.is_graph_mode and self.is_ascend))
        self.enable_default_train = self.is_graph_mode and not self.is_global and \
                                    (self.is_ge_backend or self.is_ascend)
        self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps, momentum=self.momentum, data_format=self.format)
        if self.is_global:
            self.bn_train = inner.SyncBatchNorm(
                epsilon=self.eps, momentum=self.momentum, group=SYNC_BN_GROUP_NAME, device_num=self.group_device_num
            )

        self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps, data_format=self.format)

        data_parallel_strategy = ((1, ), (1, ))
        data_parallel_strategy_one = ((1, ), ())
@@ -1209,9 +1387,6 @@ class BatchNorm(Cell):
        self.assign_sub_mean = P.AssignSub().shard(data_parallel_strategy)
        self.assign_sub_var = P.AssignSub().shard(data_parallel_strategy)

    def _check_data_dim(self, x):
        raise NotImplementedError

    def list_group(self, world_rank, group_size):
        if group_size > get_group_size():
            raise ValueError(
@@ -1224,101 +1399,37 @@ class BatchNorm(Cell):
        group_list = [list(i) for i in world_rank_list]
        return group_list

    def _global_sync(self, x, axes, re_shape):
        """Calculate the global (cross-device) batch normalization output.

        The per-device mean and mean-of-squares over ``axes`` are summed with
        ``self.all_reduce`` and divided by ``self.group``; the variance is
        derived as ``E[x^2] - E[x]^2``. ``x`` is then normalized, scaled by
        ``gamma`` and shifted by ``beta`` (both reshaped to ``re_shape``).

        As a side effect the running statistics are moved towards the global
        statistics by ``self.momentum``; ``F.depend`` ties those assignments to
        the returned value.

        Parameters
        ----------
        x : tensor
            Input to normalize.
        axes : tuple of int
            Axes reduced when computing the statistics.
        re_shape : tuple of int
            Shape the per-feature parameters are reshaped to before use.

        Returns
        -------
        tensor
            The normalized output.
        """
        x_mean = self.reduce_mean(x, axes)
        x_mean_square = self.reduce_mean(self.square(x), axes)
        global_batch_mean = self.all_reduce(x_mean) / self.group
        global_batch_mean_square = self.all_reduce(x_mean_square) / self.group
        global_mean = global_batch_mean
        global_var = global_batch_mean_square - self.square(global_mean)
        var_sqrt = self.sqrt(global_var + self.eps)
        mean_first = (x - global_mean) / var_sqrt
        y = mean_first * self.reshape(self.gamma, re_shape) + self.reshape(self.beta, re_shape)

        mean_sub = self.sub_mean(self.reshape(self.moving_mean, re_shape), global_mean)
        tmp_mean = self.mul_mean(mean_sub, self.cast(self.momentum, self.dtype(mean_sub)))
        # The variance delta has to start from the running *variance*; using the
        # running mean here would drive moving_variance towards a meaningless value.
        mean_sub2 = self.sub_var(self.reshape(self.moving_variance, re_shape), global_var)
        tmp_variance = self.mul_var(mean_sub2, self.cast(self.momentum, self.dtype(mean_sub2)))
        y = F.depend(y, self.assign_sub_mean(self.moving_mean, self.reshape(tmp_mean, self.shape(self.moving_mean))))
        y = F.depend(
            y, self.assign_sub_var(self.moving_variance, self.reshape(tmp_variance, self.shape(self.moving_variance)))
        )
        return y

    def get_dim(self, input):
        """Map the rank of ``input`` to a tag: '1d' for rank 2, '2d' for rank 4.

        Raises ValueError for any other rank.
        """
        rank = len(self.shape(input))
        if rank == 4:
            return '2d'
        if rank == 2:
            return '1d'
        raise ValueError("The input must has 2 dims or 4 dims.")

    def _shape_check_bn(self, in_shape, in_dims):
        dim = len(in_shape)
        if in_dims == '1d' and dim != 2:
            raise ValueError("The input must has 2 dims.")
        if in_dims == '2d' and dim != 4:
            raise ValueError("The input must has 4 dims.")
        if in_dims == 'both' and dim != 2 and dim != 4:
            raise ValueError("The input must has 2 dims or 4 dims.")

    def _shape_infer(self, x_shape, num_feature):
        """global batch normalization shape and axes infer"""
        if len(x_shape) == 4:
            axes = (0, 2, 3)
            re_shape = (1, num_feature, 1, 1)
        else:
            axes = (0, )
            re_shape = (1, num_feature)
        return axes, re_shape
    def _check_rank_ids(self, process_groups, rank_size):
        """Validate every rank id found in ``process_groups``.

        Each id is range-checked against ``rank_size`` through
        ``validator.check_int_range``; an id that occurs more than once across
        all groups raises ValueError.
        """
        visited = set()
        for group in process_groups:
            for rank in group:
                validator.check_int_range(rank, 0, rank_size, Rel.INC_LEFT, "rank id in process_groups")
                if rank in visited:
                    raise ValueError("rank id in process_groups should not be duplicated.")
                visited.add(rank)

    def construct(self, inputs):
        x = inputs
        self._shape_check_bn(self.shape(x), self.get_dim(x))
        if self.use_batch_statistics is None:
            flag = self.training
        else:
            flag = self.use_batch_statistics
        x_shape = F.shape(inputs)
        if len(x_shape) == 5:
            inputs = self.reshape(inputs, (x_shape[0], x_shape[1], x_shape[2] * x_shape[3], x_shape[4]))

        flag = self.use_batch_statistics

        if flag:
            if self.enable_global_sync:
                if self.data_format == 'channels_last' and self.get_dim(x) == '2d':
                    x = nhwc_to_nchw(x)
                axes, re_shape = self._shape_infer(F.shape(x), self.num_features)
                y = self._global_sync(x, axes, re_shape)
                if self.data_format == 'channels_last' and self.get_dim(x) == '2d':
                    y = nchw_to_nhwc(y)
                return y

            if self.enable_default_train:
                if self.data_format == 'channels_last' and self.get_dim(x) == '2d':
                    x = nhwc_to_nchw(x)
                y, batch_mean, batch_var, _, _ = self.bn_train(x, self.gamma, self.beta, None, None)

                mean_sub = self.sub_mean(self.moving_mean, batch_mean)
                temp_mean = self.mul_mean(mean_sub, self.momentum)
                mean_sub2 = self.sub_var(self.moving_variance, batch_var)
                temp_variance = self.mul_var(mean_sub2, self.momentum)
                y = F.depend(y, self.assign_sub_mean(self.moving_mean, temp_mean))
                y = F.depend(y, self.assign_sub_var(self.moving_variance, temp_variance))
                if self.data_format == 'channels_last' and self.get_dim(x) == '2d':
                    y = nchw_to_nhwc(y)
                return y

            if self.data_format == 'channels_last' and self.get_dim(x) == '2d':
                x = nhwc_to_nchw(x)
            y = self.bn_train(x, self.gamma, self.beta, self.moving_mean, self.moving_variance)[0]
            if self.data_format == 'channels_last' and self.get_dim(x) == '2d':
                y = nchw_to_nhwc(y)
            return y
        if self.data_format == 'channels_last' and self.get_dim(x) == '2d':
            x = nhwc_to_nchw(x)
        y = self.bn_infer(x, self.gamma, self.beta, self.moving_mean, self.moving_variance)[0]
        if self.data_format == 'channels_last' and self.get_dim(x) == '2d':
            y = nchw_to_nhwc(y)
        return y
            output = self.bn_train(inputs, self.gamma, self.beta, self.moving_mean, self.moving_variance)[0]

            if len(x_shape) == 5:
                output = self.reshape(output, x_shape)
            return output

        output = self.bn_infer(inputs, self.gamma, self.beta, self.moving_mean, self.moving_variance)[0]
        if len(x_shape) == 5:
            output = self.reshape(output, x_shape)
        return output

    def extend_repr(self):
        """Return a one-line summary of the layer's feature count, epsilon, momentum and parameters."""
        template = 'num_features={}, eps={}, momentum={}, gamma={}, beta={}, moving_mean={}, moving_variance={}'
        values = (
            self.num_features,
            self.eps,
            self.momentum,
            self.gamma,
            self.beta,
            self.moving_mean,
            self.moving_variance,
        )
        return template.format(*values)


 class GroupConv2D(Cell):
@@ -1337,17 +1448,11 @@ class GroupConv2D(Cell):

        self.conv2d = P.Conv2D(
            out_channel=out_channel, kernel_size=k_size, pad_mode=self.padding, stride=self.ms_stride,
            dilation=self.ms_dilation, mode=1, group=groups
            dilation=self.ms_dilation, mode=1, group=groups, data_format=self.data_format
        )

    def construct(self, inputs, filters):
        if self.data_format == 'NHWC':
            inputs = nhwc_to_nchw(inputs)

        outputs = self.conv2d(inputs, filters)

        if self.data_format == 'NHWC':
            outputs = nchw_to_nhwc(outputs)
        return outputs


@@ -1407,30 +1512,24 @@ class SeparableConv2D(Cell):
        if self.data_format is 'NHWC':
            self.ms_stride = strides[1]
            self.ms_dilation = dilations[1]
            # self.transpose = P.Transpose()
        elif self.data_format is 'NCHW':
            self.ms_stride = strides[2]
            self.ms_dilation = dilations[2]

        self.depthwise_conv = P.Conv2D(
            out_channel=self.in_channel * self.depth_multiplier, kernel_size=self.k_size, pad_mode=self.padding,
            stride=self.ms_stride, dilation=self.ms_dilation, mode=1, group=self.in_channel
            stride=self.ms_stride, dilation=self.ms_dilation, mode=1, group=self.in_channel,
            data_format=self.data_format
        )

        self.pointwise_conv = P.Conv2D(
            out_channel=self.out_channel, kernel_size=(1, 1), pad_mode=self.padding, stride=(1, 1), dilation=(1, 1),
            mode=1, group=1
            mode=1, group=1, data_format=self.data_format
        )

    def construct(self, x, depthwise_filters, pointwise_filters):
        if self.data_format == 'NHWC':
            x = nhwc_to_nchw(x)

        outputs = self.depthwise_conv(x, depthwise_filters)
        outputs = self.pointwise_conv(outputs, pointwise_filters)

        if self.data_format == 'NHWC':
            outputs = nchw_to_nhwc(outputs)
        return outputs


@@ -1440,26 +1539,27 @@ class AdaptiveMeanPool1D(Cell):
        super(AdaptiveMeanPool1D, self).__init__()
        self.data_format, _ = preprocess_1d_format(data_format, None)
        self.output_size = output_size
        if self.data_format == 'NWC':
            self.data_format = 'NHWC'
            self.h_axis = 1
        else:
            self.data_format = 'NCHW'
            self.h_axis = 2
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)
        self.squeeze = P.Squeeze(self.h_axis)
        self.shape = P.Shape()

    def construct(self, inputs):

        if self.data_format == 'NWC':
            n, w, c = inputs.shape
            inputs = nhwc_to_nchw(inputs)
        if self.data_format == 'NHWC':
            n, w, c = self.shape(inputs)
        else:
            n, c, w = inputs.shape
        inputs = self.expand_dims(inputs, 2)

            n, c, w = self.shape(inputs)
        inputs = self.expand_dims(inputs, self.h_axis)
        stride = (1, w // self.output_size)
        kernel = (1, w - (self.output_size - 1) * stride[1])
        outputs = P.AvgPool(kernel_size=kernel, strides=stride, pad_mode='VALID')(inputs)
        outputs = P.AvgPool(kernel_size=kernel, strides=stride, pad_mode='VALID', data_format=self.data_format)(inputs)
        outputs = self.squeeze(outputs)

        if self.data_format == 'NWC':
            outputs = nchw_to_nhwc(outputs)

        return outputs


@@ -1469,31 +1569,38 @@ class AdaptiveMeanPool2D(Cell):
        super(AdaptiveMeanPool2D, self).__init__()
        self.data_format, _ = preprocess_2d_format(data_format, None)
        self.output_size = output_size
        if self.data_format == 'NHWC':
            self.h_axis = 1
        else:
            self.h_axis = 2
        self.shape = P.Shape()

    def construct(self, inputs):

        if self.data_format == 'NHWC':
            n, h, w, c = inputs.shape
            inputs = nhwc_to_nchw(inputs)
            n, h, w, c = self.shape(inputs)
        else:
            n, c, h, w = inputs.shape
            n, c, h, w = self.shape(inputs)

        out_h, out_w = self.output_size
        stride_h = h // out_h
        kernel_h = h - (out_h - 1) * stride_h
        stride_w = w // out_w
        kernel_w = w - (out_w - 1) * stride_w
        outputs = P.AvgPool(kernel_size=(kernel_h, kernel_w), strides=(stride_h, stride_w), pad_mode='VALID')(inputs)

        if self.data_format == 'NHWC':
            outputs = nchw_to_nhwc(outputs)
        outputs = P.AvgPool(
            kernel_size=(kernel_h, kernel_w), strides=(stride_h, stride_w), pad_mode='VALID',
            data_format=self.data_format
        )(inputs)

        return outputs


 class AdaptiveMeanPool3D(Cell):
    """Stub for 3-D adaptive mean pooling; calling an instance raises NotImplementedError."""

    def __init__(self, output_size, data_format):
        """Accept the arguments without storing them."""
        return None

    def __call__(self, inputs):
        """Always raise, since there is no implementation."""
        raise NotImplementedError()


 class AdaptiveMaxPool1D(Cell):
@@ -1502,26 +1609,28 @@ class AdaptiveMaxPool1D(Cell):
        super(AdaptiveMaxPool1D, self).__init__()
        self.data_format, _ = preprocess_1d_format(data_format, None)
        self.output_size = output_size
        if self.data_format == 'NWC':
            self.data_format = 'NHWC'
            self.h_axis = 1
        else:
            self.data_format = 'NCHW'
            self.h_axis = 2
        self.expand_dims = P.ExpandDims()
        self.squeeze = P.Squeeze(2)
        self.squeeze = P.Squeeze(self.h_axis)
        self.shape = P.Shape()

    def construct(self, inputs):

        if self.data_format == 'NWC':
            n, w, c = inputs.shape
            inputs = nhwc_to_nchw(inputs)
        if self.data_format == 'NHWC':
            n, w, c = self.shape(inputs)
        else:
            n, c, w = inputs.shape
        inputs = self.expand_dims(inputs, 2)

            n, c, w = self.shape(inputs)
        inputs = self.expand_dims(inputs, self.h_axis)
        stride = (1, w // self.output_size)
        kernel = (1, w - (self.output_size - 1) * stride[1])
        outputs = P.MaxPool(kernel_size=kernel, strides=stride, pad_mode='VALID')(inputs)
        outputs = P.MaxPool(kernel_size=kernel, strides=stride, pad_mode='VALID', data_format=self.data_format)(inputs)
        outputs = self.squeeze(outputs)

        if self.data_format == 'NWC':
            outputs = nchw_to_nhwc(outputs)

        return outputs


@@ -1531,31 +1640,37 @@ class AdaptiveMaxPool2D(Cell):
        super(AdaptiveMaxPool2D, self).__init__()
        self.data_format, _ = preprocess_2d_format(data_format, None)
        self.output_size = output_size
        if self.data_format == 'NHWC':
            self.h_axis = 1
        else:
            self.h_axis = 2
        self.shape = P.Shape()

    def construct(self, inputs):

        if self.data_format == 'NHWC':
            n, h, w, c = inputs.shape
            inputs = nhwc_to_nchw(inputs)
            n, h, w, c = self.shape(inputs)
        else:
            n, c, h, w = inputs.shape

            n, c, h, w = self.shape(inputs)
        out_h, out_w = self.output_size
        stride_h = h // out_h
        kernel_h = h - (out_h - 1) * stride_h
        stride_w = w // out_w
        kernel_w = w - (out_w - 1) * stride_w
        outputs = P.MaxPool(kernel_size=(kernel_h, kernel_w), strides=(stride_h, stride_w), pad_mode='VALID')(inputs)

        if self.data_format == 'NHWC':
            outputs = nchw_to_nhwc(outputs)
        outputs = P.MaxPool(
            kernel_size=(kernel_h, kernel_w), strides=(stride_h, stride_w), pad_mode='VALID',
            data_format=self.data_format
        )(inputs)

        return outputs


 class AdaptiveMaxPool3D(Cell):
    """Stub for 3-D adaptive max pooling; calling an instance raises NotImplementedError."""

    def __init__(self, output_size, data_format):
        """Accept the arguments without storing them."""
        return None

    def __call__(self, inputs):
        """Always raise, since there is no implementation."""
        raise NotImplementedError()


 class BinaryConv2D(Cell):
@@ -1566,14 +1681,13 @@ class BinaryConv2D(Cell):
        if self.data_format is 'NHWC':
            self.ms_stride = strides[1]
            self.ms_dilation = dilations[1]
            # self.transpose = P.Transpose()
        elif self.data_format is 'NCHW':
            self.ms_stride = strides[2]
            self.ms_dilation = dilations[2]

        self.conv2d = P.Conv2D(
            out_channel=out_channel, kernel_size=k_size, pad_mode=self.padding, stride=self.ms_stride,
            dilation=self.ms_dilation, mode=1, group=1
            dilation=self.ms_dilation, mode=1, group=1, data_format=self.data_format
        )

        @bprop_getters.register(P.Sign)
@@ -1590,16 +1704,9 @@ class BinaryConv2D(Cell):

    def construct(self, inputs, filters):

        if self.data_format == 'NHWC':
            inputs = nhwc_to_nchw(inputs)

        filters = self.sign(filters)

        outputs = self.conv2d(inputs, filters)

        if self.data_format == 'NHWC':
            outputs = nchw_to_nhwc(outputs)

        return outputs


--- a/tensorlayer/backend/ops/paddle_backend.py
+++ b/tensorlayer/backend/ops/paddle_backend.py
@@ -342,7 +342,7 @@ def reshape(tensor, shape):
    -------
        A Tensor. Has the same type as tensor
    """
    raise NotImplementedError
    return pd.reshape(tensor, shape)


 class Concat(object):
@@ -372,7 +372,7 @@ def concat(values, axis):
    raise NotImplementedError


 def convert_to_tensor(value, dtype=None):
 def convert_to_tensor(value, dtype=float32):
    """
    Converts the given value to a Tensor.

@@ -387,7 +387,11 @@ def convert_to_tensor(value, dtype=None):
    -------
        A Tensor based on value.
    """
    raise NotImplementedError
    return pd.to_tensor(value, dtype=dtype)


 def convert_to_numpy(value):
    """Return the result of calling ``value.numpy()``."""
    as_numpy = value.numpy
    return as_numpy()


 def sqrt(x):
@@ -418,15 +422,10 @@ class ReduceSum(object):
 class ReduceMean(object):
    # Callable wrapper around a mean reduction over a stored `axis`.
    # NOTE(review): this body looks like it carries both the pre-change lines
    # (axis validation, NotImplementedError) and the post-change lines
    # (`self.axis`, `pd.mean`) of a patch — confirm which version is intended.

    def __init__(self, axis):
        # Only the two literal axis lists below pass this check; each one also
        # records a matching layout name on `self.data_format`.
        if axis == [1, 2]:
            self.data_format = 'NHWC'
        elif axis == [2, 3]:
            self.data_format = 'NCHW'
        else:
            # NOTE(review): raising a plain string is not allowed in Python 3;
            # this line ends in a TypeError rather than showing the message.
            raise ("`data_format` should have one of the following values: [`channels_last`, `channels_first`]")
        self.axis = axis

    def __call__(self, inputs):
        raise NotImplementedError
        # Unreachable as written: the raise above always fires first.
        return pd.mean(inputs, axis=self.axis)


 def reduce_mean(input_tensor, axis=None):
@@ -454,15 +453,10 @@ def reduce_mean(input_tensor, axis=None):
 class ReduceMax(object):
    # Callable wrapper around a max reduction over a stored `axis`.
    # NOTE(review): this body looks like it carries both the pre-change lines
    # (axis validation, NotImplementedError) and the post-change lines
    # (`self.axis`, `pd.max`) of a patch — confirm which version is intended.

    def __init__(self, axis):
        # Only the two literal axis lists below pass this check; each one also
        # records a matching layout name on `self.data_format`.
        if axis == [1, 2]:
            self.data_format = 'NHWC'
        elif axis == [2, 3]:
            self.data_format = 'NCHW'
        else:
            # NOTE(review): raising a plain string is not allowed in Python 3;
            # this line ends in a TypeError rather than showing the message.
            raise ("`data_format` should have one of the following values: [`channels_last`, `channels_first`]")
        self.axis = axis

    def __call__(self, inputs):
        raise NotImplementedError
        # Unreachable as written: the raise above always fires first.
        return pd.max(inputs, axis=self.axis)


 def reduce_max(input_tensor, axis=None):
@@ -817,6 +811,12 @@ def split(value, num_or_size_splits, axis=0, num=None):
    pass


 class Floor(object):
    """Placeholder floor operator; calling an instance raises NotImplementedError."""

    def __call__(self, *_args, **_kwargs):
        """Accept any arguments and raise NotImplementedError."""
        raise NotImplementedError()


 def floor(x):
    raise NotImplementedError

@@ -875,7 +875,7 @@ class NCELoss(object):
        pass


 class Not_equal(object):
 class NotEqual(object):

    def __init__(self):
        pass
@@ -884,7 +884,7 @@ class Not_equal(object):
        pass


 class Count_nonzero(object):
 class CountNonzero(object):

    def __init__(self, keepdims=None, dtype="int64"):
        pass
@@ -950,6 +950,12 @@ class Sign(object):
        raise NotImplementedError


 class Ceil(object):
    """Callable stub named for the ceil operation; calling it always raises ``NotImplementedError``."""

    def __call__(self, *args, **kwargs):
        # No implementation is provided here: every call fails regardless of arguments.
        raise NotImplementedError


 def ceil(x):
    raise NotImplementedError

--- a/tensorlayer/backend/ops/paddle_nn.py
+++ b/tensorlayer/backend/ops/paddle_nn.py
@@ -46,10 +46,10 @@ def preprocess_1d_format(data_format, padding):
        str "NWC" or "NCW" and "SAME" or "VALID"
    """

    if data_format in ["channels_last", "NWC"]:
        data_format = "NWC"
    elif data_format in ["channels_first", "NCW"]:
        data_format = "NCW"
    if data_format in ["channels_last", "NWC", "NLC"]:
        data_format = "NLC"
    elif data_format in ["channels_first", "NCW", "NCL"]:
        data_format = "NCL"
    elif data_format == None:
        data_format = None
    else:
@@ -128,7 +128,15 @@ def nchw_to_nhwc(x):
        channels last tensor data
    """

    pass
    if len(x.shape) == 3:
        x = pd.transpose(x, (0, 2, 1))
    elif len(x.shape) == 4:
        x = pd.transpose(x, (0, 2, 3, 1))
    elif len(x.shape) == 5:
        x = pd.transpose(x, (0, 2, 3, 4, 1))
    else:
        raise Exception("Unsupported dimensions")
    return x


 def nhwc_to_nchw(x):
@@ -145,7 +153,15 @@ def nhwc_to_nchw(x):
        channels first tensor data
    """

    pass
    if len(x.shape) == 3:
        x = pd.transpose(x, (0, 2, 1))
    elif len(x.shape) == 4:
        x = pd.transpose(x, (0, 3, 1, 2))
    elif len(x.shape) == 5:
        x = pd.transpose(x, (0, 4, 1, 2, 3))
    else:
        raise Exception("Unsupported dimensions")
    return x


 class ReLU(object):
@@ -338,7 +354,8 @@ class Dropout(object):
        self.seed = seed

    def __call__(self, inputs):
        raise NotImplementedError
        output = F.dropout(inputs, p=self.keep, mode='upscale_in_train')
        return output


 class BiasAdd(object):
@@ -357,11 +374,22 @@ class BiasAdd(object):
        A Tensor with the same type as value.
    """

    def __init__(self, data_format='NHWC'):
        self.data_format = data_format
    def __init__(self, data_format='channels_last'):
        super(BiasAdd, self).__init__()
        if data_format in ['channels_first', 'NCL', 'NCHW', 'NCDHW']:
            self.data_format = 'channels_first'
        elif data_format in ['channels_last', 'NLC', 'NHWC', 'NDHWC']:
            self.data_format = 'channels_last'
        else:
            raise ("Unsupported data format: " + str(data_format))

    def __call__(self, x, bias):
        return pd.add(x, bias)
        if len(x.shape) > 2 and self.data_format == 'channels_first':
            x = nchw_to_nhwc(x)
        outputs = pd.add(x, bias)
        if len(x.shape) > 2 and self.data_format == 'channels_first':
            outputs = nhwc_to_nchw(outputs)
        return outputs


 def bias_add(x, bias):
@@ -383,12 +411,26 @@ def bias_add(x, bias):
    -------
        A Tensor with the same type as value.
    """
    raise NotImplementedError

    #TODO the bias_add only supports channels_last
    outputs = pd.add(x, bias)
    return outputs


 class Conv1D(object):
    """
    Callable that applies ``F.conv1d`` with settings fixed at construction.

    Parameters
    ----------
    stride :
        Forwarded unchanged as ``stride`` to ``F.conv1d``.
    padding :
        Normalized together with ``data_format`` by ``preprocess_1d_format``.
    data_format : str
        Defaults to 'NWC'; normalized by ``preprocess_1d_format``.
    dilations :
        Forwarded unchanged as ``dilation`` to ``F.conv1d``.
    out_channel, k_size :
        Accepted but not used by this class.
    """

    def __init__(self, stride, padding, data_format='NWC', dilations=None, out_channel=None, k_size=None):
        super(Conv1D, self).__init__()
        normalized = preprocess_1d_format(padding=padding, data_format=data_format)
        self.data_format, self.padding = normalized
        self.dilations = dilations
        self.stride = stride

    def __call__(self, input, filters):
        """Convolve ``input`` with ``filters`` and return what ``F.conv1d`` returns."""
        return F.conv1d(
            x=input,
            weight=filters,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilations,
            data_format=self.data_format,
        )


 def conv1d(input, filters, stride, padding, data_format='NWC', dilations=None, name=None):
@@ -420,23 +462,29 @@ def conv1d(input, filters, stride, padding, data_format='NWC', dilations=None, n
        A Tensor. Has the same type as input.
    """

    pass
    outputs = F.conv1d(
        x=input, weight=filters, stride=stride, padding=padding, data_format=data_format, dilation=dilations, name=name
    )
    return outputs


 class Conv2D(object):
    """
    Callable that applies ``F.conv2d`` with settings fixed at construction.

    Parameters
    ----------
    strides : sequence of ints
        Elements 1 and 2 are used when the normalized format is 'NHWC', elements 2 and 3 when it is 'NCHW'.
    padding :
        Normalized together with ``data_format`` by ``preprocess_2d_format``.
    data_format : str
        Defaults to 'NHWC'; normalized by ``preprocess_2d_format``.
    dilations : sequence of ints
        Indexed exactly like ``strides``. The default ``None`` cannot be indexed, so a sequence
        must be supplied whenever the normalized format is 'NHWC' or 'NCHW'.
    out_channel :
        Accepted but not used by this class.
    k_size : sequence
        Its first element is stored as ``self.ksize``; the default ``None`` cannot be indexed.
    """

    def __init__(self, strides, padding, data_format='NHWC', dilations=None, out_channel=None, k_size=None):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.ksize = k_size[0]
        # Compare by value: `is` tests object identity and only matches equal strings when the
        # interpreter happens to intern them.
        if self.data_format == 'NHWC':
            first_spatial = 1
        elif self.data_format == 'NCHW':
            first_spatial = 2
        else:
            first_spatial = None

        if first_spatial is not None:
            # NOTE(review): dg_stride / dg_dilation are not read anywhere in this class --
            # confirm whether anything outside still relies on them.
            self.dg_stride = strides[first_spatial]
            self.dg_dilation = dilations[first_spatial]
            self._stride = (strides[first_spatial], strides[first_spatial + 1])
            self._dilation = (dilations[first_spatial], dilations[first_spatial + 1])

    def __call__(self, inputs, filters):
        """Convolve ``inputs`` with ``filters`` and return what ``F.conv2d`` returns."""
        # The former unconditional `raise NotImplementedError` made this call unreachable.
        outputs = F.conv2d(
            x=inputs, weight=filters, stride=self._stride, dilation=self._dilation, padding=self.padding,
            data_format=self.data_format
        )
        return outputs


 def conv2d(input, filters, strides, padding, data_format='NCHW', dilations=None):
@@ -464,12 +512,36 @@ def conv2d(input, filters, strides, padding, data_format='NCHW', dilations=None)
    -------
        A Tensor. Has the same type as input.
    """
    raise NotImplementedError
    data_format, padding = preprocess_2d_format(data_format, padding)
    if data_format is 'NHWC':
        _stride = (strides[1], strides[2])
        _dilation = (dilations[1], dilations[2])
    elif data_format is 'NCHW':
        _stride = (strides[2], strides[3])
        _dilation = (dilations[2], dilations[3])
    outputs = F.conv2d(
        x=input, weight=filters, stride=_stride, dilation=_dilation, padding=padding, data_format=data_format
    )
    return outputs


 class Conv3D(object):
    """
    Callable that applies ``F.conv3d`` with settings fixed at construction.

    Parameters
    ----------
    strides : sequence of ints
        Elements 1..3 are used when the normalized format is 'NDHWC', elements 2..4 when it is 'NCDHW'.
    padding :
        Normalized together with ``data_format`` by ``preprocess_3d_format``.
    data_format : str
        Defaults to 'NDHWC'; normalized by ``preprocess_3d_format``.
    dilations : sequence of ints
        Indexed exactly like ``strides``. The default ``None`` cannot be indexed, so a sequence
        must be supplied whenever the normalized format is 'NDHWC' or 'NCDHW'.
    out_channel, k_size :
        Accepted but not used by this class.
    """

    def __init__(self, strides, padding, data_format='NDHWC', dilations=None, out_channel=None, k_size=None):
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)
        # Branch on the normalized value with `==`. The previous code used `is` (object identity)
        # on the raw argument, so any alias that normalization maps to these names left
        # `_strides` / `_dilations` unset.
        if self.data_format == 'NDHWC':
            self._strides = (strides[1], strides[2], strides[3])
            self._dilations = (dilations[1], dilations[2], dilations[3])
        elif self.data_format == 'NCDHW':
            self._strides = (strides[2], strides[3], strides[4])
            self._dilations = (dilations[2], dilations[3], dilations[4])

    def __call__(self, input, filters):
        """Convolve ``input`` with ``filters`` and return what ``F.conv3d`` returns."""
        outputs = F.conv3d(
            x=input, weight=filters, stride=self._strides, dilation=self._dilations, data_format=self.data_format,
            padding=self.padding
        )
        return outputs


 def conv3d(input, filters, strides, padding, data_format='NDHWC', dilations=None, name=None):
@@ -484,7 +556,7 @@ def conv3d(input, filters, strides, padding, data_format='NDHWC', dilations=None
    filters : tensor
        Must have the same type as input. Shape [filter_depth, filter_height, filter_width, in_channels, out_channels].
        in_channels must match between input and filters.
    strides : list of ints
    strides : tuple of ints
        A list of ints that has length >= 5. 1-D tensor of length 5.
        The stride of the sliding window for each dimension of input.
        Must have strides[0] = strides[4] = 1.
@@ -494,7 +566,7 @@ def conv3d(input, filters, strides, padding, data_format='NDHWC', dilations=None
        An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC". The data format of the input and output data.
        With the default format "NDHWC", the data is stored in the order of: [batch, in_depth, in_height, in_width, in_channels].
        Alternatively, the format could be "NCDHW", the data storage order is: [batch, in_channels, in_depth, in_height, in_width].
    dilations : list of ints
    dilations : tuple of ints
        Defaults to [1, 1, 1, 1, 1]. 1-D tensor of length 5. The dilation factor for each dimension of input.
        If set to k > 1, there will be k-1 skipped cells between each filter element on that dimension.
        The dimension order is determined by the value of data_format, see above for details.
@@ -506,8 +578,18 @@ def conv3d(input, filters, strides, padding, data_format='NDHWC', dilations=None
    -------
        A Tensor. Has the same type as input.
    """

    raise NotImplementedError
    data_format, padding = preprocess_3d_format(data_format, padding)
    if data_format is 'NDHWC':
        _strides = (strides[1], strides[2], strides[3])
        _dilations = (dilations[1], dilations[2], dilations[3])
    elif data_format is 'NCDHW':
        _strides = (strides[2], strides[3], strides[4])
        _dilations = (dilations[2], dilations[3], dilations[4])
    outputs = F.conv3d(
        x=input, weight=filters, stride=_strides, dilation=_dilations, data_format=data_format, padding=padding,
        name=name
    )
    return outputs


 def lrn(inputs, depth_radius, bias, alpha, beta):
@@ -557,15 +639,37 @@ def moments(x, axes, shift=None, keepdims=False):
    pass


 class MaxPool1d(object):
    """
    Callable that applies ``F.max_pool1d`` with settings fixed at construction.

    Parameters
    ----------
    ksize, strides :
        Forwarded positionally to ``F.max_pool1d`` after the input.
    padding :
        Normalized together with ``data_format`` by ``preprocess_1d_format``.
    data_format : str or None
        Normalized by ``preprocess_1d_format``; when the result is 'NLC' the input is passed
        through ``nhwc_to_nchw`` before pooling and the output through ``nchw_to_nhwc`` after.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        normalized = preprocess_1d_format(data_format=data_format, padding=padding)
        self.data_format, self.padding = normalized
        self.strides = strides
        self.ksize = ksize

    def __call__(self, inputs):
        """Pool ``inputs`` and return the result in the layout the instance was configured with."""
        if self.data_format == 'NLC':
            pooled = F.max_pool1d(nhwc_to_nchw(inputs), self.ksize, self.strides, self.padding)
        else:
            pooled = F.max_pool1d(inputs, self.ksize, self.strides, self.padding)
        return nchw_to_nhwc(pooled) if self.data_format == 'NLC' else pooled


 class MaxPool(object):
    """
    Callable that applies ``F.max_pool2d`` with settings fixed at construction.

    Parameters
    ----------
    ksize :
        Forwarded unchanged as ``kernel_size``.
    strides : sequence of ints
        Elements 1 and 2 are used when the normalized format is 'NHWC', elements 2 and 3 when it is 'NCHW'.
    padding :
        Normalized together with ``data_format`` by ``preprocess_2d_format``.
    data_format : str or None
        Normalized by ``preprocess_2d_format``.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.ksize = ksize
        self.strides = strides
        # Compare by value: `is` tests object identity and only matches equal strings when the
        # interpreter happens to intern them.
        if self.data_format == 'NHWC':
            self._stride = (strides[1], strides[2])
        elif self.data_format == 'NCHW':
            self._stride = (strides[2], strides[3])

    def __call__(self, inputs):
        """Pool ``inputs`` and return what ``F.max_pool2d`` returns."""
        # The former unconditional `raise NotImplementedError` made this call unreachable.
        outputs = F.max_pool2d(
            x=inputs, kernel_size=self.ksize, stride=self._stride, padding=self.padding, data_format=self.data_format
        )
        return outputs


 def max_pool(input, ksize, strides, padding, data_format=None):
@@ -594,15 +698,38 @@ def max_pool(input, ksize, strides, padding, data_format=None):
    pass


 class AvgPool1d(object):
    """
    Callable that applies ``F.avg_pool1d`` with settings fixed at construction.

    Parameters
    ----------
    ksize, strides :
        Forwarded positionally to ``F.avg_pool1d`` after the input.
    padding :
        Normalized together with ``data_format`` by ``preprocess_1d_format``.
    data_format : str or None
        Normalized by ``preprocess_1d_format``; when the result is 'NLC' the input is passed
        through ``nhwc_to_nchw`` before pooling and the output through ``nchw_to_nhwc`` after.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        normalized = preprocess_1d_format(data_format=data_format, padding=padding)
        self.data_format, self.padding = normalized
        self.strides = strides
        self.ksize = ksize

    def __call__(self, inputs):
        """Pool ``inputs`` and return the result in the layout the instance was configured with."""
        if self.data_format == 'NLC':
            pooled = F.avg_pool1d(nhwc_to_nchw(inputs), self.ksize, self.strides, self.padding)
        else:
            pooled = F.avg_pool1d(inputs, self.ksize, self.strides, self.padding)
        return nchw_to_nhwc(pooled) if self.data_format == 'NLC' else pooled


 class AvgPool(object):
    """
    Callable that applies ``F.avg_pool2d`` with settings fixed at construction.

    Parameters
    ----------
    ksize :
        Stored as ``self.filter_size`` and forwarded as ``kernel_size``.
    strides : sequence of ints
        Elements 1 and 2 are used when the normalized format is 'NHWC', elements 2 and 3 when it is 'NCHW'.
    padding :
        Normalized together with ``data_format`` by ``preprocess_2d_format``.
    data_format : str or None
        Normalized by ``preprocess_2d_format``.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.filter_size = ksize
        self.strides = strides
        # Compare by value: `is` tests object identity and only matches equal strings when the
        # interpreter happens to intern them.
        if self.data_format == 'NHWC':
            self._stride = (strides[1], strides[2])
        elif self.data_format == 'NCHW':
            self._stride = (strides[2], strides[3])

    def __call__(self, inputs):
        """Pool ``inputs`` and return what ``F.avg_pool2d`` returns."""
        # The former unconditional `raise NotImplementedError` made this call unreachable.
        outputs = F.avg_pool2d(
            inputs, kernel_size=self.filter_size, stride=self._stride, padding=self.padding,
            data_format=self.data_format
        )
        return outputs


 def avg_pool(input, ksize, strides, padding):
@@ -631,6 +758,23 @@ def avg_pool(input, ksize, strides, padding):
    pass


 class MaxPool3d(object):
    """
    Callable that applies ``F.max_pool3d`` with settings fixed at construction.

    Parameters
    ----------
    ksize :
        Forwarded unchanged as ``kernel_size``.
    strides : sequence of ints
        Elements 2..4 are kept when the normalized format is 'NCDHW', elements 1..3 when it is 'NDHWC'.
    padding :
        Normalized together with ``data_format`` by ``preprocess_3d_format``.
    data_format : str or None
        Normalized by ``preprocess_3d_format``.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)
        self.ksize = ksize
        layout = self.data_format
        if layout == 'NCDHW':
            first_spatial = 2
        elif layout == 'NDHWC':
            first_spatial = 1
        else:
            # Any other layout leaves `self.strides` unset.
            first_spatial = None
        if first_spatial is not None:
            self.strides = tuple(strides[axis] for axis in range(first_spatial, first_spatial + 3))

    def __call__(self, inputs):
        """Pool ``inputs`` and return what ``F.max_pool3d`` returns."""
        return F.max_pool3d(
            inputs,
            kernel_size=self.ksize,
            stride=self.strides,
            padding=self.padding,
            data_format=self.data_format,
        )


 def max_pool3d(input, ksize, strides, padding, data_format=None, name=None):
    """
    Performs the max pooling on the input.
@@ -661,6 +805,23 @@ def max_pool3d(input, ksize, strides, padding, data_format=None, name=None):
    pass


 class AvgPool3d(object):
    """
    Callable that applies ``F.avg_pool3d`` with settings fixed at construction.

    Parameters
    ----------
    ksize :
        Forwarded unchanged as ``kernel_size``.
    strides : sequence of ints
        Elements 2..4 are kept when the normalized format is 'NCDHW', elements 1..3 when it is 'NDHWC'.
    padding :
        Normalized together with ``data_format`` by ``preprocess_3d_format``.
    data_format : str or None
        Normalized by ``preprocess_3d_format``.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)
        self.ksize = ksize
        layout = self.data_format
        if layout == 'NCDHW':
            first_spatial = 2
        elif layout == 'NDHWC':
            first_spatial = 1
        else:
            # Any other layout leaves `self.strides` unset.
            first_spatial = None
        if first_spatial is not None:
            self.strides = tuple(strides[axis] for axis in range(first_spatial, first_spatial + 3))

    def __call__(self, inputs):
        """Pool ``inputs`` and return what ``F.avg_pool3d`` returns."""
        return F.avg_pool3d(
            inputs,
            kernel_size=self.ksize,
            stride=self.strides,
            padding=self.padding,
            data_format=self.data_format,
        )


 def avg_pool3d(input, ksize, strides, padding, data_format=None, name=None):
    """
    Performs the average pooling on the input.
@@ -769,18 +930,26 @@ def depthwise_conv2d(input, filter, strides, padding, data_format=None, dilation
 class Conv1d_transpose(object):

    def __init__(
        self, strides, padding, data_format='NWC', dilations=None, out_channel=None, k_size=None, in_channels=None
        self, stride, padding, data_format='NWC', dilations=None, out_channel=None, k_size=None, in_channels=None
    ):
        self.strides = strides
        self.stride = stride
        self.dilations = dilations
        self.data_format, self.padding = preprocess_1d_format(data_format, padding)

    def __call__(self, input, filters):
        raise NotImplementedError
        out = F.conv1d_transpose(
            x=input,
            weight=filters,
            padding=self.padding,
            stride=self.stride,
            dilation=self.dilations,
            data_format=self.data_format,
        )
        return out


 def conv1d_transpose(
    input, filters, output_shape, strides, padding='SAME', data_format='NWC', dilations=None, name=None
    input, filters, output_shape, stride, padding='SAME', data_format='NWC', dilations=None, name=None
 ):
    """
    The transpose of conv1d.
@@ -813,7 +982,17 @@ def conv1d_transpose(
    -------
        A Tensor with the same type as value.
    """
    pass
    data_format, padding = preprocess_1d_format(data_format, padding)
    output = F.conv1d_transpose(
        x=input,
        weight=filters,
        stride=stride,
        padding=padding,
        dilation=dilations,
        data_format=data_format,
        output_size=output_shape,
    )
    return output


 class Conv2d_transpose(object):
@@ -824,11 +1003,14 @@ class Conv2d_transpose(object):
    ):
        self.strides = strides
        self.dilations = dilations
        self.name = name
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)

    def __call__(self, input, filters):
        raise NotImplementedError
        output = F.conv2d_transpose(
            x=input, weight=filters, stride=self.strides, padding=self.padding, dilation=self.dilations,
            data_format=self.data_format
        )
        return output


 def conv2d_transpose(
@@ -865,7 +1047,17 @@ def conv2d_transpose(
    -------
        A Tensor with the same type as input.
    """
    pass
    data_format, padding = preprocess_2d_format(data_format, padding)
    output = F.conv2d_transpose(
        x=input,
        weight=filters,
        output_size=output_shape,
        stride=strides,
        padding=padding,
        dilation=dilations,
        data_format=data_format,
    )
    return output


 class Conv3d_transpose(object):
@@ -876,12 +1068,14 @@ class Conv3d_transpose(object):
    ):
        self.strides = strides
        self.dilations = dilations
        self.name = name
        self.out_channel = out_channel
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)

    def __call__(self, input, filters):
        """
        Apply ``F.conv3d_transpose`` to ``input`` with ``filters`` and return its result.

        Stride, padding, dilation and data format come from the attributes set in ``__init__``.
        """
        # Previously an unconditional `raise NotImplementedError` preceded this call and the
        # result was never returned, so callers could not obtain the output.
        output = F.conv3d_transpose(
            x=input, weight=filters, stride=self.strides, padding=self.padding, dilation=self.dilations,
            data_format=self.data_format
        )
        return output


 def conv3d_transpose(
@@ -915,17 +1109,63 @@ def conv3d_transpose(
    -------
        A Tensor with the same type as value.
    """

    pass
    data_format, padding = preprocess_3d_format(data_format, padding)
    output = F.conv3d_transpose(
        x=input,
        weight=filters,
        output_size=output_shape,
        stride=strides,
        padding=padding,
        dilation=dilations,
        data_format=data_format,
    )
    return output


 class BatchNorm(object):
    """
    Callable that applies ``pd.nn.functional.batch_norm`` with settings fixed at construction.

    Parameters
    ----------
    decay : float
        Forwarded as ``momentum``.
    epsilon : float
        Forwarded as ``epsilon``.
    beta, gamma :
        Forwarded as ``bias`` and ``weight`` respectively.
    moving_mean, moving_var :
        Forwarded positionally after the input.
    num_features :
        Stored on the instance; not used by this class.
    data_format : str
        'channels_last' (default) or 'channels_first'; combined with the input rank by
        ``channel_format`` to pick the layout string handed to ``batch_norm``.
    is_train : bool
        Forwarded as ``training``.
    """

    def __init__(
        self, decay=0.9, epsilon=0.00001, beta=None, gamma=None, moving_mean=None, moving_var=None, num_features=None,
        data_format='channels_last', is_train=False
    ):
        self.decay = decay
        self.epsilon = epsilon
        self.data_format = data_format
        self.beta = beta
        self.gamma = gamma
        self.moving_mean = moving_mean
        self.moving_var = moving_var
        self.num_features = num_features
        self.is_train = is_train
        self.axes = None

    def __call__(self, inputs):
        """Normalize ``inputs`` and return what ``pd.nn.functional.batch_norm`` returns."""
        data_format = self.channel_format(inputs)
        outputs = pd.nn.functional.batch_norm(
            inputs, self.moving_mean, self.moving_var, weight=self.gamma, bias=self.beta, training=self.is_train,
            momentum=self.decay, epsilon=self.epsilon, data_format=data_format
        )
        return outputs

    def channel_format(self, inputs):
        """
        Return "NC", "NCL", "NCHW", "NCDHW", "NLC", "NHWC" or "NDHWC" for ``inputs``.

        The choice depends on ``len(inputs.shape)`` and ``self.data_format``; rank 2 always maps to "NC".

        Raises
        ------
        ValueError
            If the rank / ``data_format`` combination is not one of the supported ones
            (previously this silently returned ``None``).
        """
        len_in_shape = len(inputs.shape)
        if len_in_shape == 2:
            return 'NC'

        by_rank = None
        if self.data_format == 'channels_last':
            by_rank = {3: 'NLC', 4: 'NHWC', 5: 'NDHWC'}
        elif self.data_format == 'channels_first':
            by_rank = {3: 'NCL', 4: 'NCHW', 5: 'NCDHW'}

        if by_rank is not None and len_in_shape in by_rank:
            return by_rank[len_in_shape]
        raise ValueError(
            "Unsupported combination of data_format " + repr(self.data_format) + " and input rank " +
            str(len_in_shape)
        )


 class GroupConv2D(object):
@@ -958,58 +1198,98 @@ class SeparableConv2D(object):
 class AdaptiveMeanPool1D(object):

    def __init__(self, output_size, data_format):
        pass
        self.data_format, _ = preprocess_1d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, input):

        raise NotImplementedError
        if self.data_format == 'NLC':
            input = nhwc_to_nchw(input)

        output = F.adaptive_avg_pool1d(input, self.output_size)

        if self.data_format == 'NLC':
            output = nchw_to_nhwc(output)

        return output


 class AdaptiveMeanPool2D(object):

    def __init__(self, output_size, data_format):
        pass
        self.data_format, _ = preprocess_2d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, inputs):

        raise NotImplementedError
        return F.adaptive_avg_pool2d(inputs, output_size=self.output_size, data_format=self.data_format)


 class AdaptiveMeanPool3D(object):

    def __init__(self, output_size, data_format):
        pass
        self.data_format, _ = preprocess_3d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, inputs):
        raise NotImplementedError

        return F.adaptive_avg_pool3d(inputs, output_size=self.output_size, data_format=self.data_format)


 class AdaptiveMaxPool1D(object):

    def __init__(self, output_size, data_format):
        pass

        self.data_format, _ = preprocess_1d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, input):

        raise NotImplementedError
        if self.data_format == 'NLC':
            input = nhwc_to_nchw(input)

        output = F.adaptive_max_pool1d(input, self.output_size)

        if self.data_format == 'NLC':
            output = nchw_to_nhwc(output)

        return output


 class AdaptiveMaxPool2D(object):

    def __init__(self, output_size, data_format):
        pass
        self.data_format, _ = preprocess_2d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, inputs):
        raise NotImplementedError
        if self.data_format == 'NHWC':
            inputs = nhwc_to_nchw(inputs)

        output = F.adaptive_max_pool2d(inputs, self.output_size)

        if self.data_format == 'NHWC':
            output = nchw_to_nhwc(output)

        return output


 class AdaptiveMaxPool3D(object):

    def __init__(self, output_size, data_format):
        pass
        self.data_format, _ = preprocess_3d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, inputs):
        raise NotImplementedError
        if self.data_format == 'NDHWC':
            inputs = nhwc_to_nchw(inputs)

        output = F.adaptive_max_pool3d(inputs, self.output_size)

        if self.data_format == 'NDHWC':
            output = nchw_to_nhwc(output)

        return output


 class BinaryConv2D(object):
--- a/tensorlayer/backend/ops/tensorflow_backend.py
+++ b/tensorlayer/backend/ops/tensorflow_backend.py
@@ -414,6 +414,10 @@ def convert_to_tensor(value, dtype=None):
    return tf.convert_to_tensor(value, dtype)


 def convert_to_numpy(value):
    """
    Return the result of calling ``value.numpy()``.

    Parameters
    ----------
    value : object exposing a ``numpy()`` method
        Presumably a backend tensor -- TODO confirm against callers.

    Returns
    -------
        Whatever ``value.numpy()`` returns.
    """
    as_numpy = value.numpy()
    return as_numpy


 def sqrt(x):
    """
    Computes square root of x element-wise.
@@ -845,6 +849,12 @@ def split(value, num_or_size_splits, axis=0, num=None):
    return tf.split(value=value, num_or_size_splits=num_or_size_splits, axis=axis, num=num)


 class Floor(object):
    """Callable wrapper that forwards its argument to ``tf.floor``."""

    def __call__(self, x):
        """Return ``tf.floor(x)``."""
        floored = tf.floor(x)
        return floored


 def floor(x):
    return tf.floor(x)

@@ -917,7 +927,7 @@ class NCELoss(object):
        return outputs


 class Not_equal(object):
 class NotEqual(object):

    def __init__(self):
        pass
@@ -926,7 +936,7 @@ class Not_equal(object):
        return tf.not_equal(x, y)


 class Count_nonzero(object):
 class CountNonzero(object):

    def __init__(self, keepdims=None, dtype=int64):
        self.keepdims = keepdims
@@ -997,6 +1007,12 @@ class Sign(object):
        return tf.sign(x)


 class Ceil(object):
    """Callable wrapper that forwards its argument to ``tf.math.ceil``."""

    def __call__(self, x):
        """Return ``tf.math.ceil(x)``."""
        rounded_up = tf.math.ceil(x)
        return rounded_up


 def ceil(x):
    return tf.math.ceil(x)

--- a/tensorlayer/backend/ops/tensorflow_nn.py
+++ b/tensorlayer/backend/ops/tensorflow_nn.py
@@ -652,6 +652,20 @@ def moments(x, axes, shift=None, keepdims=False):
    return outputs


 class MaxPool1d(object):
    """
    Callable that applies ``tf.nn.max_pool`` with settings fixed at construction.

    Parameters
    ----------
    ksize, strides :
        Forwarded unchanged to ``tf.nn.max_pool``.
    padding :
        Normalized together with ``data_format`` by ``preprocess_1d_format``.
    data_format : str or None
        Normalized by ``preprocess_1d_format`` and forwarded to ``tf.nn.max_pool``.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        normalized = preprocess_1d_format(data_format=data_format, padding=padding)
        self.data_format, self.padding = normalized
        self.strides = strides
        self.ksize = ksize

    def __call__(self, inputs):
        """Pool ``inputs`` and return what ``tf.nn.max_pool`` returns."""
        return tf.nn.max_pool(
            input=inputs,
            ksize=self.ksize,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
        )


 class MaxPool(object):

    def __init__(self, ksize, strides, padding, data_format=None):
@@ -711,6 +725,25 @@ def max_pool(input, ksize, strides, padding, data_format=None):
    return outputs


 class AvgPool1d(object):
    """
    Callable that applies ``tf.nn.pool`` in "AVG" mode with settings fixed at construction.

    Parameters
    ----------
    ksize :
        Forwarded as ``window_shape``.
    strides :
        Forwarded unchanged.
    padding :
        Normalized together with ``data_format`` by ``preprocess_1d_format``.
    data_format : str or None
        Normalized by ``preprocess_1d_format`` and forwarded to ``tf.nn.pool``.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        normalized = preprocess_1d_format(data_format=data_format, padding=padding)
        self.data_format, self.padding = normalized
        self.strides = strides
        self.ksize = ksize

    def __call__(self, inputs):
        """Average-pool ``inputs`` and return what ``tf.nn.pool`` returns."""
        pool_kwargs = dict(
            input=inputs,
            window_shape=self.ksize,
            pooling_type="AVG",
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
        )
        return tf.nn.pool(**pool_kwargs)


 class AvgPool(object):

    def __init__(self, ksize, strides, padding, data_format=None):
@@ -762,6 +795,24 @@ def avg_pool(input, ksize, strides, padding):
    return outputs


 class MaxPool3d(object):
    """
    Callable that applies ``tf.nn.max_pool3d`` with settings fixed at construction.

    Parameters
    ----------
    ksize, strides :
        Forwarded unchanged to ``tf.nn.max_pool3d``.
    padding :
        Normalized together with ``data_format`` by ``preprocess_3d_format``.
    data_format : str or None
        Normalized by ``preprocess_3d_format`` and forwarded to ``tf.nn.max_pool3d``.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        normalized = preprocess_3d_format(data_format, padding)
        self.data_format, self.padding = normalized
        self.strides = strides
        self.ksize = ksize

    def __call__(self, inputs):
        """Pool ``inputs`` and return what ``tf.nn.max_pool3d`` returns."""
        return tf.nn.max_pool3d(
            input=inputs, ksize=self.ksize, strides=self.strides, padding=self.padding,
            data_format=self.data_format
        )


 def max_pool3d(input, ksize, strides, padding, data_format=None):
    """
    Performs the max pooling on the input.
@@ -801,6 +852,24 @@ def max_pool3d(input, ksize, strides, padding, data_format=None):
    return outputs


 class AvgPool3d(object):
    """
    Callable that applies ``tf.nn.avg_pool3d`` with settings fixed at construction.

    Parameters
    ----------
    ksize, strides :
        Forwarded unchanged to ``tf.nn.avg_pool3d``.
    padding :
        Normalized together with ``data_format`` by ``preprocess_3d_format``.
    data_format : str or None
        Normalized by ``preprocess_3d_format`` and forwarded to ``tf.nn.avg_pool3d``.
    """

    def __init__(self, ksize, strides, padding, data_format=None):
        normalized = preprocess_3d_format(data_format, padding)
        self.data_format, self.padding = normalized
        self.strides = strides
        self.ksize = ksize

    def __call__(self, inputs):
        """Pool ``inputs`` and return what ``tf.nn.avg_pool3d`` returns."""
        return tf.nn.avg_pool3d(
            input=inputs, ksize=self.ksize, strides=self.strides, padding=self.padding,
            data_format=self.data_format
        )


 def avg_pool3d(input, ksize, strides, padding, data_format=None):
    """
    Performs the average pooling on the input.
@@ -953,9 +1022,9 @@ def depthwise_conv2d(input, filter, strides, padding, data_format=None, dilation
 class Conv1d_transpose(object):

    def __init__(
        self, strides, padding, data_format='NWC', dilations=None, out_channel=None, k_size=None, in_channels=None
        self, stride, padding, data_format='NWC', dilations=None, out_channel=None, k_size=None, in_channels=None
    ):
        self.strides = strides
        self.stride = stride
        self.dilations = dilations
        self.data_format, self.padding = preprocess_1d_format(data_format, padding)

@@ -973,10 +1042,10 @@ class Conv1d_transpose(object):
        output_channels = filters_shape[1]
        dilations_w = 1

        if isinstance(self.strides, int):
            strides_w = self.strides
        if isinstance(self.stride, int):
            strides_w = self.stride
        else:
            strides_list = list(self.strides)
            strides_list = list(self.stride)
            strides_w = strides_list[w_axis]

        if self.dilations is not None:
@@ -1002,7 +1071,7 @@ class Conv1d_transpose(object):
            input=input,
            filters=filters,
            output_shape=output_shape,
            strides=self.strides,
            strides=self.stride,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
@@ -1089,10 +1158,10 @@ class Conv2d_transpose(object):
            strides_w = self.strides
        else:
            strides_list = list(self.strides)
            if len(strides_list) != 4:
            if len(strides_list) == 2:
                strides_h = strides_list[0]
                strides_w = strides_list[1]
            else:
            elif len(strides_list) == 4:
                strides_h = strides_list[h_axis]
                strides_w = strides_list[w_axis]

@@ -1102,10 +1171,10 @@ class Conv2d_transpose(object):
                dilations_w = self.dilations
            else:
                dilations_list = list(self.dilations)
                if len(dilations_list) != 4:
                if len(dilations_list) == 2:
                    dilations_h = dilations_list[0]
                    dilations_w = dilations_list[1]
                else:
                elif len(dilations_list) == 4:
                    dilations_h = dilations_list[h_axis]
                    dilations_w = dilations_list[w_axis]

@@ -1212,12 +1281,12 @@ class Conv3d_transpose(object):
            strides_d, strides_h, strides_w = self.strides
        else:
            strides_list = list(self.strides)
            if len(strides_list) != 5:
            if len(strides_list) == 3:
                strides_d, strides_h, strides_w = \
                    strides_list[0], \
                    strides_list[1], \
                    strides_list[2]
            else:
            elif len(strides_list) == 5:
                strides_d, strides_h, strides_w = \
                    strides_list[d_axis], \
                    strides_list[h_axis], \
@@ -1228,12 +1297,12 @@ class Conv3d_transpose(object):
                dilations_d, dilations_h, dilations_w = self.dilations
            else:
                dilations_list = list(self.dilations)
                if len(dilations_list) != 5:
                if len(dilations_list) == 3:
                    dilations_d, dilations_h, dilations_w = \
                        dilations_list[0], \
                        dilations_list[1], \
                        dilations_list[2]
                else:
                elif len(dilations_list) == 5:
                    dilations_d, dilations_h, dilations_w = \
                        dilations_list[d_axis],\
                        dilations_list[h_axis], \
--- a/tensorlayer/cost/init.py
+++ b/tensorlayer/cost/init.py
@@ -7,8 +7,6 @@ if BACKEND == 'tensorflow':
    from .tensorflow_cost import *
 elif BACKEND == 'mindspore':
    from .mindspore_cost import *
 elif BACKEND == 'dragon':
    pass
 elif BACKEND == 'paddle':
    from .paddle_cost import *
 else:
--- a/tensorlayer/cost/mindspore_cost.py
+++ b/tensorlayer/cost/mindspore_cost.py
@@ -6,7 +6,7 @@ from mindspore.nn import Cell
 import mindspore.ops as P

 __all__ = [
    'cross_entropy',
    'softmax_cross_entropy_with_logits',
    'sigmoid_cross_entropy',
    'binary_cross_entropy',
    'mean_squared_error',
@@ -25,24 +25,9 @@ __all__ = [
    'maxnorm_i_regularizer',
 ]

 cross_entropy = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
 softmax_cross_entropy_with_logits = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')


 def sigmoid_cross_entropy(output, target, name=None):
    """Sigmoid cross-entropy operation, see ``tf.ops.sigmoid_cross_entropy_with_logits``.

    Parameters
    ----------
    output : Tensor
        A batch of distribution with shape: [batch_size, num of classes].
    target : Tensor
        A batch of index with shape: [batch_size, ].
    name : string
        Name of this loss.

    """
    outputs = P.ReduceMean(cross_entropy(output, target))
    return outputs
 sigmoid_cross_entropy = P.SigmoidCrossEntropyWithLogits()


 def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'):
@@ -73,40 +58,7 @@ def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'):
    raise NotImplementedError("Not Implemented.")


 def mean_squared_error(output, target, is_mean=False, axis=-1, name="mean_squared_error"):
    """Return ``nn.MSELoss()(output, target)``.

    Parameters
    ----------
    output : Tensor
        Predicted values.
    target : Tensor
        The target values, format the same with `output`.
    is_mean : boolean
        Accepted for signature compatibility; not used by the body.
    axis : int or list of int
        Accepted for signature compatibility; not used by the body.
    name : str
        Accepted for signature compatibility; not used by the body.

    References
    ------------
    - `Wiki Mean Squared Error <https://en.wikipedia.org/wiki/Mean_squared_error>`__

    """
    # The per-rank axis selection below is disabled; a fresh ``nn.MSELoss``
    # is built with its default arguments on every call instead.
    # with tf.name_scope(name):
    # if len(output.shape) == 2:  # [batch_size, n_feature]
    #     axis = 1
    # elif len(output.shape) == 3:  # [batch_size, w, h]
    #     axis = [1, 2]
    # elif len(output.shape) == 4:  # [batch_size, w, h, c]
    #     axis = [1, 2, 3]
    # else:
    #     raise Exception("Unknow dimension")

    return nn.MSELoss()(output, target)
 mean_squared_error = nn.MSELoss()


 def normalized_mean_square_error(output, target, axis=-1, name="normalized_mean_squared_error_loss"):
--- a/tensorlayer/cost/paddle_cost.py
+++ b/tensorlayer/cost/paddle_cost.py
@@ -5,7 +5,7 @@ import paddle.nn.functional as F
 import paddle as pd

 __all__ = [
    'cross_entropy',
    'softmax_cross_entropy_with_logits',
    'sigmoid_cross_entropy',
    'binary_cross_entropy',
    'mean_squared_error',
@@ -24,7 +24,8 @@ __all__ = [
    'maxnorm_i_regularizer',
 ]

 def cross_entropy(output, target):

 def softmax_cross_entropy_with_logits(output, target):
    """Softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy for two distributions,
    it implements softmax internally. See ``tf.ops.sparse_softmax_cross_entropy_with_logits``.

@@ -40,7 +41,7 @@ def cross_entropy(output, target):
    Examples
    --------
    >>> import tensorlayer as tl
    >>> ce = tl.cost.cross_entropy(y_logits, y_target_logits)
    >>> ce = tl.cost.softmax_cross_entropy_with_logits(y_logits, y_target_logits)

    References
    -----------
@@ -70,8 +71,8 @@ def sigmoid_cross_entropy(output, target):
        pass
    else:
        depth = output.shape[-1]
        label = pd.fluid.layers.one_hot(target, depth=depth)
    out = pd.fluid.layers.sigmoid_cross_entropy_with_logits(x=output, label=label)
        target = pd.fluid.layers.one_hot(target, depth=depth)
    out = pd.fluid.layers.sigmoid_cross_entropy_with_logits(x=output, label=target)
    out = pd.fluid.layers.reduce_mean(out)
    return out

@@ -102,8 +103,8 @@ def binary_cross_entropy(output, target, epsilon=1e-8):
        depth = output.shape[-1]
        target = pd.fluid.layers.one_hot(target, depth=depth)
    out = pd.fluid.layers.reduce_sum(
            -(target * pd.log(output + epsilon) + (1. - target) * pd.log(1. - output + epsilon))
        )
        -(target * pd.log(output + epsilon) + (1. - target) * pd.log(1. - output + epsilon))
    )
    return out


@@ -192,7 +193,6 @@ def absolute_difference_error(output, target, is_mean=False, axis=-1, name="abso

    """


    if is_mean:
        loss = pd.fluid.layers.reduce_mean(pd.fluid.layers.reduce_mean(pd.abs(output - target), axis))
    else:
@@ -600,4 +600,4 @@ def huber_loss(

    """

    raise NotImplementedError("Not Implemented.")
    raise NotImplementedError("Not Implemented.")
--- a/tensorlayer/cost/tensorflow_cost.py
+++ b/tensorlayer/cost/tensorflow_cost.py
@@ -10,7 +10,7 @@ from tensorflow.python.ops import array_ops, math_ops, nn_ops, standard_ops
 from tensorlayer import logging

 __all__ = [
    'cross_entropy',
    'softmax_cross_entropy_with_logits',
    'sigmoid_cross_entropy',
    'binary_cross_entropy',
    'mean_squared_error',
@@ -30,7 +30,7 @@ __all__ = [
 ]


 def cross_entropy(output, target, name=None):
 def softmax_cross_entropy_with_logits(output, target, name=None):
    """Softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy for two distributions,
    it implements softmax internally. See ``tf.ops.sparse_softmax_cross_entropy_with_logits``.

@@ -46,7 +46,7 @@ def cross_entropy(output, target, name=None):
    Examples
    --------
    >>> import tensorlayer as tl
    >>> ce = tl.cost.cross_entropy(y_logits, y_target_logits, 'my_loss')
    >>> ce = tl.cost.softmax_cross_entropy_with_logits(y_logits, y_target_logits, 'my_loss')

    References
    -----------
@@ -236,7 +236,7 @@ def dice_coe(output, target, loss_type='jaccard', axis=(1, 2, 3), smooth=1e-5):
    Examples
    ---------
    >>> import tensorlayer as tl
    >>> outputs = tl.act.pixel_wise_softmax(outputs)
    >>> outputs = tl.ops.softmax(outputs)
    >>> dice_loss = 1 - tl.cost.dice_coe(outputs, y_)

    References
@@ -492,20 +492,21 @@ def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details=
    >>> vocab_size = 10000
    >>> embedding_size = 256
    >>> ni = tl.layers.Input([batch_size, None], dtype=tf.int64)
    >>> net = tl.layers.Embedding(
    >>> net_list = []
    >>> net_list.append(tl.layers.Embedding(
    ...         vocabulary_size = vocab_size,
    ...         embedding_size = embedding_size,
    ...         name = 'seq_embedding')(ni)
    >>> net = tl.layers.RNN(
    ...         name = 'seq_embedding'))
    >>> net_list.append(tl.layers.RNN(
    ...         cell =tf.keras.layers.LSTMCell(units=embedding_size, dropout=0.1),
    ...         return_seq_2d = True,
    ...         name = 'dynamicrnn')(net)
    >>> net = tl.layers.Dense(n_units=vocab_size, name="output")(net)
    >>> model = tl.models.Model(inputs=ni, outputs=net)
    ...         name = 'dynamicrnn'))
    >>> net_list.append(tl.layers.Dense(n_units=vocab_size, name="output"))
    >>> model = tl.layers.SequentialLayer(net_list)
    >>> input_seqs = np.random.randint(0, 10, size=(batch_size, 10), dtype=np.int64)
    >>> target_seqs = np.random.randint(0, 10, size=(batch_size, 10), dtype=np.int64)
    >>> input_mask = np.random.randint(0, 2, size=(batch_size, 10), dtype=np.int64)
    >>> outputs = model(input_seqs, is_train=True)
    >>> outputs = model(input_seqs)
    >>> loss = tl.cost.cross_entropy_seq_with_mask(outputs, target_seqs, input_mask)

    """
--- a/tensorlayer/dataflow/init.py
+++ b/tensorlayer/dataflow/init.py
@@ -3,7 +3,6 @@
 from __future__ import absolute_import, division, print_function

 from tensorlayer.backend.ops.load_backend import BACKEND
 from tensorlayer.dataflow import image

 if BACKEND == 'tensorflow':
    from .tensorflow_data import *
--- a/tensorlayer/dataflow/dataflow_examples.py
+++ b/tensorlayer/dataflow/dataflow_examples.py
@@ -1,56 +0,0 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-

 import tensorlayer as tl
 from tensorlayer.dataflow import Dataset
 import numpy as np

 # Load CIFAR-10 through TensorLayer, requesting images shaped (-1, 32, 32, 3).
 X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)


 def generator_train():
    """Yield ``(image, label)`` pairs from the module-level training arrays.

    Each label is wrapped with ``np.array`` before being yielded.

    Raises
    ------
    AssertionError
        On first iteration, if ``X_train`` and ``y_train`` differ in length.
    """
    if len(X_train) != len(y_train):
        raise AssertionError("The length of inputs and targets should be equal")
    for sample, label in zip(X_train, y_train):
        yield sample, np.array(label)


 # Pipeline settings used by the dataset construction further down.
 batch_size = 128  # number of samples per emitted batch
 shuffle_buffer_size = 128  # buffer size handed to Dataset.shuffle
 n_epoch = 10  # how many times the dataset is repeated

 import tensorflow as tf


 def _map_fn_train(img, target):
    """Apply random training augmentations to one image and flatten its label to a scalar.

    Returns the augmented ``img`` and ``target`` reshaped to shape ``()``.
    """
    # 1. Randomly crop a [24, 24, 3] section of the image.
    img = tf.image.random_crop(img, [24, 24, 3])
    # 2. Randomly flip the image horizontally.
    img = tf.image.random_flip_left_right(img)
    # 3. Randomly change brightness.
    img = tf.image.random_brightness(img, max_delta=63)
    # 4. Randomly change contrast.
    img = tf.image.random_contrast(img, lower=0.2, upper=1.8)
    # 5. Standardize the image with tf.image.per_image_standardization.
    img = tf.image.per_image_standardization(img)
    # The label is reshaped to a scalar.
    target = tf.reshape(target, ())
    return img, target


 import multiprocessing
 # Build the training dataset from the Python generator defined above.
 train_ds = Dataset.from_generator(
    generator=generator_train, output_types=(tl.float32, tl.int32)
 )  # , output_shapes=((24, 24, 3), (1)))

 # Augment samples in parallel, using one call per available CPU core.
 train_ds = train_ds.map(_map_fn_train, num_parallel_calls=multiprocessing.cpu_count())

 # Order of the stages as written: repeat -> shuffle -> prefetch -> batch.
 # NOTE(review): shuffling after repeat and batching after prefetch may not be
 # the intended order -- confirm against the Dataset backend's semantics.
 train_ds = train_ds.repeat(n_epoch)
 train_ds = train_ds.shuffle(shuffle_buffer_size)
 train_ds = train_ds.prefetch(buffer_size=4096)
 train_ds = train_ds.batch(batch_size)

 # Smoke test: iterate the pipeline and print the shape of every batch.
 for X_batch, y_batch in train_ds:
    print(X_batch.shape, y_batch.shape)
--- a/tensorlayer/dataflow/image/mindspore_image.py
+++ b/tensorlayer/dataflow/image/mindspore_image.py
--- a/tensorlayer/dataflow/image/paddle_image.py
+++ b/tensorlayer/dataflow/image/paddle_image.py
@@ -1,19 +0,0 @@
 import paddle
 import numpy as np
 from PIL import Image
 from paddle.vision.transforms import functional as F

 # Public API of this module. The name must be ``__all__`` (two trailing
 # underscores) for ``from ... import *`` to honour it.
 __all__ = [
    'Standardization',
 ]


 def Standardization(img, mean, std, data_format='HWC'):
    """Return ``(img - mean) / std`` with ``mean`` and ``std`` converted to paddle tensors.

    Parameters
    ----------
    img :
        Image data; combined with the statistics through ``-`` and ``/``.
    mean :
        Mean value(s), converted with ``paddle.to_tensor``.
    std :
        Standard deviation value(s), converted with ``paddle.to_tensor``.
    data_format : str
        When equal to ``'CHW'`` the statistics are reshaped to ``[-1, 1, 1]``
        so that they line up with a leading channel axis; any other value
        leaves them as converted.
    """
    if data_format == 'CHW':
        mean = paddle.to_tensor(mean).reshape([-1, 1, 1])
        std = paddle.to_tensor(std).reshape([-1, 1, 1])
    else:
        mean = paddle.to_tensor(mean)
        std = paddle.to_tensor(std)
    return (img - mean) / std
--- a/tensorlayer/dataflow/image/tensorflow_image.py
+++ b/tensorlayer/dataflow/image/tensorflow_image.py
@@ -1,760 +0,0 @@
 import tensorflow as tf
 import numpy as np
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.framework import ops
 from tensorflow.python.ops.image_ops_impl import _AssertAtLeast3DImage
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops.image_ops_impl import convert_image_dtype
 import numbers

 # Names exported by ``from <this module> import *``; every entry is a
 # function defined further down in this module.
 __all__ = [
    'CentralCrop',
    'HsvToRgb',
    'AdjustBrightness',
    'AdjustContrast',
    'AdjustHue',
    'AdjustSaturation',
    'Crop',
    'FlipHorizontal',
    'FlipVertical',
    'GrayToRgb',
    'Standardization',
    'RgbToGray',
    'PadToBoundingbox',
    'Pad',
    'RandomBrightness',
    'RandomContrast',
    'RandomHue',
    'RandomSaturation',
    'RandomCrop',
    'Resize',
    'CropAndResize',
    'CropOrPad',
    'ResizeAndPad',
    'RgbToHsv',
    'Transpose',
    'RandomRotation',
    'RandomShift',
    'RandomShear',
    'RandomZoom',
    'Rescale',
    'RandomFlipVertical',
    'RandomFlipHorizontal',
    'HWC2CHW',
    'CHW2HWC',
 ]


 def CentralCrop(image, central_fraction=None, size=None):
    '''Crop the central region of an image or a batch of images.

    Parameters
    ----------
    image :
        Either a 3-D input of shape [height, width, depth] or a 4-D input of
        shape [batch_size, height, width, depth].
    central_fraction :
        float in (0, 1], fraction of the size to keep. Used preferentially
        when both ``central_fraction`` and ``size`` are given.
    size :
        int or list/tuple (height, width). Only consulted when
        ``central_fraction`` is None. It is converted to the fraction
        ``max(height / H, width / W)``, so the result keeps the aspect ratio
        of the input rather than matching ``size`` exactly.

    Returns
    -------
        The result of ``tf.image.central_crop(image, central_fraction)``.

    Raises
    ------
    ValueError
        If both ``central_fraction`` and ``size`` are None, if ``size`` is
        malformed or larger than the image, if the image is not 3-D or 4-D
        when ``size`` is used, or if ``central_fraction`` is outside (0, 1].
    '''
    if size is None and central_fraction is None:
        raise ValueError('central_fraction and size can not be both None')

    if central_fraction is None:
        outshape = np.shape(image)
        if len(outshape) == 3:
            h_axis, w_axis = 0, 1
        elif len(outshape) == 4:
            h_axis, w_axis = 1, 2
        else:
            # Without this branch the axis names stay unbound and the size
            # check below fails with an unrelated UnboundLocalError.
            raise ValueError('\'image\' must have either 3 or 4 dimensions.')

        if isinstance(size, numbers.Number):
            target_height = size
            target_width = size
        elif isinstance(size, (tuple, list)):
            if len(size) != 2:
                raise ValueError('The length of size must be 2')
            target_height, target_width = size[0], size[1]
        else:
            raise ValueError("Size should be a single integer or a list/tuple (h, w) of length 2.")
        if target_height > outshape[h_axis] or target_width > outshape[w_axis]:
            raise ValueError("Centralcrop image size must < original image size.")
        central_fraction = max(target_height / outshape[h_axis], target_width / outshape[w_axis])
    elif central_fraction > 1 or central_fraction <= 0:
        raise ValueError('central_fraction must be in (0,1].')

    return tf.image.central_crop(image, central_fraction)


 def HsvToRgb(image):
    '''Convert HSV image data to RGB by delegating to ``tf.image.hsv_to_rgb``.'''

    return tf.image.hsv_to_rgb(image)


 def AdjustBrightness(image, factor):
    '''Adjust brightness via ``tf.image.adjust_brightness``; ``factor`` is passed as ``delta``.'''

    return tf.image.adjust_brightness(image, delta=factor)


 def AdjustContrast(image, factor):
    '''Adjust contrast via ``tf.image.adjust_contrast``; ``factor`` is passed as ``contrast_factor``.'''

    return tf.image.adjust_contrast(image, contrast_factor=factor)


 def AdjustHue(image, factor):
    '''Adjust hue via ``tf.image.adjust_hue``; ``factor`` is passed as ``delta``.'''

    return tf.image.adjust_hue(image, delta=factor)


 def AdjustSaturation(image, factor):
    '''Adjust saturation via ``tf.image.adjust_saturation``; ``factor`` is passed as ``saturation_factor``.'''

    return tf.image.adjust_saturation(image, saturation_factor=factor)


 def Crop(image, offset_height, offset_width, target_height, target_width, is_hwc=True):
    '''Crop image(s) to a bounding box via ``tf.image.crop_to_bounding_box``.

    Parameters
    ----------
    image:
        An image or a batch of images.
    offset_height:
        Vertical coordinate of the top-left corner of the result in the input.
    offset_width:
        Horizontal coordinate of the top-left corner of the result in the input.
    target_height:
        Height of the result.
    target_width:
        Width of the result.
    is_hwc:
        Accepted but not used by this implementation.

    Returns
    -------
        Output [batch, target_height, target_width, channels] or
        [target_height, target_width, channels].
    '''

    return tf.image.crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width)


 def FlipHorizontal(image):
    '''Flip image(s) left-to-right by delegating to ``tf.image.flip_left_right``.'''

    return tf.image.flip_left_right(image)


 def FlipVertical(image):
    '''Flip image(s) upside down by delegating to ``tf.image.flip_up_down``.'''

    return tf.image.flip_up_down(image)


 def GrayToRgb(image):
    '''Convert grayscale image(s) to RGB by delegating to ``tf.image.grayscale_to_rgb``.'''

    return tf.image.grayscale_to_rgb(image)


 def RgbToGray(image):
    '''Convert RGB image(s) to grayscale by delegating to ``tf.image.rgb_to_grayscale``.'''

    return tf.image.rgb_to_grayscale(image)


 def PadToBoundingbox(image, offset_height, offset_width, target_height, target_width, padding_value=0, is_hwc=True):
    '''Pad image(s) to a target size by delegating to ``tf.image.pad_to_bounding_box``.

    ``image`` and the four offset/target arguments are forwarded unchanged.
    ``padding_value`` and ``is_hwc`` are accepted but not forwarded to TensorFlow.
    '''

    return tf.image.pad_to_bounding_box(
        image,
        offset_height,
        offset_width,
        target_height,
        target_width,
    )


 def Pad(image, padding, padding_value=0, mode='constant'):
    '''Pad the height and width axes of an image or a batch of images.

    Parameters
    ----------
    image:
        A 3-D (height, width, channels) or 4-D (batch, height, width, channels) Tensor.
    padding:
        An integer or a list/tuple. If a single integer is provided, pad all four borders with this value.
        If a tuple or list of 2 values is provided, pad the top and bottom with the first value and
        the left and right with the second value.
        If 4 values are provided as a list or tuple, pad the (top, bottom, left, right) respectively.
    padding_value:
        In "CONSTANT" mode, the scalar pad value to use. Must be same type as tensor.
    mode:
        One of "CONSTANT", "REFLECT", or "SYMMETRIC" (case-insensitive)

    Returns
    -------
        A padded Tensor. Has the same type as tensor.

    Raises
    ------
    TypeError
        If ``image`` is not 3-D or 4-D, or ``padding`` is not an int or list/tuple.
    ValueError
        If ``padding`` is a list/tuple whose length is neither 2 nor 4.
    '''
    rank = len(image.shape)
    if rank not in (3, 4):
        raise TypeError('Image must  be a 3-D tensor or 4-D tensor.')

    if isinstance(padding, int):
        spatial = ((padding, padding), (padding, padding))
    elif isinstance(padding, (list, tuple)):
        if len(padding) == 2:
            spatial = ((padding[0], padding[0]), (padding[1], padding[1]))
        elif len(padding) == 4:
            spatial = ((padding[0], padding[1]), (padding[2], padding[3]))
        else:
            raise ValueError('The length of padding should be 2 or 4, but got {}.'.format(len(padding)))
    else:
        raise TypeError('Padding should be an integer or a list/tuple, but got {}.'.format(type(padding)))

    # Branch on the rank itself, not on the batch size: a 4-D batch holding
    # zero images must still receive four padding pairs.
    if rank == 3:
        paddings = (spatial[0], spatial[1], (0, 0))
    else:
        paddings = ((0, 0), spatial[0], spatial[1], (0, 0))

    return tf.pad(image, paddings, mode=mode, constant_values=padding_value)


 def Standardization(image, mean=None, std=None, channel_mode=False):
    '''Standardize image(s) with supplied statistics or with statistics computed from the data.

    Parameters
    ----------
    image:
        An n-D Tensor with at least 3 dimensions, the last 3 of which are the dimensions of each image.
    mean:
        List or tuple of mean values for each channel, with respect to channel order.
    std:
        List or tuple of standard deviations for each channel.
    channel_mode:
        Only consulted when ``mean`` and ``std`` are both None. If True, mean and
        standard deviation are reduced over axes [-2, -3] (per channel); if False
        they are reduced over axes [-1, -2, -3] (whole image).

    Returns
    -------
        A Tensor with the same shape as ``image``. The input is cast to float32
        on entry, so the recorded original dtype is float32.

    Raises
    ------
    ValueError
        If exactly one of ``mean`` and ``std`` is None.
    '''
    image = tf.cast(image, tf.float32)
    with ops.name_scope(None, 'Standardization', [image]) as scope:
        image = ops.convert_to_tensor(image, name='image')
        image = _AssertAtLeast3DImage(image)

    # NOTE: `scope` is reused below, after the `with` block has exited, as the
    # name of the final divide op.
    orig_dtype = image.dtype
    if orig_dtype not in [dtypes.float16, dtypes.float32]:
        image = convert_image_dtype(image, dtypes.float32)

    if mean is not None and std is not None:
        # Caller-supplied statistics: plain (image - mean) / std.
        mean = np.array(mean, dtype=np.float32)
        std = np.array(std, dtype=np.float32)
        image -= mean
        image = math_ops.divide(image, std, name=scope)
        return convert_image_dtype(image, orig_dtype, saturate=True)

    elif mean is None and std is None:
        if channel_mode:
            num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:-1])
            # `num_pixels` is the number of elements in each channel of `image`
            image_mean = math_ops.reduce_mean(image, axis=[-2, -3], keepdims=True)
            # `image_mean` is the mean of the elements in each channel of `image`

            # Lower-bound the deviation by 1/sqrt(num_pixels) to avoid dividing by zero.
            stddev = math_ops.reduce_std(image, axis=[-2, -3], keepdims=True)
            min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, image.dtype))
            adjusted_sttdev = math_ops.maximum(stddev, min_stddev)

            image -= image_mean
            image = math_ops.divide(image, adjusted_sttdev, name=scope)
            return convert_image_dtype(image, orig_dtype, saturate=True)

        else:
            num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:])
            # `num_pixels` is the number of elements in `image`
            image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True)

            # Apply a minimum normalization that protects us against uniform images.
            stddev = math_ops.reduce_std(image, axis=[-1, -2, -3], keepdims=True)
            min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, image.dtype))
            adjusted_stddev = math_ops.maximum(stddev, min_stddev)

            image -= image_mean
            image = math_ops.divide(image, adjusted_stddev, name=scope)
            return convert_image_dtype(image, orig_dtype, saturate=True)
    else:
        raise ValueError('std and mean must both be None or not None')


 def RandomBrightness(image, factor):
    '''Randomly adjust brightness by delegating to ``tf.image.random_brightness``.

    Parameters
    ----------
    image:
        An image or images to adjust
    factor:
        Float, must be non-negative. Forwarded as the second positional
        argument; the random range will be [-factor, factor).

    Returns
    -------
        The brightness-adjusted image(s).
    '''

    return tf.image.random_brightness(image, factor)


 def RandomContrast(image, lower, upper, seed=None):
    '''Randomly adjust contrast by delegating to ``tf.image.random_contrast``.

    Parameters
    ----------
    image:
        An image tensor with 3 or more dimensions.
    lower:
        float.  Lower bound for the random contrast factor.
    upper:
        float.  Upper bound for the random contrast factor.
    seed:
        A Python integer. Used to create a random seed.

    Returns
    -------
        The contrast-adjusted image(s).
    '''

    return tf.image.random_contrast(image, lower, upper, seed)


 def RandomHue(image, factor, seed=None):
    '''Randomly adjust hue by delegating to ``tf.image.random_hue``.

    Parameters
    ----------
    image:
        RGB image or images. The size of the last dimension must be 3.
    factor:
        float. The maximum value for the random factor.
    seed:
        An operation-specific seed.

    Returns
    -------
        Adjusted image(s), same shape and DType as `image`.
    '''

    return tf.image.random_hue(image, factor, seed)


 def RandomSaturation(image, lower, upper, seed=None):
    '''Randomly adjust saturation by delegating to ``tf.image.random_saturation``.

    Parameters
    ----------
    image:
        RGB image or images. The size of the last dimension must be 3.
    lower:
        float.  Lower bound for the random saturation factor.
    upper:
        float.  Upper bound for the random saturation factor.
    seed:
        An operation-specific seed.

    Returns
    -------
        Adjusted image(s), same shape and DType as `image`.
    '''

    return tf.image.random_saturation(image, lower, upper, seed)


 def RandomCrop(image, size):
    '''Randomly crop an image (or batch of images) to the requested height and width.

    Parameters
    ----------
    image:
        Input image to crop. For 3-D input the channel size is appended to the
        crop shape; for 4-D input the batch size is prepended as well.
    size:
        int or list/tuple of length 2. An int gives a square crop
        (size, size); a pair is interpreted as (height, width).

    Returns
    -------
        A cropped image of the same rank as ``image``.

    Raises
    ------
    ValueError
        If ``size`` is neither an int nor a list/tuple of length 2.
    '''
    if isinstance(size, int):
        target = (size, size)
    elif isinstance(size, (list, tuple)) and len(size) == 2:
        target = (size[0], size[1])
    else:
        raise ValueError("Size should be a single integer or a list/tuple (h, w) of length 2.")

    rank = len(image.shape)
    if rank == 3:
        _, _, channels = image.shape
        target = target + (channels, )
    elif rank == 4:
        batch, _, _, channels = image.shape
        target = (batch, ) + target + (channels, )

    return tf.image.random_crop(image, size=target)


 def Resize(image, size, method='bilinear', preserve_aspect_ratio=False, antialias=False):
    '''Resize image(s) by delegating to ``tf.image.resize``.

    Parameters
    ----------
    image:
        Input image to resize.
    size:
        int or sequence of length 2. An int is expanded to [size, size]; a
        sequence is interpreted as (height, width).
    method:
        An image.ResizeMethod, or string equivalent; should be one of
        (bilinear, lanczos3, lanczos5, bicubic, gaussian, nearest, area, mitchellcubic).
        Defaults to bilinear.
    preserve_aspect_ratio:
        Whether to preserve the aspect ratio.
    antialias:
        Whether to use an anti-aliasing filter when downsampling an image.

    Returns
    -------
        The resized image.

    Raises
    ------
    ValueError
        If ``size`` is a sequence whose length is not 2.
    '''
    if isinstance(size, int):
        size = [size, size]
    else:
        if len(size) != 2:
            raise ValueError('Size should be a single integer or a list/tuple (h, w) of length 2.')

    return tf.image.resize(
        image, size, method=method, preserve_aspect_ratio=preserve_aspect_ratio, antialias=antialias
    )


 def CropAndResize(image, boxes, box_indices, crop_size, method='bilinear', extrapolation_value=0, is_hwc=True):
    '''Extract crops from image(s) and resize them via ``tf.image.crop_and_resize``.

    Parameters
    ----------
    image:
        A 4-D tensor of shape [batch, image_height, image_width, depth], or a single 3-D image
        of shape [image_height, image_width, depth]. Both image_height and image_width need to be positive.
    boxes:
        A 2-D tensor of shape [num_boxes, 4]. NumPy arrays, lists and tuples are converted to tensors.
    box_indices:
        A 1-D tensor of shape [num_boxes] with int32 values in [0,batch).
        The value of box_ind[i] specifies the image that the i-th box refers to.
        Ignored for a 3-D image: every box then refers to image 0.
    crop_size:
        An int, or a 1-D sequence of 2 elements, size = [crop_height, crop_width]. All cropped image
        patches are resized to this size. The aspect ratio of the image content is not preserved.
    method:
        An optional string specifying the sampling method for resizing.
        It can be either "bilinear" or "nearest" and default to "bilinear".
    extrapolation_value:
        An optional float. Defaults to 0. Value used for extrapolation, when applicable.
    is_hwc:
        Accepted but not used by this implementation.

    Returns
    -------
        A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth].
    '''
    array_like = (np.ndarray, list, tuple)

    boxes_num = 0
    if isinstance(boxes, tf.Tensor):
        boxes_num = boxes.shape[0]
    elif isinstance(boxes, array_like):
        boxes = tf.constant(boxes)
        boxes_num = boxes.shape[0]

    if isinstance(crop_size, int):
        crop_size = tf.constant((crop_size, crop_size))
    elif isinstance(crop_size, array_like):
        crop_size = tf.constant(crop_size)

    if isinstance(box_indices, array_like):
        box_indices = tf.constant(box_indices)

    # A single 3-D image is promoted to a batch of one, so every box must
    # point at batch index 0.
    if len(image.shape) == 3:
        image = tf.expand_dims(image, axis=0)
        # `np.int` was removed from NumPy; use the explicit int32 dtype that
        # the box indices are documented to have.
        box_indices = tf.constant(np.zeros((boxes_num), dtype=np.int32))

    return tf.image.crop_and_resize(
        image, boxes=boxes, box_indices=box_indices, crop_size=crop_size, method=method,
        extrapolation_value=extrapolation_value
    )


 def CropOrPad(image, target_height, target_width, is_hwc=True):
    '''
    Resizes an image to a target width and height by either centrally cropping the image or padding it evenly with zeros.
    Delegates to ``tf.image.resize_with_crop_or_pad``.

    Parameters
    ----------
    image:
        4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels].
    target_height:
        Target height.
    target_width:
        Target width.
    is_hwc:
        Accepted but not used by this implementation.

    Returns
    -------
        Cropped and/or padded image.
    '''

    return tf.image.resize_with_crop_or_pad(image, target_height, target_width)


 def ResizeAndPad(image, target_height, target_width, method='bilinear', antialias=False, is_hwc=True):
    '''Resize and pad image(s) to a target size by delegating to ``tf.image.resize_with_pad``.

    Parameters
    ----------
    image:
        4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels].
    target_height:
        Target height.
    target_width:
        Target width.
    method:
        Resize method, forwarded as ``method``.
    antialias:
        Forwarded as ``antialias``.
    is_hwc:
        Accepted but not used by this implementation.

    Returns
    -------
        Resized and padded image. If images was 4-D, a 4-D float Tensor of shape [batch, new_height, new_width, channels].
        If images was 3-D, a 3-D float Tensor of shape [new_height, new_width, channels].
    '''

    return tf.image.resize_with_pad(image, target_height, target_width, method=method, antialias=antialias)


 def RgbToHsv(image):
    '''Convert RGB image data to HSV by delegating to ``tf.image.rgb_to_hsv``.'''

    return tf.image.rgb_to_hsv(image)


 def Transpose(image, order):
    '''Permute the axes of a 3-D or 4-D image tensor.

    ``order`` must contain one entry per image dimension. An image whose rank
    is statically unknown is treated like a 3-D image.

    Raises
    ------
    ValueError
        If ``order`` has the wrong length or the image is not 3-D/4-D.
    '''
    image = _AssertAtLeast3DImage(ops.convert_to_tensor(image))
    rank = image.get_shape().ndims

    if rank is None or rank == 3:
        if len(order) != 3:
            raise ValueError('if image is 3-D tensor, order should be a list/tuple with length of 3')
    elif rank == 4:
        if len(order) != 4:
            raise ValueError('if image is 4-D tensor, order should be a list/tuple with length of 4')
    else:
        raise ValueError('\'image\' must have either 3 or 4 dimensions.')

    return array_ops.transpose(image, order)


 def RandomRotation(
    image, degrees, fill_mode='nearest', fill_value=0, center=None, expand=False, is_hwc=True, interpolation_order=1
 ):
    '''Randomly rotate an image via ``tf.keras.preprocessing.image.random_rotation``.

    Parameters
    ----------
    image:
        Tensor or NumPy image; tensors are converted to NumPy first.
    degrees:
        Rotation range, forwarded as the second positional argument.
    fill_mode:
        One of 'constant', 'nearest', 'reflect', 'wrap'.
    fill_value:
        Forwarded as ``cval``.
    center, expand:
        Accepted but not used by this implementation.
    is_hwc:
        If True the (row, col, channel) axes are (0, 1, 2), otherwise (1, 2, 0).
    interpolation_order:
        Forwarded as ``interpolation_order``.

    Returns
    -------
        The rotated image converted back to a tensor.

    Raises
    ------
    TypeError
        If the image is not a NumPy array after conversion, or ``fill_mode`` is not supported.
    '''
    if isinstance(image, tf.Tensor):
        image = np.asarray(image)
    if not isinstance(image, np.ndarray):
        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))

    row_axis, col_axis, channel_axis = (0, 1, 2) if is_hwc else (1, 2, 0)

    if fill_mode not in ('constant', 'nearest', 'reflect', 'wrap'):
        raise TypeError('fill_mode must be in (constant, nearest, reflect, wrap)')

    rotated = tf.keras.preprocessing.image.random_rotation(
        image, degrees, row_axis=row_axis, col_axis=col_axis, channel_axis=channel_axis, fill_mode=fill_mode,
        cval=fill_value, interpolation_order=interpolation_order
    )
    return tf.convert_to_tensor(rotated)


 def RandomShift(image, shift, fill_mode='nearest', fill_value=0, is_hwc=True, interpolation_order=1):
    '''Randomly shift an image via ``tf.keras.preprocessing.image.random_shift``.

    Parameters
    ----------
    image
        Input tensor or NumPy array. Must be 3D.
    shift:
        number or list/tuple of length 2. A number is used for both the width
        and the height shift range; a pair is [width fraction, height fraction].
    is_hwc:
        If True the (row, col, channel) axes are (0, 1, 2), otherwise (1, 2, 0).
    fill_mode:
        Points outside the boundaries of the input are filled according to the given mode (one of {'constant', 'nearest', 'reflect', 'wrap'}).
    fill_value:
        Value used for points outside the boundaries of the input if mode='constant'.
    interpolation_order
        int, order of spline interpolation. see ndimage.interpolation.affine_transform

    Returns
    -------
        The shifted image converted to a tensor.

    Raises
    ------
    ValueError
        If ``shift`` is neither a number nor a list/tuple of length 2.
    TypeError
        If ``fill_mode`` is not supported or the image is not a NumPy array after conversion.
    '''
    # Check the scalar arguments before touching the image so that a bad call
    # fails early. A list/tuple of the wrong length is rejected here too;
    # otherwise the fractions would stay unbound.
    if isinstance(shift, numbers.Number):
        width_fraction = height_fraction = shift
    elif isinstance(shift, (list, tuple)) and len(shift) == 2:
        width_fraction, height_fraction = shift[0], shift[1]
    else:
        raise ValueError('shift must be number or list/tuple of length 2')
    if fill_mode not in ('constant', 'nearest', 'reflect', 'wrap'):
        raise TypeError('fill_mode must be in (constant, nearest, reflect, wrap)')

    if isinstance(image, tf.Tensor):
        image = np.asarray(image)
    if not isinstance(image, np.ndarray):
        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))

    if is_hwc:
        h, w, c = 0, 1, 2
    else:
        h, w, c = 1, 2, 0

    image = tf.keras.preprocessing.image.random_shift(
        image, wrg=width_fraction, hrg=height_fraction, row_axis=h, col_axis=w, channel_axis=c, fill_mode=fill_mode,
        cval=fill_value, interpolation_order=interpolation_order
    )

    return tf.convert_to_tensor(image)


 def RandomShear(image, degree, fill_mode='nearest', fill_value=0, is_hwc=True, interpolation_order=1):
    '''Randomly shear an image via ``tf.keras.preprocessing.image.random_shear``.

    Parameters
    ----------
    image
        Input tensor or NumPy array. Must be 3D.
    degree:
        Transformation intensity in degrees.
    is_hwc:
        If True the (row, col, channel) axes are (0, 1, 2), otherwise (1, 2, 0).
    fill_mode:
        Points outside the boundaries of the input are filled according to the given mode (one of {'constant', 'nearest', 'reflect', 'wrap'}).
    fill_value:
        Value used for points outside the boundaries of the input if mode='constant'.
    interpolation_order
        int, order of spline interpolation. see ndimage.interpolation.affine_transform

    Returns
    -------
        The sheared image converted to a tensor.

    Raises
    ------
    TypeError
        If the image is not a NumPy array after conversion.
    '''
    if isinstance(image, tf.Tensor):
        image = np.asarray(image)
    if not isinstance(image, np.ndarray):
        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))

    row_axis, col_axis, channel_axis = (0, 1, 2) if is_hwc else (1, 2, 0)

    sheared = tf.keras.preprocessing.image.random_shear(
        image, intensity=degree, row_axis=row_axis, col_axis=col_axis, channel_axis=channel_axis,
        fill_mode=fill_mode, cval=fill_value, interpolation_order=interpolation_order
    )
    return tf.convert_to_tensor(sheared)


 def RandomZoom(image, zoom_range, fill_mode='nearest', fill_value=0, is_hwc=True, interpolation_order=1):
    '''Randomly zoom an image via ``tf.keras.preprocessing.image.random_zoom``.

    Parameters
    ----------
    image:
         Input tensor or NumPy array. Must be 3D.
    zoom_range:
        number or list/tuple of two floats; zoom range for width and height.
        A single number is used for both.
    is_hwc:
        If True the (row, col, channel) axes are (0, 1, 2), otherwise (1, 2, 0).
    fill_mode:
        Points outside the boundaries of the input are filled according to the given mode (one of {'constant', 'nearest', 'reflect', 'wrap'}).
    fill_value:
        Value used for points outside the boundaries of the input if mode='constant'.
    interpolation_order:
        int, order of spline interpolation. see ndimage.interpolation.affine_transform

    Returns
    -------
        The zoomed image converted to a tensor.

    Raises
    ------
    ValueError
        If ``zoom_range`` is neither a number nor a list/tuple of length 2.
    TypeError
        If the image is not a NumPy array after conversion.
    '''
    # Validate `zoom_range` before touching the image. A list/tuple of the
    # wrong length is rejected here instead of being passed on unchecked,
    # and the message names the right parameter.
    if isinstance(zoom_range, numbers.Number):
        zoom_range = (zoom_range, zoom_range)
    elif isinstance(zoom_range, (list, tuple)) and len(zoom_range) == 2:
        zoom_range = (zoom_range[0], zoom_range[1])
    else:
        raise ValueError('zoom_range must be number or list/tuple of length 2')

    if isinstance(image, tf.Tensor):
        image = np.asarray(image)
    if not isinstance(image, np.ndarray):
        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))

    if is_hwc:
        h, w, c = 0, 1, 2
    else:
        h, w, c = 1, 2, 0

    image = tf.keras.preprocessing.image.random_zoom(
        image, zoom_range=zoom_range, row_axis=h, col_axis=w, channel_axis=c, fill_mode=fill_mode, cval=fill_value,
        interpolation_order=interpolation_order
    )
    return tf.convert_to_tensor(image)


 def Rescale(image, scale, offset=0):
    '''Return ``image * scale + offset`` computed in float32.

    Parameters
    ----------
    image:
        3-D image or 4-D images
    scale:
        Float, the scale to apply to the inputs.
    offset:
        Float, the offset to apply to the inputs.

    Returns
    -------
        rescaled images
    '''
    as_float = lambda value: tf.cast(value, dtype=tf.float32)
    pixels, gain, bias = as_float(image), as_float(scale), as_float(offset)
    return pixels * gain + bias


 def RandomFlipVertical(image):
    '''Randomly flip image(s) upside down by delegating to ``tf.image.random_flip_up_down``.'''

    return tf.image.random_flip_up_down(image)


 def RandomFlipHorizontal(image):
    '''Randomly flip image(s) left-to-right by delegating to ``tf.image.random_flip_left_right``.'''

    return tf.image.random_flip_left_right(image)


 def HWC2CHW(image):
    '''Move the channel axis from last to first for a 3-D image or a 4-D batch.

    Raises
    ------
    ValueError
        If ``image`` is neither 3-D nor 4-D.
    '''
    rank = len(image.shape)
    if rank == 3:
        order = (2, 0, 1)
    elif rank == 4:
        order = (0, 3, 1, 2)
    else:
        raise ValueError('\'image\' must have either 3 or 4 dimensions.')
    return Transpose(image, order)


 def CHW2HWC(image):
    '''Move the channel axis from first to last for a 3-D image or a 4-D batch.

    Raises
    ------
    ValueError
        If ``image`` is neither 3-D nor 4-D.
    '''
    rank = len(image.shape)
    if rank == 3:
        order = (1, 2, 0)
    elif rank == 4:
        order = (0, 2, 3, 1)
    else:
        raise ValueError('\'image\' must have either 3 or 4 dimensions.')
    return Transpose(image, order)
--- a/tensorlayer/dataflow/mindspore_data.py
+++ b/tensorlayer/dataflow/mindspore_data.py
@@ -5,131 +5,78 @@ import mindspore.dataset as ds
 import mindspore as ms
 from enum import Enum
 __all__ = [
    'Apply',
    'Batch',
    'Concat',
    'CsvDataset',
    'Filter',
    'Flat_map',
    'FromGenerator',
    'FromSlices',
    'Map',
    'Prefetch',
    'Repeat',
    'Shuffle',
    'Skip',
    'Take',
    'TextFlieDataset',
    'TFRecordDataset',
    'Dataloader',
    'Dataset',
    'IterableDataset',
 ]


 class Shuffle(str, Enum):
    """String-valued shuffle modes passed as the ``shuffle`` argument of the file readers in this module."""
    # NOTE(review): a function that is also named ``Shuffle`` is defined further down in this
    # module. Once that definition runs, the module-level name no longer refers to this enum,
    # so ``Shuffle.GLOBAL`` only resolves in code evaluated before it (e.g. default arguments).
    GLOBAL: str = "global"
    FILES: str = "file"
 class Dataset(object):

    def __init__(self):
        pass

 def Apply(dataset, transformation_func):
    def __getitem__(self, idx):
        raise NotImplementedError("'{}' not implement in class "\
                "{}".format('__getitem__', self.__class__.__name__))

    return dataset.apply(transformation_func)
    def __len__(self):
        raise NotImplementedError("'{}' not implement in class "\
                "{}".format('__len__', self.__class__.__name__))


 def Batch(
    dataset, batch_size, drop_remainder=False, num_parallel_workers=None, per_batch_map=None, inut_columns=None,
    output_columns=None, column_order=None, pad_info=None
 ):
 class IterableDataset(object):
    """Base class for iterable-style datasets; subclasses must override ``__iter__``."""

    def __init__(self):
        pass

    def __iter__(self):
        # The base class has nothing to iterate over: report which concrete class forgot to override.
        template = "'{}' not implement in class " "{}"
        owner = self.__class__.__name__
        raise NotImplementedError(template.format('__iter__', owner))


 def Batch(dataset, batch_size, drop_last=False):
    '''
    Combine batch_size number of consecutive rows into batches.

    Parameters
    ----------
    dataset
    batch_size
    drop_remainder
    num_parallel_workers
    per_batch_map
    inut_columns
    output_columns
    column_order
    pad_info

    drop_last
    Returns
    -------

    '''
    return dataset.batch(
        batch_size=batch_size, drop_remainder=drop_remainder, num_parallel_workers=num_parallel_workers,
        per_batch_map=per_batch_map, input_columns=inut_columns, output_columns=output_columns,
        column_order=column_order, pad_info=pad_info
    )

    return dataset.batch(batch_size=batch_size, drop_remainder=drop_last)

 def Concat(dataset_1, dataset_2):
    """Return the result of ``dataset_1.concat(dataset_2)``."""
    combined = dataset_1.concat(dataset_2)
    return combined


 def CsvDataset(
    file_pattern, batch_size=1, column_names=None, column_defaults=None, label_name=None, select_columns=None,
    field_delim=',', use_quote_delim=True, na_value='', header=True, num_epochs=None, shuffle=Shuffle.GLOBAL,
    shuffle_buffer_size=10000, shuffle_seed=None, prefetch_buffer_size=None, num_parallel_reads=None, sloppy=False,
    num_rows_for_inference=100, compression_type=None, ignore_errors=False, numples_samples=None, num_shards=None,
    shard_id=None, cache=None
 ):
    """
    A source dataset that reads and parses comma-separated values (CSV) datasets.

    Only these arguments reach ``ds.CSVDataset``: ``file_pattern`` (as
    ``dataset_files``), ``field_delim``, ``column_defaults``, ``column_names``,
    ``numples_samples`` (as ``num_samples``), ``num_parallel_reads`` (as
    ``num_parallel_workers``), ``shuffle``, ``num_shards``, ``shard_id`` and
    ``cache``. The remaining parameters are accepted but not used.

    Examples:
        >>> import mindspore.dataset as dataset
        >>>
        >>> dataset_files = ["/path/to/1", "/path/to/2"] # contains 1 or multiple text files
        >>> dataset = dataset.CSVDataset(dataset_files=dataset_files, column_names=['col1', 'col2', 'col3', 'col4'])
    """
    reader_options = dict(
        dataset_files=file_pattern,
        field_delim=field_delim,
        column_defaults=column_defaults,
        column_names=column_names,
        num_samples=numples_samples,
        num_parallel_workers=num_parallel_reads,
        shuffle=shuffle,
        num_shards=num_shards,
        shard_id=shard_id,
        cache=cache,
    )
    return ds.CSVDataset(**reader_options)

 def Concat(datasets):

 def Filter(dataset, predicate):
    """Return the result of ``dataset.filter(predicate)``."""
    filtered = dataset.filter(predicate)
    return filtered


 def Flat_map(dataset, map_func):
    """Return the result of ``dataset.flat_map(map_func)``."""
    flattened = dataset.flat_map(map_func)
    return flattened
    datasets = list(datasets)
    dataset = ds.Dataset.concat(datasets)
    return dataset


 def FromGenerator(
    generator, output_types, output_shapes=None, args=None, column_names=None, column_types=None, schema=None,
    num_samples=None, num_parallel_workers=1, shuffle=None, sampler=None, num_shards=None, shard_id=None,
    python_multiprocessing=True
 ):
 def FromGenerator(generator, output_types, column_names):

    return ds.GeneratorDataset(
        source=generator, column_names=column_names, column_types=column_types, schema=schema, num_samples=num_samples,
        num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, num_shards=num_shards,
        shard_id=shard_id, python_multiprocessing=python_multiprocessing
    )
    output_types = list(output_types)
    column_names = list(column_names)
    return ds.GeneratorDataset(source=generator, column_names=column_names, column_types=output_types)


 def FromSlices(
    tensor, column_names=None, num_samples=None, num_parallel_workers=1, shuffle=None, sampler=None, num_shards=None,
    shard_id=None
 ):
 def FromSlices(datas, column_names):

    return ds.NumpySlicesDataset(
        data=tensor, column_names=column_names, num_samples=num_samples, num_parallel_workers=num_parallel_workers,
        shuffle=shuffle, sampler=sampler, num_shards=num_shards, shard_id=shard_id
    )
    return ds.NumpySlicesDataset(data=datas, column_names=column_names)


 def Map(
    dataset, map_func, num_parallel_calls=None, input_columns=None, output_columns=None, column_order=None,
    num_parallel_workers=None, python_multiprocessing=False, cache=None, callbacks=None
 ):
 def Map(dataset, map_func, input_columns=None):
    """ Maps map_func across the elements of this dataset.

    Parameters
@@ -144,19 +91,7 @@ def Map(
    -------

    """
    return dataset.map(
        operations=map_func, input_columns=input_columns, output_columns=output_columns, column_order=column_order,
        num_parallel_workers=num_parallel_workers, python_multiprocessing=python_multiprocessing, cache=cache,
        callbacks=callbacks
    )


 def Prefetch(dataset, buffer_size):
    """Configure prefetching and hand the dataset back to the caller.

    MindSpore exposes prefetching as a global setting
    (``mindspore.dataset.config.set_prefetch_size``), not as a per-dataset
    transformation, so the size is applied through ``ds.config`` rather than
    through an attribute of ``dataset``.

    Parameters
    ----------
    dataset:
        A dataset providing ``get_batch_size()``.
    buffer_size:
        Number of batches to prefetch; multiplied by the dataset's batch size.

    Returns
    -------
    The input ``dataset`` itself, so the call can be chained like the other
    dataset helpers.
    """
    prefetch_size = dataset.get_batch_size() * buffer_size
    # ``set_prefetch_size`` rejects non-positive sizes, so a zero buffer leaves the
    # current global setting untouched instead of raising.
    if prefetch_size > 0:
        ds.config.set_prefetch_size(prefetch_size)
    return dataset
    return dataset.map(operations=map_func, input_columns=input_columns)


 def Repeat(dataset, count=None):
@@ -164,104 +99,11 @@ def Repeat(dataset, count=None):
    return dataset.repeat(count)


 def Shuffle(dataset, buffer_size, seed=None, reshuffle_each_iteration=None):

    #dataset.config.set_seed(seed)
 def Shuffle(dataset, buffer_size):

    return dataset.shuffle(buffer_size)


 def Skip(dataset, count):
    """Create a dataset that skips ``count`` elements of ``dataset``.

    Parameters
    ----------
    dataset:
        A dataset.
    count:
        Number of elements to skip; forwarded unchanged to ``dataset.skip``.

    Returns
    -------
    The value returned by ``dataset.skip(count)``.
    """
    remaining = dataset.skip(count)
    return remaining


 def Take(dataset, count):
    """Create a dataset with at most ``count`` elements of ``dataset``.

    Parameters
    ----------
    dataset:
        A dataset.
    count:
        Number of elements to take; forwarded unchanged to ``dataset.take``.

    Returns
    -------
    The value returned by ``dataset.take(count)``.
    """
    taken = dataset.take(count)
    return taken


 def TextFlieDataset(
    filenames, compression_type=None, buffer_size=None, num_parallel_reads=None, num_samples=None, shuffle=None,
    num_shards=None, shard_id=None, cache=None
 ):
    """
    A source dataset that reads and parses datasets stored on disk in text format.
    The generated dataset has one column ['text'].

    ``filenames`` is passed as ``dataset_files`` and ``num_parallel_reads`` as
    ``num_parallel_workers``. ``compression_type`` and ``buffer_size`` are
    accepted but not used. When ``shuffle`` is None, global shuffling is used.

        Examples:
        >>> import mindspore.dataset as dataset
        >>>
        >>> dataset_files = ["/path/to/1", "/path/to/2"] # contains 1 or multiple text files
        >>> dataset = dataset.TextFileDataset(dataset_files=dataset_files)
    """
    if shuffle is None:
        # Use MindSpore's own enum: by the time this function is called, the module-level
        # name ``Shuffle`` refers to the ``Shuffle`` function defined in this file, which
        # has no ``GLOBAL`` attribute.
        shuffle = ds.Shuffle.GLOBAL
    return ds.TextFileDataset(
        dataset_files=filenames, num_samples=num_samples, num_parallel_workers=num_parallel_reads, shuffle=shuffle,
        num_shards=num_shards, shard_id=shard_id, cache=cache
    )


 def TFRecordDataset(
    filenames, compression_type=None, buffer_size=None, num_parallel_reads=None, schema=None, columns_list=None,
    num_samples=None, shuffle=None, num_shards=None, shard_id=None, shard_equal_rows=False, cache=None
 ):
    """
    A source dataset that reads and parses datasets stored on disk in TFData format.

    ``filenames`` is passed as ``dataset_files`` and ``num_parallel_reads`` as
    ``num_parallel_workers``. ``compression_type`` and ``buffer_size`` are
    accepted but not used. When ``shuffle`` is None, global shuffling is used.

    Examples:
        >>> import mindspore.dataset as dataset
        >>> import mindspore.common.dtype as mstype
        >>>
        >>> dataset_files = ["/path/to/1", "/path/to/2"] # contains 1 or multiple tf data files
        >>>
        >>> # 1) Get all rows from dataset_files with no explicit schema
        >>> # The meta-data in the first row will be used as a schema.
        >>> tfdataset = dataset.TFRecordDataset(dataset_files=dataset_files)
        >>>
        >>> # 2) Get all rows from dataset_files with user-defined schema
        >>> schema = dataset.Schema()
        >>> schema.add_column('col_1d', de_type=mindspore.int64, shape=[2])
        >>> tfdataset = dataset.TFRecordDataset(dataset_files=dataset_files, schema=schema)
        >>>
        >>> # 3) Get all rows from dataset_files with schema file "./schema.json"
        >>> tfdataset = dataset.TFRecordDataset(dataset_files=dataset_files, schema="./schema.json")
    """
    if shuffle is None:
        # Use MindSpore's own enum: by the time this function is called, the module-level
        # name ``Shuffle`` refers to the ``Shuffle`` function defined in this file, which
        # has no ``GLOBAL`` attribute.
        shuffle = ds.Shuffle.GLOBAL
    return ds.TFRecordDataset(
        dataset_files=filenames, schema=schema, columns_list=columns_list, num_samples=num_samples,
        num_parallel_workers=num_parallel_reads, shuffle=shuffle, num_shards=num_shards, shard_id=shard_id,
        shard_equal_rows=shard_equal_rows, cache=cache
    )


 def Zip(datasets):
    '''
    Creates a Dataset by zipping together the given datasets.
@@ -273,15 +115,14 @@ def Zip(datasets):
    -------

    '''
    datasets = tuple(datasets)
    return ds.zip(datasets)


 def Dataloader(dataset, batch_size, shuffle=False, drop_last=False, prefetch=0, shuffle_buffer_size=0):
 def Dataloader(dataset, batch_size, shuffle=False, drop_last=False, shuffle_buffer_size=10000):

    if shuffle:
        dataset = Shuffle(dataset, buffer_size=shuffle_buffer_size)

    dataset = Batch(dataset, batch_size=batch_size, drop_remainder=drop_last)
    dataset = Prefetch(dataset, buffer_size=prefetch)
    dataset = Batch(dataset, batch_size=batch_size, drop_last=drop_last)

    return dataset
--- a/tensorlayer/dataflow/paddle_data.py
+++ b/tensorlayer/dataflow/paddle_data.py
@@ -3,129 +3,96 @@

 import numpy as np
 import paddle
 from paddle.io import Dataset, BatchSampler, DataLoader, IterableDataset
 from paddle.io import Dataset as dataset
 from paddle.io import IterableDataset as iterabledataset
 from paddle.io import DataLoader
 __all__ = [
    'Batch',
    'Concat',
    'FromGenerator',
    'FromSlices',
    'Map',
    # 'Shuffle',
    # 'Batch',
    'Repeat',
    'Shuffle',
    'Dataloader',
    'Dataset',
    'IterableDataset',
 ]


 def to_list(value):
    """Normalize ``value`` to a list.

    ``None`` is returned as is, a list or tuple is copied into a new list,
    and any other value is wrapped in a one-element list.
    """
    if value is None:
        return None
    return list(value) if isinstance(value, (list, tuple)) else [value]
 class Dataset(dataset):

    def __init__(self):
        pass

 class FromGenerator(Dataset):
    def __getitem__(self, idx):
        raise NotImplementedError("'{}' not implement in class "\
                "{}".format('__getitem__', self.__class__.__name__))

    def __init__(self, generator):
    def __len__(self):
        raise NotImplementedError("'{}' not implement in class "\
                "{}".format('__len__', self.__class__.__name__))

        if not callable(generator):
            raise TypeError("'generator' must be callable")
        self.generator = generator()
        self.datas = []
        self.labels = []
        for data, label in self.generator:
            self.datas.append(data)
            self.labels.append(label)

    def __getitem__(self, idx):
 class IterableDataset(iterabledataset):

        x = self.datas[idx]
        y = self.labels[idx]
    def __init__(self):
        pass

    def __iter__(self):
        raise NotImplementedError("'{}' not implement in class "\
                "{}".format('__iter__', self.__class__.__name__))

        return x, y
    def __getitem__(self, idx):
        raise RuntimeError("'{}' should not be called for IterableDataset" \
                "{}".format('__getitem__', self.__class__.__name__))

    def __len__(self):
        raise RuntimeError("'{}' should not be called for IterableDataset" \
                "{}".format('__len__', self.__class__.__name__))

        return self.datas.shape[0]

 def FromGenerator(generator, output_types=None, column_names=None):

 class FromSlices(Dataset):
    return generator

    def __init__(self, datas, transform = None):
        self.datas = datas[0]
        self.labels = datas[1]
        self.transform = transform

        if len(self.datas) != len(self.labels):
            raise ValueError('Datas and labels not have same shape of the 1st dimension.')
 def FromSlices(datas, column_names=None):

    def __getitem__(self, idx):
        data = paddle.to_tensor(self.datas[idx], dtype='float32')
        label = paddle.to_tensor(self.labels[idx], dtype='int64')
        if self.transform is not None:
            data = self.transform(data)
        return data, label
    datas = list(datas)
    return paddle.io.TensorDataset(datas)

    def __len__(self):

        return len(self.datas)
 def Concat(datasets):
    """Chain ``datasets`` into one dataset with ``paddle.io.ChainDataset``.

    ``datasets`` may be any iterable; it is materialized into a list first.
    """
    dataset_list = list(datasets)
    return paddle.io.ChainDataset(dataset_list)

 class Concat(IterableDataset):

    def __init__(self, datasets):
        self.datasets = list(datasets)
        assert len(self.datasets) > 0, "input datasets shoule not be empty"
        for i, dataset in enumerate(self.datasets):
            assert isinstance(dataset, IterableDataset), \
                "ChainDataset only support paddle.io.IterableDataset"
 def Zip(datasets):

    def __iter__(self):
        for dataset in self.datasets:
            for sample in dataset:
                yield sample


 class Map(Dataset):

    def __init__(self, dataset, transform):
        self.isDataset = False
        self.transform = transform
        if isinstance(dataset, Dataset):
            self.isDataset = True
            self.dataset = dataset
        elif isinstance(dataset, list) or isinstance(dataset, tuple):
            self.datas = dataset[0]
            self.labels = dataset[1]
        else:
            raise TypeError(
                " 'dataset' should be subclass instance of paddle.io.Dataset "
                "or a [data, label] list/tulpe, not a {}".format(type(dataset))
            )
    return paddle.io.ComposeDataset(list(datasets))

    def __getitem__(self, idx):
        if self.isDataset:
            x = self.dataset[idx][0]
            if not isinstance(x, np.ndarray):
                x = np.asarray(x)
            x = self.transform(x)
            y = self.dataset[idx][1]
        else:
            x = self.datas[idx]
            if not isinstance(x, np.ndarray):
                x = np.asarray(x)
            x = self.transform(x)
            y = self.labels[idx]

        return x, y

    def __len__(self):
 def Dataloader(dataset, batch_size=None, shuffle=False, drop_last=False, shuffle_buffer_size=0):
    """Wrap ``dataset`` in a Paddle ``DataLoader``.

    ``batch_size``, ``shuffle`` and ``drop_last`` are forwarded as given and
    ``return_list`` is always True. ``shuffle_buffer_size`` is accepted but
    not used by this backend.
    """
    loader_options = dict(batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, return_list=True)
    return DataLoader(dataset, **loader_options)


 def Batch(dataset, batch_size, drop_last=False):
    """Not implemented for the Paddle backend; always raises ``NotImplementedError``."""
    message = 'This function not implement in paddle backend.'
    raise NotImplementedError(message)


 def Shuffle(dataset, buffer_size, seed=None):
    """Not implemented for the Paddle backend; always raises ``NotImplementedError``."""
    message = 'This function not implement in paddle backend.'
    raise NotImplementedError(message)


 def Repeat(dataset, count=None):

        if self.isDataset:
            return len(self.dataset[0])
        else:
            return len(self.datas)
    raise NotImplementedError('This function not implement in paddle backend.')


 def Dataloader(dataset, batch_size=None, shuffle=False, drop_last=False, prefetch=0, shuffle_buffer_size=0):
 def Map(dataset, map_func, input_columns=None):

    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
    raise NotImplementedError('This function not implement in paddle backend.')
--- a/tensorlayer/dataflow/tensorflow_data.py
+++ b/tensorlayer/dataflow/tensorflow_data.py
@@ -2,265 +2,357 @@
 # -*- coding: utf-8 -*-

 import tensorflow as tf

 import tensorlayer as tl
 import numpy as np
 __all__ = [
    'Apply',
    'Batch',
    'Concat',
    'CsvDataset',
    'Filter',
    'Flat_map',
    'FromGenerator',
    'FromSlices',
    'Map',
    'Prefetch',
    'Repeat',
    'Shuffle',
    'Skip',
    'Take',
    'TextFlieDataset',
    'TFRecordDataset',
    'Zip',
    'Dataloader',
    'Dataset',
    'IterableDataset',
 ]


 def Apply(dataset, transformation_func):
    """Apply a transformation function to ``dataset``.

    Forwards to ``dataset.apply``, which lets custom transformations be
    chained when they are written as functions that take one `Dataset` and
    return a transformed `Dataset`.

    >>> dataset = tf.data.Dataset.range(100)
    >>> def dataset_fn(dataset):
    ...   return dataset.filter(lambda x: x < 5)
    >>> dataset = dataset.apply(dataset_fn)
    >>> list(dataset.as_numpy_iterator())
    [0, 1, 2, 3, 4]

    Args:
      dataset: The dataset to transform.
      transformation_func: A function that takes one `Dataset` argument and
        returns a `Dataset`.

    Returns:
      The value returned by ``dataset.apply(transformation_func)``.
    """
    transformed = dataset.apply(transformation_func)
    return transformed


 def Batch(dataset, batch_size, drop_remainder=False):
    '''
 class Dataset(object):
    """An abstract class to encapsulate methods and behaviors of datasets.
    All map-style datasets (whose samples can be retrieved by a given key) should be a subclass of 'tensorlayer.dataflow.Dataset'.
    All subclasses should implement the following methods:
    :code:`__getitem__`: get sample from dataset with a given index.
    :code:`__len__`: return dataset sample number.

    Parameters
    Examples
    ----------
    dataset
    batch_size
    drop_remainder
    With TensorLayer

    >>> from tensorlayer.dataflow import Dataset
    >>> class mnistdataset(Dataset):
    >>>     def __init__(self, data, label,transform):
    >>>         self.data = data
    >>>         self.label = label
    >>>         self.transform = transform
    >>>     def __getitem__(self, index):
    >>>         data = self.data[index].astype('float32')
    >>>         data = self.transform(data)
    >>>         label = self.label[index].astype('int64')
    >>>         return data, label
    >>>     def __len__(self):
    >>>         return len(self.data)
    >>> train_dataset = mnistdataset(data = X_train, label = y_train ,transform = transform)

    Returns
    -------
    """

    '''
    return dataset.batch(batch_size=batch_size, drop_remainder=drop_remainder)
    def __init__(self):
        pass

    def __call__(self):

 def Concat(dataset_1, dataset_2):
        return self

    return dataset_1.concatenate(dataset_2)
    def __getitem__(self, idx):
        raise NotImplementedError("'{}' not implement in class "\
                "{}".format('__getitem__', self.__class__.__name__))

    def __len__(self):
        raise NotImplementedError("'{}' not implement in class "\
                "{}".format('__len__', self.__class__.__name__))


 class IterableDataset(object):
    """An abstract class to encapsulate methods and behaviors of iterable datasets.
    All iterable-style datasets (whose samples can only be read one by one in sequence, like a Python iterator) should be a subclass of `tensorlayer.dataflow.IterableDataset`.
    All subclasses should implement following methods:
    :code:`__iter__`: yield sample sequentially.

    Examples
    ----------
    With TensorLayer

    >>> class mnistdataset(IterableDataset):
    >>>     def __init__(self, data, label,transform):
    >>>         self.data = data
    >>>         self.label = label
    >>>         self.transform = transform
    >>>     def __iter__(self):
    >>>         for i in range(len(self.data)):
    >>>             data = self.data[i].astype('float32')
    >>>             data = self.transform(data)
    >>>             label = self.label[i].astype('int64')
    >>>             yield data, label
    >>> train_dataset = mnistdataset(data = X_train, label = y_train ,transform = transform)

 def CsvDataset(
    file_pattern, batch_size=1, column_names=None, column_defaults=None, label_name=None, select_columns=None,
    field_delim=',', use_quote_delim=True, na_value='', header=True, num_epochs=None, shuffle=True,
    shuffle_buffer_size=10000, shuffle_seed=None, prefetch_buffer_size=None, num_parallel_reads=None, sloppy=False,
    num_rows_for_inference=100, compression_type=None, ignore_errors=False, numples_samples=None, num_shards=None,
    shard_id=None, cache=None
 ):
    """Reads CSV files into a dataset.

    Reads CSV files into a dataset, where each element is a (features, labels)
    tuple that corresponds to a batch of CSV rows. The features dictionary
    maps feature column names to `Tensor`s containing the corresponding
    feature data, and labels is a `Tensor` containing the batch's label data.

    Every argument from ``file_pattern`` through ``ignore_errors`` is forwarded
    to ``tf.data.experimental.make_csv_dataset``. ``numples_samples``,
    ``num_shards``, ``shard_id`` and ``cache`` are accepted but not used here.
    """
    # Forward the caller's values; the defaults above match the values that were
    # previously hard-coded, so calls relying on defaults behave the same.
    return tf.data.experimental.make_csv_dataset(
        file_pattern, batch_size, column_names=column_names, column_defaults=column_defaults,
        label_name=label_name, select_columns=select_columns, field_delim=field_delim,
        use_quote_delim=use_quote_delim, na_value=na_value, header=header, num_epochs=num_epochs,
        shuffle=shuffle, shuffle_buffer_size=shuffle_buffer_size, shuffle_seed=shuffle_seed,
        prefetch_buffer_size=prefetch_buffer_size, num_parallel_reads=num_parallel_reads, sloppy=sloppy,
        num_rows_for_inference=num_rows_for_inference, compression_type=compression_type,
        ignore_errors=ignore_errors
    )

    def __init__(self):
        pass

    def __call__(self):

        return self

    def __iter__(self):
        raise NotImplementedError("'{}' not implement in class "\
                "{}".format('__iter__', self.__class__.__name__))


 def FromGenerator(generator, output_types, column_names=None):
    """Creates a `Dataset` whose elements are generated by `generator`.

 def Filter(dataset, predicate):
    '''
    Filters this dataset according to predicate.
    Parameters
    ----------
    dataset :
        A dataset
    predicate :
        A function mapping a dataset element to a boolean.
    Returns :
        The Dataset containing the elements of this dataset for which predicate is True.
    generator: Callable or Iterable
        A generator callable object or an iterable Python object.
    output_types: list or tuple
        Set output data type. This parameter not support in MindSpore backend and Paddle backend.
    column_names: list or tuple
        column names of the dataset. This parameter not support in TensorFlow backend and Paddle backend.

    Returns
    -------
    Dataset
        A Dataset.

    Examples
    ----------
    With TensorLayer

    '''
    return dataset.filter(predicate)
    >>> train_dataset = mnistdataset(data = X_train, label = y_train ,transform = transform)
    >>> train_dataset = tl.dataflow.FromGenerator(train_dataset, output_types=[tl.float32, tl.int64], column_names=['data', 'label'])

    """
    output_types = tuple(output_types)
    return tf.data.Dataset.from_generator(generator, output_types=output_types)


 def Batch(dataset, batch_size, drop_last=False):
    """Combine batch_size number of consecutive rows into batches.This function not implement in Paddle backend.

 def Flat_map(dataset, map_func):
    '''
    Maps map_func across this dataset and flattens the result.
    Parameters
    ----------
    dataset:
        A dataset
    map_func
        A function mapping a dataset element to a dataset.
        A dataset.
    batch_size: int
        Sample number in a mini-batch.
    drop_last: boolean
        whether to drop the last incomplete batch when the dataset size is not divisible by the batch size.

    Returns
        A Dataset.
    -------
    Dataset
        A batchDataset.
    """

    '''
    return dataset.flat_map(map_func)
    return dataset.batch(batch_size=batch_size, drop_remainder=drop_last)


 def FromGenerator(
    generator, output_types, output_shapes=None, args=None, column_names=None, column_types=None, schema=None,
    num_samples=None, num_parallel_workers=1, shuffle=None, sampler=None, num_shards=None, shard_id=None,
    python_multiprocessing=True
 ):
    """Creates a `Dataset` whose elements are generated by `generator`.
 def Concat(datasets):
    """Concatenate the datasets in the input list of datasets.

    Parameters
    ----------
    datasets: list or tuple
        The datasets to join, in order. Any iterable is accepted; it is
        materialized into a list first.

    Returns
    -------
    Dataset
        The first dataset followed by every later one, built by repeated
        calls to ``concatenate``.

    Raises
    ------
    IndexError
        If ``datasets`` is empty.

    Examples
    ----------
    With TensorLayer

    >>> dataset = tl.dataflow.Concat([dataset1, dataset2])

    """
    datasets = list(datasets)
    dataset = datasets[0]
    for following in datasets[1:]:
        # ``concatenate`` returns a new dataset instead of modifying in place,
        # so the result has to be kept.
        dataset = dataset.concatenate(following)
    return dataset


 def FromSlices(datas, column_names=None):
    """Creates a dataset with given data slices.

    Parameters
    ----------
    datas: list or tuple
        Each data should be in shape of [N, …], while N is the sample number.
        Input data will be sliced along the first dimension and generate additional rows
    column_names: list
        List of column names of the dataset. This parameter not support in TensorFlow backend and Paddle backend.

    Returns
    -------
    Dataset
        A dataset.

 def FromSlices(
    tensor, column_names=None, num_samples=None, num_parallel_workers=1, shuffle=None, sampler=None, num_shards=None,
    shard_id=None
 ):
    Examples
    ----------
    With TensorLayer

    >>> dataset = tl.dataflow.FromSlices([data1, data2])

    return tf.data.Dataset.from_tensor_slices(tensor)
    """

    return tf.data.Dataset.from_tensor_slices(datas)

 def Map(
    dataset, map_func, num_parallel_calls=None, input_columns=None, output_columns=None, column_order=None,
    num_parallel_workers=None, python_multiprocessing=False, cache=None, callbacks=None
 ):
    """ Maps map_func across the elements of this dataset.

 def Map(dataset, map_func, input_columns=None):
    """ Maps map_func across the elements of this dataset. This function not implement in Paddle backend.

    Parameters
    ----------
    dataset : DataFlow
        input DataFlow
    dataset : Dataset
        A dataset to map.
    map_func : function
        A function mapping a dataset element to another dataset element.
    num_parallel_calls
    input_columns: list
        List of column names of the dataset to map. This parameter not support in TensorFlow backend.

    Returns
    -------
    Dataset
        A mapped dataset.

    Examples
    ----------
    With TensorLayer

    >>> dataset = tl.dataflow.Map(dataset, map_func)

    """
    return dataset.map(map_func, num_parallel_calls=num_parallel_calls)
    return dataset.map(map_func)


 def Prefetch(dataset, buffer_size):
    '''
    Creates a Dataset that prefetches elements from this dataset.
 def Repeat(dataset, count=None):
    """ Repeat this dataset count times.  This function not implement in Paddle backend.

    Parameters
    ----------
    dataset: Dataflow
        A dataset
    buffer_size :
        A tf.int64 scalar tf.Tensor, representing the maximum number of elements that will be buffered when prefetching.
    dataset : Dataset
        A dataset to repeat.
    count : int
        The number of times the dataset should be repeated. The default behavior (if count is None or -1) is for the dataset be repeated indefinitely.

    Returns
        A Dataset
    -------
    Dataset
        A repeated dataset.

    '''
    return dataset.prefetch(buffer_size=buffer_size)
    Examples
    ----------
    With TensorLayer

    >>> dataset = tl.dataflow.Repeat(dataset, 2)

 def Repeat(dataset, count=None):
    """
    return dataset.repeat(count=count)


 def Shuffle(dataset, buffer_size, seed=None, reshuffle_each_iteration=None):
    return dataset.shuffle(buffer_size, seed=seed, reshuffle_each_iteration=reshuffle_each_iteration)
 def Shuffle(dataset, buffer_size):
    """ Randomly shuffles the elements of this dataset.This function not implement in Paddle backend.


 def Skip(dataset, count):
    '''
    Creates a Dataset that skips count elements from this dataset.
    Parameters
    ----------
    dataset:
        A dataset
    count:
        A tf.int64 scalar tf.Tensor, representing the number of elements of this dataset that should be skipped to form the new dataset.
        If count is greater than the size of this dataset, the new dataset will contain no elements.
        If count is -1, skips the entire dataset.
    dataset : Dataset
        A dataset to shuffle.
    buffer_size : int
        The number of elements from this dataset from which the new dataset will sample.

    Returns
    -------
    Dataset
        A shuffled dataset.

    Examples
    ----------
    With TensorLayer

    >>> dataset = tl.dataflow.Shuffle(dataset, 2000)

    """
    return dataset.shuffle(buffer_size, seed=None, reshuffle_each_iteration=True)

    '''
    return dataset.skip(count)

 def Zip(datasets):
    """ Creates a Dataset by zipping together the given datasets.This function not implement in Paddle backend.

 def Take(dataset, count):
    '''
    Creates a Dataset with at most count elements from this dataset.
    Parameters
    ----------
    dataset:
        A dataset
    count:
        A tf.int64 scalar tf.Tensor, representing the number of elements of this dataset that should be taken to form the new dataset.
         If count is -1, or if count is greater than the size of this dataset, the new dataset will contain all elements of this dataset.
    datasets : list
        A list of datasets to zip.

    Returns
    -------
    Dataset
        A zip dataset.

    '''
    return dataset.take(count)


 def TextFlieDataset(
    filenames, compression_type=None, buffer_size=None, num_parallel_reads=None, num_samples=None, shuffle=None,
    num_shards=None, shard_id=None, cache=None
 ):

    return tf.data.TextLineDataset(filenames, compression_type, buffer_size, num_parallel_reads)
    Examples
    ----------
    With TensorLayer

    >>> dataset = tl.dataflow.Zip([dataset1, dataset2])

 def TFRecordDataset(
    filenames, compression_type=None, buffer_size=None, num_parallel_reads=None, schema=None, columns_list=None,
    num_samples=None, shuffle=None, num_shards=None, shard_id=None, shard_equal_rows=False, cache=None
 ):
    """
    return tf.data.Dataset.zip(datasets)

    return tf.data.TFRecordDataset(filenames, compression_type, buffer_size, num_parallel_reads)

 def Dataloader(dataset, batch_size, shuffle=False, drop_last=False, shuffle_buffer_size=10000):
    """ Creates a DataLoader to train a network. We recommend using this function.

 def Zip(datasets):
    '''
    Creates a Dataset by zipping together the given datasets.
    Parameters
    ----------
    datasets:
        A tuple of datasets to be zipped together.
    dataset : Dataset
        the dataset to load data from.
    batch_size: int or None
        sample number in a mini-batch.
    shuffle: boolean
        whether to shuffle the index order before generating batch indices.
    drop_last: boolean
        whether to drop the last incomplete batch when the dataset size is not divisible by the batch size.
    shuffle_buffer_size: int
        The number of elements from this dataset from which the new dataset will sample. This parameter not support in Paddle backend.

    Returns
    -------
    DataLoader
        an iterable object for iterating over the data; each element of the generated data is a Tensor.

    '''
    return tf.data.Dataset.zip(datasets)

    Examples
    ----------
    With TensorLayer

    >>> from tensorlayer.dataflow import Dataset
    >>> class mnistdataset(Dataset):
    >>>     def __init__(self, data, label,transform):
    >>>         self.data = data
    >>>         self.label = label
    >>>         self.transform = transform
    >>>     def __getitem__(self, index):
    >>>         data = self.data[index].astype('float32')
    >>>         data = self.transform(data)
    >>>         label = self.label[index].astype('int64')
    >>>         return data, label
    >>>     def __len__(self):
    >>>         return len(self.data)
    >>> train_dataset = mnistdataset(data = X_train, label = y_train ,transform = transform)
    >>> train_dataset = tl.dataflow.FromGenerator(train_dataset, output_types=[tl.float32, tl.int64], column_names=['data', 'label'])
    >>> train_dataloader = tl.dataflow.Dataloader(train_dataset, batch_size=128, shuffle=True, drop_last=False, shuffle_buffer_size=2000)

 def Dataloader(dataset, batch_size, shuffle=False, drop_last=False, prefetch=0, shuffle_buffer_size=1024):
    """

    if shuffle:
        dataset = Shuffle(dataset, buffer_size=shuffle_buffer_size, reshuffle_each_iteration=True)
        dataset = Shuffle(dataset, buffer_size=shuffle_buffer_size)

    dataset = Batch(dataset, batch_size=batch_size, drop_remainder=drop_last)
    dataset = Prefetch(dataset, buffer_size=prefetch)
    dataset = Batch(dataset, batch_size=batch_size, drop_last=drop_last)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    return dataset
--- a/tensorlayer/files/utils.py
+++ b/tensorlayer/files/utils.py
@@ -37,6 +37,8 @@ if tl.BACKEND == 'mindspore':
    from mindspore.nn import Cell
    from mindspore import Tensor
    import mindspore as ms
 if tl.BACKEND == 'paddle':
    import paddle as pd

 if sys.version_info[0] == 2:
    from urllib import urlretrieve
@@ -74,6 +76,7 @@ __all__ = [
    'ms_variables_to_numpy',
    'assign_tf_variable',
    'assign_ms_variable',
    'assign_pd_variable',
    'save_weights_to_hdf5',
    'load_hdf5_to_weights_in_order',
    'load_hdf5_to_weights',
@@ -2098,6 +2101,8 @@ def save_npz_dict(save_list=None, name='model.npz'):
        save_list_var = tf_variables_to_numpy(save_list)
    elif tl.BACKEND == 'mindspore':
        save_list_var = ms_variables_to_numpy(save_list)
    elif tl.BACKEND == 'paddle':
        save_list_var = pd_variables_to_numpy(save_list)
    else:
        raise NotImplementedError('Not implemented')
    save_var_dict = {save_list_names[idx]: val for idx, val in enumerate(save_list_var)}
@@ -2148,6 +2153,11 @@ def load_and_assign_npz_dict(name='model.npz', network=None, skip=False):
            elif tl.BACKEND == 'mindspore':
                assign_param = Tensor(weights[key], dtype=ms.float32)
                assign_ms_variable(network.all_weights[net_weights_name.index(key)], assign_param)
            elif tl.BACKEND == 'paddle':
                assign_pd_variable(network.all_weights[net_weights_name.index(key)], weights[key])
            else:
                raise NotImplementedError('Not implemented')

    logging.info("[*] Model restored from npz_dict %s" % name)


@@ -2594,6 +2604,16 @@ def ms_variables_to_numpy(variables):
    return results


 def pd_variables_to_numpy(variables):
    """Convert Paddle variable(s) to a list of numpy values.

    Parameters
    ----------
    variables : a single variable or a list of variables
        Anything that is not a ``list`` is treated as one variable and wrapped.
        Every variable must provide a ``numpy()`` method.

    Returns
    -------
    list
        The result of calling ``numpy()`` on each variable, in input order.
    """
    # Normalise the input so that a lone variable and a list share one code path.
    var_list = variables if isinstance(variables, list) else [variables]
    return [item.numpy() for item in var_list]


 def assign_tf_variable(variable, value):
    """Assign value to a TF variable"""
    variable.assign(value)
@@ -2615,6 +2635,10 @@ def assign_ms_variable(variable, value):
    Assign()(variable, value)


 def assign_pd_variable(variable, value):
    """Assign value to a Paddle variable.

    Forwards to ``pd.assign(value, variable)``; ``pd`` is the module-level alias of
    ``paddle``, which this file imports only when the Paddle backend is selected.
    """
    pd.assign(value, variable)


 def _save_weights_to_hdf5_group(f, layers):
    """
    Save layer/model weights into hdf5 group recursively.
--- a/tensorlayer/initializers/init.py
+++ b/tensorlayer/initializers/init.py
@@ -5,7 +5,7 @@
 #     'Initializer', 'Zeros', 'Ones', 'Constant', 'RandomUniform', 'RandomNormal', 'TruncatedNormal',
 #     'deconv2d_bilinear_upsampling_initializer', 'He_Normal'
 # ]

 from .load_initializers_backend import Initializer
 from .load_initializers_backend import Zeros
 from .load_initializers_backend import Ones
 from .load_initializers_backend import Constant
@@ -22,4 +22,4 @@ constant = Constant
 random_uniform = RandomUniform
 random_normal = RandomNormal
 truncated_normal = TruncatedNormal
 he_normal = HeNormal
 he_normal = HeNormal
--- a/tensorlayer/initializers/load_initializers_backend.py
+++ b/tensorlayer/initializers/load_initializers_backend.py
@@ -7,9 +7,7 @@ from tensorlayer.backend.ops.load_backend import BACKEND
 if BACKEND == 'tensorflow':
    from .tensorflow_initializers import *
 elif BACKEND == 'mindspore':
    from .tensorflow_initializers import *
 elif BACKEND == 'dragon':
    from .tensorflow_initializers import *
    from .mindspore_initializers import *
 elif BACKEND == 'paddle':
    from .paddle_initializers import *
 else:
--- a/tensorlayer/initializers/mindspore_initializers.py
+++ b/tensorlayer/initializers/mindspore_initializers.py
@@ -0,0 +1,258 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-

 import numpy as np
 import tensorlayer as tl
 from mindspore import Tensor
 from mindspore.common import initializer

 __all__ = [
    'Initializer', 'Zeros', 'Ones', 'Constant', 'RandomUniform', 'RandomNormal', 'TruncatedNormal',
    'deconv2d_bilinear_upsampling_initializer', 'HeNormal'
 ]


 class Initializer(object):
    """Common base class of every initializer in this module.

    Subclasses override ``__call__`` to build the tensor, and ``get_config`` when
    they take constructor arguments that have to survive a config round trip.
    """

    def __call__(self, shape, dtype=None):
        """Build a tensor initialized as specified by the initializer.

        Parameters
        ----------
        shape : tuple of int.
            The shape of the tensor.
        dtype : Optional dtype of the tensor.
            The subclasses in this module default it to `tl.float32`.

        Raises
        -------
        NotImplementedError
            Always; the base class does not implement any initialization.

        """
        raise NotImplementedError

    def get_config(self):
        """Return the constructor arguments of the initializer.

        Returns
        -------
            A JSON-serializable Python dict (empty for the base class).
        """
        return dict()

    @classmethod
    def from_config(cls, config):
        """Create an initializer from a configuration dictionary.

        Parameters
        ----------
        config : A python dictionary.
            It will typically be the output of `get_config`. A ``'dtype'`` entry,
            if present, is removed from the dictionary in place.

        Returns
        -------
            An Initializer instance.
        """
        # 'dtype' is not forwarded to the constructor, so discard it first.
        config.pop('dtype', None)
        return cls(**config)


 class Zeros(Initializer):
    """Initializer that generates tensors filled with 0.
    """

    def __init__(self):
        # Delegate the actual filling to MindSpore's ``Zero`` initializer.
        self.zero = initializer.Zero()

    def __call__(self, shape, dtype=tl.float32):
        """Return a ``Tensor`` of the given shape and dtype.

        A numpy array of the requested shape is allocated and handed to the
        MindSpore initializer; its return value is ignored, so it is expected
        to fill the array in place.
        """
        buffer = np.ndarray(shape)
        self.zero(buffer)
        return Tensor(buffer, dtype=dtype)


 class Ones(Initializer):
    """Initializer that generates tensors filled with 1.
    """

    def __init__(self):
        # Delegate the actual filling to MindSpore's ``One`` initializer.
        self.one = initializer.One()

    def __call__(self, shape, dtype=tl.float32):
        """Return a ``Tensor`` of the given shape and dtype.

        A numpy array of the requested shape is allocated and handed to the
        MindSpore initializer; its return value is ignored, so it is expected
        to fill the array in place.
        """
        buffer = np.ndarray(shape)
        self.one(buffer)
        return Tensor(buffer, dtype=dtype)


 class Constant(Initializer):
    """Initializer that generates tensors holding a constant value.

    Parameters
    ----------
    value : A python scalar or a numpy array.
        The assigned value.

    """

    def __init__(self, value=0):
        # Keep the raw value for ``get_config``; MindSpore's ``Constant`` does the fill.
        self.constant = initializer.Constant(value=value)
        self.value = value

    def __call__(self, shape, dtype=tl.float32):
        """Return a ``Tensor`` of the given shape and dtype.

        A numpy array of the requested shape is allocated and handed to the
        MindSpore initializer; its return value is ignored, so it is expected
        to fill the array in place.
        """
        buffer = np.ndarray(shape)
        self.constant(buffer)
        return Tensor(buffer, dtype=dtype)

    def get_config(self):
        """Return the constructor arguments as a dict."""
        return dict(value=self.value)


 class RandomUniform(Initializer):
    """Initializer that draws tensor values from a uniform distribution.

    Parameters
    ----------
    minval : A python scalar or a scalar tensor.
        Lower bound of the range of random values to generate.
    maxval : A python scalar or a scalar tensor.
        Upper bound of the range of random values to generate.
    seed : A Python integer.
        Used to seed the random generator.

    """

    def __init__(self, minval=-0.05, maxval=0.05, seed=None):
        self.minval, self.maxval, self.seed = minval, maxval, seed

    def __call__(self, shape, dtype=tl.float32):
        """Return the result of ``tl.random_uniform`` for the stored bounds and seed."""
        lower, upper = self.minval, self.maxval
        return tl.random_uniform(shape, lower, upper, dtype=dtype, seed=self.seed)

    def get_config(self):
        """Return the constructor arguments as a dict."""
        return dict(minval=self.minval, maxval=self.maxval, seed=self.seed)


 class RandomNormal(Initializer):
    """Initializer that draws tensor values from a normal distribution.

    Parameters
    ----------
    mean : A python scalar or a scalar tensor.
        Mean of the random values to generate.
    stddev : A python scalar or a scalar tensor.
        Standard deviation of the random values to generate.
    seed : A Python integer.
        Used to seed the random generator.
    """

    def __init__(self, mean=0.0, stddev=0.05, seed=None):
        self.mean, self.stddev, self.seed = mean, stddev, seed

    def __call__(self, shape, dtype=tl.float32):
        """Return the result of ``tl.random_normal`` for the stored mean, stddev and seed."""
        center, spread = self.mean, self.stddev
        return tl.random_normal(shape, center, spread, dtype=dtype, seed=self.seed)

    def get_config(self):
        """Return the constructor arguments as a dict."""
        return dict(mean=self.mean, stddev=self.stddev, seed=self.seed)


 class TruncatedNormal(Initializer):
    """Initializer that draws tensor values from a truncated normal distribution.

    These values are similar to values from a `RandomNormal`
    except that values more than two standard deviations from the mean
    are discarded and re-drawn. This is the recommended initializer for
    neural network weights and filters.


    Parameters
    ----------
    mean : A python scalar or a scalar tensor.
        Mean of the random values to generate.
    stddev : A python scalar or a scalar tensor.
        Standard deviation of the random values to generate.
    seed : A Python integer.
        Used to seed the random generator.
    """

    def __init__(self, mean=0.0, stddev=0.05, seed=None):
        self.mean, self.stddev, self.seed = mean, stddev, seed

    def __call__(self, shape, dtype=tl.float32):
        """Return the result of ``tl.truncated_normal`` for the stored mean, stddev and seed."""
        center, spread = self.mean, self.stddev
        return tl.truncated_normal(shape, center, spread, dtype=dtype, seed=self.seed)

    def get_config(self):
        """Return the constructor arguments as a dict."""
        return dict(mean=self.mean, stddev=self.stddev, seed=self.seed)


 class HeNormal(Initializer):
    """He normal initializer.

    Parameters
    ----------
    seed : A Python integer.
        Used to seed the random generator.

    """

    def __init__(self, seed=None):
        self.seed = seed

    def __call__(self, shape, dtype=tl.float32):
        """Return the result of ``tl.he_normal`` for the given shape, dtype and stored seed."""
        return tl.he_normal(seed=self.seed, shape=shape, dtype=dtype)

    def get_config(self):
        """Return the constructor arguments as a dict.

        The result must be a dict (not a set) so that it is JSON-serializable and
        can be expanded with ``cls(**config)`` in ``from_config``.
        """
        return {"seed": self.seed}


 def deconv2d_bilinear_upsampling_initializer(shape):
    """Build the initializer that makes a DeConv2dLayer perform channel-wise
    bilinear up-sampling.
    Used in segmentation approaches such as [FCN](https://arxiv.org/abs/1605.06211)

    Parameters
    ----------
    shape : tuple of int
        The shape of the filters, [height, width, output_channels, in_channels].
        It must match the shape passed to DeConv2dLayer.

    Returns
    -------
    ``Constant``
        A constant initializer whose weights correspond to per channel bilinear upsampling
        when passed as W_init in DeConv2dLayer

    Raises
    ------
    Exception
        If the filter is not square, or if there are fewer input channels than
        output channels.

    """
    if shape[0] != shape[1]:
        raise Exception('deconv2d_bilinear_upsampling_initializer only supports symmetrical filter sizes')

    if shape[3] < shape[2]:
        raise Exception(
            'deconv2d_bilinear_upsampling_initializer behaviour is not defined for num_in_channels < num_out_channels '
        )

    filter_size, num_out_channels, num_in_channels = shape[0], shape[2], shape[3]

    scale_factor = (filter_size + 1) // 2
    # Odd kernels are centred on a pixel, even kernels between two pixels.
    center = scale_factor - 1 if filter_size % 2 == 1 else scale_factor - 0.5

    # The 2-D bilinear kernel is separable: it is the outer product of a 1-D
    # triangular profile with itself.
    profile = 1 - np.abs(np.arange(filter_size) - center) / scale_factor
    bilinear_kernel = np.outer(profile, profile).astype(np.float32)

    # Only the (i, i) channel pairs receive the kernel; all other pairs stay zero.
    weights = np.zeros((filter_size, filter_size, num_out_channels, num_in_channels), dtype=np.float32)
    for channel in range(num_out_channels):
        weights[:, :, channel, channel] = bilinear_kernel

    # Wrap the numpy weights in a constant initializer.
    return Constant(value=weights)
--- a/tensorlayer/initializers/paddle_initializers.py
+++ b/tensorlayer/initializers/paddle_initializers.py
@@ -9,11 +9,58 @@ from paddle.fluid.initializer import MSRAInitializer
 import paddle

 __all__ = [
    'Zeros', 'Ones', 'Constant', 'RandomUniform', 'RandomNormal', 'TruncatedNormal',
    'Initializer', 'Zeros', 'Ones', 'Constant', 'RandomUniform', 'RandomNormal', 'TruncatedNormal',
    'deconv2d_bilinear_upsampling_initializer', 'HeNormal'
 ]


 class Initializer(object):
    """Base class from which initializers inherit.

    It fixes the call signature and supplies the default ``get_config`` /
    ``from_config`` round trip.
    """

    def __call__(self, shape, dtype=None):
        """Build a tensor initialized as specified by the initializer.

        Parameters
        ----------
        shape : tuple of int.
            The shape of the tensor.
        dtype : Optional dtype of the tensor.
            Implementations are expected to fall back to `tl.float32` when omitted.

        Raises
        -------
        NotImplementedError
            Always; the base class does not implement any initialization.

        """
        raise NotImplementedError

    def get_config(self):
        """Return the constructor arguments of the initializer.

        Returns
        -------
            A JSON-serializable Python dict (empty for the base class).
        """
        return dict()

    @classmethod
    def from_config(cls, config):
        """Create an initializer from a configuration dictionary.

        Parameters
        ----------
        config : A python dictionary.
            It will typically be the output of `get_config`. A ``'dtype'`` entry,
            if present, is removed from the dictionary in place.

        Returns
        -------
            An Initializer instance.
        """
        # 'dtype' is not forwarded to the constructor, so discard it first.
        config.pop('dtype', None)
        return cls(**config)


 class Zeros(ConstantInitializer):
    """Initializer that generates tensors initialized to 0.
    """
@@ -72,8 +119,7 @@ class RandomUniform(UniformInitializer):
        assert minval is not None, 'low should not be None'
        assert maxval is not None, 'high should not be None'
        assert maxval >= minval, 'high should greater or equal than low'
        super(RandomUniform, self).__init__(
            low=minval, high=maxval, seed=seed, diag_num=0, diag_step=0, diag_val=1.0)
        super(RandomUniform, self).__init__(low=minval, high=maxval, seed=seed, diag_num=0, diag_step=0, diag_val=1.0)
        self.minval = minval
        self.maxval = maxval
        self.seed = seed
@@ -149,8 +195,7 @@ class HeNormal(MSRAInitializer):
    """

    def __init__(self, seed=0):
        super(HeNormal, self).__init__(
            uniform=False, fan_in=None, seed=seed)
        super(HeNormal, self).__init__(uniform=False, fan_in=None, seed=seed)
        self.seed = seed

    def get_config(self):
--- a/tensorlayer/initializers/tensorflow_initializers.py
+++ b/tensorlayer/initializers/tensorflow_initializers.py
@@ -59,6 +59,14 @@ class Initializer(object):

 class Zeros(Initializer):
    """Initializer that generates tensors initialized to 0.

    Examples
    --------

    >>> import tensorlayer as tl
    >>> init = tl.initializers.zeros()
    >>> print(init(shape=(5, 10), dtype=tl.float32))

    """

    def __call__(self, shape, dtype=tl.float32):
@@ -67,6 +75,14 @@ class Zeros(Initializer):

 class Ones(Initializer):
    """Initializer that generates tensors initialized to 1.

    Examples
    --------

    >>> import tensorlayer as tl
    >>> init = tl.initializers.ones()
    >>> print(init(shape=(5, 10), dtype=tl.float32))

    """

    def __call__(self, shape, dtype=tl.float32):
@@ -81,6 +97,13 @@ class Constant(Initializer):
    value : A python scalar or a numpy array.
        The assigned value.

    Examples
    --------

    >>> import tensorlayer as tl
    >>> init = tl.initializers.constant(value=10)
    >>> print(init(shape=(5, 10), dtype=tl.float32))

    """

    def __init__(self, value=0):
@@ -105,6 +128,13 @@ class RandomUniform(Initializer):
    seed : A Python integer.
        Used to seed the random generator.

    Examples
    --------

    >>> import tensorlayer as tl
    >>> init = tl.initializers.random_uniform(minval=-0.05, maxval=0.05)
    >>> print(init(shape=(5, 10), dtype=tl.float32))

    """

    def __init__(self, minval=-0.05, maxval=0.05, seed=None):
@@ -130,6 +160,16 @@ class RandomNormal(Initializer):
        Standard deviation of the random values to generate.
    seed : A Python integer.
        Used to seed the random generator.

    minval=-0.05, maxval=0.05

    Examples
    --------

    >>> import tensorlayer as tl
    >>> init = tl.initializers.random_normal(mean=0.0, stddev=0.05)
    >>> print(init(shape=(5, 10), dtype=tl.float32))

    """

    def __init__(self, mean=0.0, stddev=0.05, seed=None):
@@ -161,6 +201,14 @@ class TruncatedNormal(Initializer):
        Standard deviation of the random values to generate.
    seed : A Python integer.
        Used to seed the random generator.

    Examples
    --------

    >>> import tensorlayer as tl
    >>> init = tl.initializers.truncated_normal(mean=0.0, stddev=0.05)
    >>> print(init(shape=(5, 10), dtype=tl.float32))

    """

    def __init__(self, mean=0.0, stddev=0.05, seed=None):
@@ -183,6 +231,13 @@ class HeNormal(Initializer):
    seed : A Python integer.
        Used to seed the random generator.

    Examples
    --------

    >>> import tensorlayer as tl
    >>> init = tl.initializers.he_normal()
    >>> print(init(shape=(5, 10), dtype=tl.float32))

    """

    def __init__(self, seed=None):
--- a/tensorlayer/layers/init.py
+++ b/tensorlayer/layers/init.py
@@ -18,7 +18,7 @@ from .normalization import *
 from .padding import *
 from .pooling import *
 from .quantize import *
 # from .recurrent import *
 from .recurrent import *
 from .scale import *
 from .shape import *
 from .spatial_transformer import *
--- a/tensorlayer/layers/activation.py
+++ b/tensorlayer/layers/activation.py
@@ -7,16 +7,7 @@ from tensorlayer.initializers import truncated_normal
 from tensorlayer.layers.core import Module

 __all__ = [
    'PRelu',
    'PRelu6',
    'PTRelu6',
    'LeakyReLU',
    'LeakyReLU6',
    'LeakyTwiceRelu6',
    'Ramp',
    'Swish',
    'HardTanh',
    'Mish'
    'PRelu', 'PRelu6', 'PTRelu6', 'LeakyReLU', 'LeakyReLU6', 'LeakyTwiceRelu6', 'Ramp', 'Swish', 'HardTanh', 'Mish'
 ]


@@ -41,7 +32,7 @@ class PRelu(Module):
    Examples
    -----------
    >>> inputs = tl.layers.Input([10, 5])
    >>> prelulayer = tl.layers.PRelu(channel_shared=True)
    >>> prelulayer = tl.layers.PRelu(channel_shared=True, in_channels=5)(inputs)

    References
    -----------
@@ -141,6 +132,11 @@ class PRelu6(Module):
    name : None or str
        A unique layer name.

    Examples
    -----------
    >>> inputs = tl.layers.Input([10, 5])
    >>> prelulayer = tl.layers.PRelu6(channel_shared=True, in_channels=5)(inputs)

    References
    -----------
    - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification <http://arxiv.org/abs/1502.01852>`__
@@ -249,6 +245,11 @@ class PTRelu6(Module):
    name : None or str
        A unique layer name.

    Examples
    -----------
    >>> inputs = tl.layers.Input([10, 5])
    >>> prelulayer = tl.layers.PTRelu6(channel_shared=True, in_channels=5)(inputs)

    References
    -----------
    - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification <http://arxiv.org/abs/1502.01852>`__
@@ -347,6 +348,11 @@ class Ramp(Module):
        Tensor
            A ``Tensor`` in the same type as ``x``.

        Examples
        -----------
        >>> inputs = tl.layers.Input([10, 5])
        >>> prelulayer = tl.layers.Ramp()(inputs)

        """

    def __init__(self, v_min=0, v_max=1):
@@ -380,7 +386,6 @@ class LeakyReLU(Module):

    Examples
    --------
    >>> import tensorlayer as tl
    >>> net = tl.layers.Input([10, 200])
    >>> net = tl.layers.LeakyReLU(alpha=0.5)(net)

@@ -429,7 +434,6 @@ class LeakyReLU6(Module):

        Examples
        --------
        >>> import tensorlayer as tl
        >>> net = tl.layers.Input([10, 200])
        >>> net = tl.layers.LeakyReLU6(alpha=0.5)(net)

@@ -487,7 +491,6 @@ class LeakyTwiceRelu6(Module):

        Examples
        --------
        >>> import tensorlayer as tl
        >>> net = tl.layers.Input([10, 200])
        >>> net = tl.layers.LeakyTwiceRelu6(alpha_low=0.5, alpha_high=0.2)(net)

@@ -535,6 +538,11 @@ class Swish(Module):
        name: str
            function name (optional).

        Examples
        --------
        >>> net = tl.layers.Input([10, 200])
        >>> net = tl.layers.Swish()(net)

        Returns
        -------
        Tensor
@@ -563,6 +571,11 @@ class HardTanh(Module):
        name : str
            The function name (optional).

        Examples
        --------
        >>> net = tl.layers.Input([10, 200])
        >>> net = tl.layers.HardTanh()(net)

        Returns
        -------
        Tensor
@@ -588,6 +601,11 @@ class Mish(Module):
        x : Tensor
            input.

        Examples
        --------
        >>> net = tl.layers.Input([10, 200])
        >>> net = tl.layers.Mish()(net)

        Returns
        -------
        Tensor
--- a/tensorlayer/layers/convolution/binary_conv.py
+++ b/tensorlayer/layers/convolution/binary_conv.py
@@ -4,7 +4,6 @@
 import tensorlayer as tl
 from tensorlayer import logging
 from tensorlayer.layers.core import Module
 from tensorlayer.backend import BACKEND

 __all__ = [
    'BinaryConv2d',
@@ -49,8 +48,7 @@ class BinaryConv2d(Module):

    >>> net = tl.layers.Input([8, 100, 100, 32], name='input')
    >>> binaryconv2d = tl.layers.BinaryConv2d(
    ...     n_filter=64, filter_size=(3, 3), strides=(2, 2), act=tl.relu, in_channels=32, name='binaryconv2d'
    ... )(net)
        ... n_filter=64, filter_size=(3, 3), strides=(2, 2), act=tl.ReLU, in_channels=32, name='binaryconv2d')(net)
    >>> print(binaryconv2d)
    >>> output shape : (8, 50, 50, 64)

--- a/tensorlayer/layers/convolution/deformable_conv.py
+++ b/tensorlayer/layers/convolution/deformable_conv.py
@@ -16,7 +16,7 @@ class DeformableConv2d(Module):

    Parameters
    ----------
    offset_layer : tf.Tensor
    offset_layer : tl.Tensor
        To predict the offset of convolution operations.
        The shape is (batchsize, input height, input width, 2*(number of element in the convolution kernel))
        e.g. if apply a 3*3 kernel, the number of the last dimension should be 18 (2*3*3)
@@ -40,7 +40,6 @@ class DeformableConv2d(Module):
    Examples
    --------
    With TensorLayer

    >>> net = tl.layers.Input([5, 10, 10, 16], name='input')
    >>> offset1 = tl.layers.Conv2d(
    ...     n_filter=18, filter_size=(3, 3), strides=(1, 1), padding='SAME', name='offset1'
@@ -58,7 +57,6 @@ class DeformableConv2d(Module):
    References
    ----------
    - The deformation operation was adapted from the implementation in `here <https://github.com/kastnerkyle/deform-conv>`__

    Notes
    -----
    - The padding is fixed to 'SAME'.
@@ -66,9 +64,11 @@ class DeformableConv2d(Module):

    """

    # @deprecated_alias(layer='prev_layer', end_support_version=1.9)  # TODO remove this line for the 1.9 release
    def __init__(
        self,
        offset_layer=None,
        # shape=(3, 3, 1, 100),
        n_filter=32,
        filter_size=(3, 3),
        act=None,
@@ -76,7 +76,7 @@ class DeformableConv2d(Module):
        W_init=tl.initializers.truncated_normal(stddev=0.02),
        b_init=tl.initializers.constant(value=0.0),
        in_channels=None,
        name=None
        name=None  # 'deformable_conv_2d',
    ):
        super().__init__(name, act=act)

@@ -88,17 +88,10 @@ class DeformableConv2d(Module):
        self.b_init = b_init
        self.in_channels = in_channels

        # layer forward  state
        self._forward_state = False

        self.kernel_n = filter_size[0] * filter_size[1]
        if self.offset_layer.get_shape()[-1] != 2 * self.kernel_n:
            raise AssertionError("offset.get_shape()[-1] is not equal to: %d" % 2 * self.kernel_n)

        if self.in_channels is not None:
            self.build(None)
            self._built = True

        logging.info(
            "DeformableConv2d %s: n_filter: %d, filter_size: %s act: %s" % (
                self.name, self.n_filter, str(self.filter_size
@@ -106,7 +99,6 @@ class DeformableConv2d(Module):
            )
        )


    def __repr__(self):
        actstr = self.act.__class__.__name__ if self.act is not None else 'No Activation'
        s = (
@@ -122,13 +114,14 @@ class DeformableConv2d(Module):
        return s.format(classname=self.__class__.__name__, **self.__dict__)

    def build(self, inputs_shape):
        if self.in_channels is None:
            self.in_channels = inputs_shape[-1]

        self.in_channels = inputs_shape[-1]

        self.input_h = int(inputs_shape[1])
        self.input_w = int(inputs_shape[2])
        initial_offsets = tl.ops.stack(tl.ops.meshgrid(tl.ops.range(self.filter_size[0]),
                                                       tl.ops.range(self.filter_size[1]), indexing='ij')) # initial_offsets --> (kh, kw, 2)
        initial_offsets = tl.ops.stack(
            tl.ops.meshgrid(tl.ops.range(self.filter_size[0]), tl.ops.range(self.filter_size[1]), indexing='ij')
        )  # initial_offsets --> (kh, kw, 2)
        initial_offsets = tl.ops.reshape(initial_offsets, (-1, 2))  # initial_offsets --> (n, 2)
        initial_offsets = tl.ops.expand_dims(initial_offsets, 0)  # initial_offsets --> (1, n, 2)
        initial_offsets = tl.ops.expand_dims(initial_offsets, 0)  # initial_offsets --> (1, 1, n, 2)
@@ -168,12 +161,15 @@ class DeformableConv2d(Module):
                self._built = True
            self._forward_state = True

        # shape = (filter_size[0], filter_size[1], pre_channel, n_filter)
        offset = self.offset_layer
        grid_offset = self.grid_offset

        input_deform = self._tf_batch_map_offsets(inputs, offset, grid_offset)
        outputs = self.conv3d(input=input_deform, filters=self.W)
        outputs = tl.ops.reshape(tensor=outputs, shape=[outputs.get_shape()[0], self.input_h, self.input_w, self.n_filter])
        outputs = tl.ops.reshape(
            tensor=outputs, shape=[outputs.get_shape()[0], self.input_h, self.input_w, self.n_filter]
        )
        if self.b_init:
            outputs = self.bias_add(outputs, self.b)
        if self.act:
@@ -219,21 +215,17 @@ class DeformableConv2d(Module):

    def _tf_batch_map_coordinates(self, inputs, coords):
        """Batch version of tf_map_coordinates

        Only supports 2D feature maps

        Parameters
        ----------
        inputs : ``tf.Tensor``
        inputs : ``tl.Tensor``
            shape = (b*c, h, w)
        coords : ``tf.Tensor``
        coords : ``tl.Tensor``
            shape = (b*c, h, w, n, 2)

        Returns
        -------
        ``tf.Tensor``
        ``tl.Tensor``
            A Tensor with the shape as (b*c, h, w, n)

        """
        inputs_shape = inputs.get_shape()
        coords_shape = coords.get_shape()
@@ -243,8 +235,8 @@ class DeformableConv2d(Module):
        kernel_n = int(coords_shape[3])
        n_coords = input_h * input_w * kernel_n

        coords_lt = tl.ops.cast(tl.ops.floor(coords), 'int32')
        coords_rb = tl.ops.cast(tl.ops.ceil(coords), 'int32')
        coords_lt = tl.ops.cast(tl.ops.Floor()(coords), 'int32')
        coords_rb = tl.ops.cast(tl.ops.Ceil()(coords), 'int32')
        coords_lb = tl.ops.stack([coords_lt[:, :, :, :, 0], coords_rb[:, :, :, :, 1]], axis=-1)
        coords_rt = tl.ops.stack([coords_rb[:, :, :, :, 0], coords_lt[:, :, :, :, 1]], axis=-1)

@@ -265,21 +257,18 @@ class DeformableConv2d(Module):

    def _tf_batch_map_offsets(self, inputs, offsets, grid_offset):
        """Batch map offsets into input

        Parameters
        ------------
        inputs : ``tf.Tensor``
        inputs : ``tl.Tensor``
            shape = (b, h, w, c)
        offsets: ``tf.Tensor``
        offsets: ``tl.Tensor``
            shape = (b, h, w, 2*n)
        grid_offset: `tf.Tensor``
        grid_offset: `tl.Tensor``
            Offset grids shape = (h, w, n, 2)

        Returns
        -------
        ``tf.Tensor``
        ``tl.Tensor``
            A Tensor with the shape as (b, h, w, c)

        """
        inputs_shape = inputs.get_shape()
        batch_size = tl.get_tensor_shape(inputs)[0]
@@ -293,8 +282,6 @@ class DeformableConv2d(Module):

        # offsets (b, h, w, 2*n) --> (b, h, w, n, 2)
        offsets = tl.ops.reshape(offsets, (batch_size, input_h, input_w, kernel_n, 2))
        # offsets (b, h, w, n, 2) --> (b*c, h, w, n, 2)
        # offsets = tf.tile(offsets, [channel, 1, 1, 1, 1])

        coords = tl.ops.expand_dims(grid_offset, 0)  # grid_offset --> (1, h, w, n, 2)
        coords = tl.ops.tile(coords, [batch_size, 1, 1, 1, 1]) + offsets  # grid_offset --> (b, h, w, n, 2)
@@ -313,12 +300,3 @@ class DeformableConv2d(Module):
        mapped_vals = self._to_b_h_w_n_c(mapped_vals, [batch_size, input_h, input_w, kernel_n, channel])

        return mapped_vals

 if __name__ == '__main__':
    net = tl.layers.Input([5, 10, 10, 16], name='input')
    offset1 = tl.layers.Conv2d(n_filter=18, filter_size=(3, 3), strides=(1, 1), padding='SAME', name='offset1', in_channels=16)(net)
    deformconv1 = DeformableConv2d(offset_layer=offset1, n_filter=32, filter_size=(3, 3), name='deformable1')(net)
    offset2 = tl.layers.Conv2d(n_filter=18, filter_size=(3, 3), strides=(1, 1), padding='SAME', name='offset2', in_channels=32)(deformconv1)
    deformconv2 = DeformableConv2d(offset_layer=offset2, n_filter=64, filter_size=(3, 3), name='deformable2')(deformconv1)
    print(deformconv2)

--- a/tensorlayer/layers/convolution/depthwise_conv.py
+++ b/tensorlayer/layers/convolution/depthwise_conv.py
@@ -138,7 +138,7 @@ class DepthwiseConv2d(Module):
        if BACKEND == 'mindspore':
            self.filter_shape = (self.filter_size[0], self.filter_size[1], self.in_channels, 1)

        self.W = self._get_weights("filters", shape=self.filter_shape, init=self.W_init)
        self.W = self._get_weights("filters", shape=self.filter_shape, init=self.W_init, transposed=True)

        self.depthwise_conv2d = tl.ops.DepthwiseConv2d(
            strides=self._strides, padding=self.padding, data_format=self.data_format, dilations=self._dilation_rate,
--- a/tensorlayer/layers/convolution/dorefa_conv.py
+++ b/tensorlayer/layers/convolution/dorefa_conv.py
@@ -52,7 +52,7 @@ class DorefaConv2d(Module):

    >>> net = tl.layers.Input([8, 12, 12, 32], name='input')
    >>> dorefaconv2d = tl.layers.DorefaConv2d(
    ...     n_filter=32, filter_size=(5, 5), strides=(1, 1), act=tl.relu, padding='SAME', name='dorefaconv2d'
    ...     n_filter=32, filter_size=(5, 5), strides=(1, 1), act=tl.ReLU, padding='SAME', name='dorefaconv2d'
    ... )(net)
    >>> print(dorefaconv2d)
    >>> output shape : (8, 12, 12, 32)
--- a/tensorlayer/layers/convolution/group_conv.py
+++ b/tensorlayer/layers/convolution/group_conv.py
@@ -13,6 +13,7 @@ __all__ = [

 class GroupConv2d(Module):
    """The :class:`GroupConv2d` class is 2D grouped convolution, see `here <https://blog.yani.io/filter-group-tutorial/>`__.

      Parameters
      --------------
      n_filter : int
@@ -39,6 +40,7 @@ class GroupConv2d(Module):
          The number of in channels.
      name : None or str
          A unique layer name.

      Examples
      ---------
      With TensorLayer
@@ -48,6 +50,7 @@ class GroupConv2d(Module):
      ... )(net)
      >>> print(groupconv2d)
      >>> output shape : (8, 12, 12, 64)

      """

    def __init__(
--- a/tensorlayer/layers/convolution/quan_conv.py
+++ b/tensorlayer/layers/convolution/quan_conv.py
@@ -55,7 +55,7 @@ class QuanConv2d(Module):

    >>> net = tl.layers.Input([8, 12, 12, 64], name='input')
    >>> quanconv2d = tl.layers.QuanConv2d(
    ...     n_filter=32, filter_size=(5, 5), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='quancnn2d'
    ...     n_filter=32, filter_size=(5, 5), strides=(1, 1), act=tl.ReLU, padding='SAME', name='quancnn2d'
    ... )(net)
    >>> print(quanconv2d)
    >>> output shape : (8, 12, 12, 32)
@@ -149,8 +149,9 @@ class QuanConv2d(Module):
            self.b = self._get_weights("biases", shape=(self.n_filter, ), init=self.b_init)
            self.bias_add = tl.ops.BiasAdd(data_format=self.data_format)

        self.conv2d = tl.ops.Conv2D(strides=self.strides, padding=self.padding, data_format=self.data_format,
            dilations=self._dilation_rate)
        self.conv2d = tl.ops.Conv2D(
            strides=self.strides, padding=self.padding, data_format=self.data_format, dilations=self._dilation_rate
        )

    def forward(self, inputs):
        if self._forward_state == False:
--- a/tensorlayer/layers/convolution/quan_conv_bn.py
+++ b/tensorlayer/layers/convolution/quan_conv_bn.py
@@ -237,4 +237,4 @@ class QuanConv2dWithBN(Module):
        return tf.compat.v1.div(tf.multiply(gama, w), tf.sqrt(var + epsilon))

    def _bias_fold(self, beta, gama, mean, var, epsilon):
        return tf.subtract(beta, tf.compat.v1.div(tf.multiply(gama, mean), tf.sqrt(var + epsilon)))
        return tf.subtract(beta, tf.compat.v1.div(tf.multiply(gama, mean), tf.sqrt(var + epsilon)))
--- a/tensorlayer/layers/convolution/separable_conv.py
+++ b/tensorlayer/layers/convolution/separable_conv.py
@@ -15,6 +15,7 @@ __all__ = [
 class SeparableConv1d(Module):
    """The :class:`SeparableConv1d` class is a 1D depthwise separable convolutional layer.
    This layer performs a depthwise convolution that acts separately on channels, followed by a pointwise convolution that mixes channels.

    Parameters
    ------------
    n_filter : int
@@ -43,13 +44,15 @@ class SeparableConv1d(Module):
        The number of in channels.
    name : None or str
        A unique layer name.

    Examples
    --------
    With TensorLayer
    >>> net = tl.layers.Input([8, 50, 64], name='input')
    >>> separableconv1d = tl.layers.SeparableConv1d(n_filter=32, filter_size=3, strides=2, padding='SAME', act=tf.nn.relu, name='separable_1d')(net)
    >>> separableconv1d = tl.layers.SeparableConv1d(n_filter=32, filter_size=3, strides=2, padding='SAME', act=tl.ReLU, name='separable_1d')(net)
    >>> print(separableconv1d)
    >>> output shape : (8, 25, 32)

    """

    def __init__(
@@ -112,10 +115,10 @@ class SeparableConv1d(Module):

        if BACKEND == 'tensorflow':
            self.depthwise_filter_shape = (self.filter_size, self.in_channels, self.depth_multiplier)
            self.pointwise_filter_shape = (1, self.depth_multiplier * self.in_channels, self.n_filter)
        elif BACKEND == 'mindspore':
            self.depthwise_filter_shape = (self.filter_size, 1, self.depth_multiplier * self.in_channels)
            self.pointwise_filter_shape = (1, self.depth_multiplier * self.in_channels, self.n_filter)

        self.pointwise_filter_shape = (1, self.depth_multiplier * self.in_channels, self.n_filter)

        self.depthwise_W = self._get_weights(
            'depthwise_filters', shape=self.depthwise_filter_shape, init=self.depthwise_init
@@ -159,6 +162,7 @@ class SeparableConv1d(Module):
 class SeparableConv2d(Module):
    """The :class:`SeparableConv2d` class is a 2D depthwise separable convolutional layer.
        This layer performs a depthwise convolution that acts separately on channels, followed by a pointwise convolution that mixes channels.

        Parameters
        ------------
        n_filter : int
@@ -187,13 +191,15 @@ class SeparableConv2d(Module):
            The number of in channels.
        name : None or str
            A unique layer name.

        Examples
        --------
        With TensorLayer
        >>> net = tl.layers.Input([8, 50, 50, 64], name='input')
        >>> separableconv2d = tl.layers.SeparableConv2d(n_filter=32, filter_size=3, strides=2, depth_multiplier = 3 , padding='SAME', act=tf.nn.relu, name='separable_2d')(net)
        >>> separableconv2d = tl.layers.SeparableConv2d(n_filter=32, filter_size=3, strides=2, depth_multiplier = 3 , padding='SAME', act=tl.ReLU, name='separable_2d')(net)
        >>> print(separableconv2d)
        >>> output shape : (8, 24, 24, 32)

        """

    def __init__(
@@ -307,13 +313,3 @@ class SeparableConv2d(Module):
        if self.act_init_flag:
            outputs = self.act(outputs)
        return outputs


 if __name__ == '__main__':
    net = tl.layers.Input([5, 400, 400, 3], name='input')
    layer = SeparableConv2d(
        in_channels=3, filter_size=(3, 3), strides=(2, 2), dilation_rate=(2, 2), act=tl.ReLU, depth_multiplier=3,
        name='separableconv2d1'
    )
    print(len(layer.all_weights))
    print(layer(net).shape)
--- a/tensorlayer/layers/convolution/simplified_conv.py
+++ b/tensorlayer/layers/convolution/simplified_conv.py
@@ -5,7 +5,6 @@ from tensorlayer.layers.core import Module
 import tensorlayer as tl
 from tensorlayer import logging


 __all__ = [
    'Conv1d',
    'Conv2d',
@@ -51,7 +50,7 @@ class Conv1d(Module):
    >>> net = tl.layers.Input([8, 100, 1], name='input')
    >>> conv1d = tl.layers.Conv1d(n_filter=32, filter_size=5, stride=2, b_init=None, in_channels=1, name='conv1d_1')
    >>> print(conv1d)
    >>> tensor = tl.layers.Conv1d(n_filter=32, filter_size=5, stride=2, act=tl.ops.relu, name='conv1d_2')(net)
    >>> tensor = tl.layers.Conv1d(n_filter=32, filter_size=5, stride=2, act=tl.ReLU, name='conv1d_2')(net)
    >>> print(tensor)

    """
@@ -189,10 +188,10 @@ class Conv2d(Module):
    --------
    With TensorLayer

    >>> net = tl.layers.Input([8, 3, 400, 400], name='input')
    >>> net = tl.layers.Input([8, 400, 400, 3], name='input')
    >>> conv2d = tl.layers.Conv2d(n_filter=32, filter_size=(3, 3), strides=(2, 2), b_init=None, in_channels=3, name='conv2d_1')
    >>> print(conv2d)
    >>> tensor = tl.layers.Conv2d(n_filter=32, filter_size=(3, 3), strides=(2, 2), act=tl.ops.relu, name='conv2d_2')(net)
    >>> tensor = tl.layers.Conv2d(n_filter=32, filter_size=(3, 3), strides=(2, 2), act=tl.ReLU, name='conv2d_2')(net)
    >>> print(tensor)

    """
@@ -337,7 +336,7 @@ RuntimeError: Unable to cast from non-held to held instance (T& to Holder<T>) of
    >>> net = tl.layers.Input([8, 20, 20, 20, 3], name='input')
    >>> conv3d = tl.layers.Conv3d(n_filter=32, filter_size=(3, 3, 3), strides=(2, 2, 2), b_init=None, in_channels=3, name='conv3d_1')
    >>> print(conv3d)
    >>> tensor = tl.layers.Conv3d(n_filter=32, filter_size=(3, 3, 3), strides=(2, 2, 2), act=tl.ops.relu, name='conv3d_2')(net)
    >>> tensor = tl.layers.Conv3d(n_filter=32, filter_size=(3, 3, 3), strides=(2, 2, 2), act=tl.ReLU, name='conv3d_2')(net)
    >>> print(tensor)

    """
@@ -416,9 +415,6 @@ RuntimeError: Unable to cast from non-held to held instance (T& to Holder<T>) of

        self.W = self._get_weights("filters", shape=self.filter_shape, init=self.W_init)

        if self.b_init:
            self.b = self._get_weights("biases", shape=(self.n_filter, ), init=self.b_init)

        self.b_init_flag = False
        if self.b_init:
            self.b = self._get_weights("biases", shape=(self.n_filter, ), init=self.b_init)
@@ -427,7 +423,7 @@ RuntimeError: Unable to cast from non-held to held instance (T& to Holder<T>) of

        self.conv3d = tl.ops.Conv3D(
            strides=self._strides, padding=self.padding, data_format=self.data_format, dilations=self._dilation_rate,
            out_channel=self.n_filter, k_size=(self.filter_size[0], self.filter_size[1])
            out_channel=self.n_filter, k_size=(self.filter_size[0], self.filter_size[1], self.filter_size[2])
        )

        self.act_init_flag = False
@@ -486,7 +482,7 @@ class DeConv1d(Module):
    >>> net = tl.layers.Input([8, 100, 1], name='input')
    >>> conv1d = tl.layers.DeConv1d(n_filter=32, filter_size=5, stride=2, b_init=None, in_channels=1, name='Deonv1d_1')
    >>> print(conv1d)
    >>> tensor = tl.layers.DeConv1d(n_filter=32, filter_size=5, stride=2, act=tl.ops.relu, name='Deconv1d_2')(net)
    >>> tensor = tl.layers.DeConv1d(n_filter=32, filter_size=5, stride=2, act=tl.ReLU, name='Deconv1d_2')(net)
    >>> print(tensor)

    """
@@ -495,7 +491,7 @@ class DeConv1d(Module):
        self,
        n_filter=32,
        filter_size=15,
        strides=1,
        stride=1,
        act=None,
        padding='SAME',
        data_format="channels_last",
@@ -508,7 +504,7 @@ class DeConv1d(Module):
        super(DeConv1d, self).__init__(name, act=act)
        self.n_filter = n_filter
        self.filter_size = filter_size
        self.strides = strides
        self.stride = stride
        self.padding = padding
        self.data_format = data_format
        self.dilation_rate = dilation_rate
@@ -522,7 +518,7 @@ class DeConv1d(Module):

        logging.info(
            "DeConv1d %s: n_filter: %d filter_size: %s stride: %d pad: %s act: %s" % (
                self.name, n_filter, filter_size, strides, padding,
                self.name, n_filter, filter_size, stride, padding,
                self.act.__class__.__name__ if self.act is not None else 'No Activation'
            )
        )
@@ -531,7 +527,7 @@ class DeConv1d(Module):
        actstr = self.act.__class__.__name__ if self.act is not None else 'No Activation'
        s = (
            '{classname}(in_channels={in_channels}, out_channels={n_filter}, kernel_size={filter_size}'
            ', strides={strides}, padding={padding}'
            ', stride={stride}, padding={padding}'
        )
        if self.dilation_rate != 1:
            s += ', dilation={dilation_rate}'
@@ -567,7 +563,7 @@ class DeConv1d(Module):
            self.b_init_flag = True

        self.conv1d_transpose = tl.ops.Conv1d_transpose(
            strides=self.strides,
            stride=self.stride,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilation_rate,
@@ -631,10 +627,10 @@ class DeConv2d(Module):
    --------
    With TensorLayer

    >>> net = tl.layers.Input([8, 3, 400, 400], name='input')
    >>> net = tl.layers.Input([8, 400, 400, 3], name='input')
    >>> conv2d_transpose = tl.layers.DeConv2d(n_filter=32, filter_size=(3, 3), strides=(2, 2), b_init=None, in_channels=3, name='conv2d_transpose_1')
    >>> print(conv2d_transpose)
    >>> tensor = tl.layers.DeConv2d(n_filter=32, filter_size=(3, 3), strides=(2, 2), act=tl.ops.relu, name='conv2d_transpose_2')(net)
    >>> tensor = tl.layers.DeConv2d(n_filter=32, filter_size=(3, 3), strides=(2, 2), act=tl.ReLU, name='conv2d_transpose_2')(net)
    >>> print(tensor)

    """
@@ -656,10 +652,10 @@ class DeConv2d(Module):
        super(DeConv2d, self).__init__(name, act=act)
        self.n_filter = n_filter
        self.filter_size = filter_size
        self._strides = self.strides = strides
        self.strides = strides
        self.padding = padding
        self.data_format = data_format
        self._dilation_rate = self.dilation_rate = dilation_rate
        self.dilation_rate = dilation_rate
        self.W_init = W_init
        self.b_init = b_init
        self.in_channels = in_channels
@@ -696,20 +692,16 @@ class DeConv2d(Module):
            self.data_format = 'NHWC'
            if self.in_channels is None:
                self.in_channels = inputs_shape[-1]
            self._strides = [1, self._strides[0], self._strides[1], 1]
            self._dilation_rate = [1, self._dilation_rate[0], self._dilation_rate[1], 1]
        elif self.data_format == 'channels_first':
            self.data_format = 'NCHW'
            if self.in_channels is None:
                self.in_channels = inputs_shape[1]
            self._strides = [1, 1, self._strides[0], self._strides[1]]
            self._dilation_rate = [1, 1, self._dilation_rate[0], self._dilation_rate[1]]
        else:
            raise Exception("data_format should be either channels_last or channels_first")

        #TODO channels first filter shape [out_channel, in_channel, filter_h, filter_w]
        self.filter_shape = (self.filter_size[0], self.filter_size[1], self.n_filter, self.in_channels)
        self.W = self._get_weights("filters", shape=self.filter_shape, init=self.W_init)
        self.W = self._get_weights("filters", shape=self.filter_shape, init=self.W_init, transposed=True)

        self.b_init_flag = False
        if self.b_init:
@@ -718,7 +710,7 @@ class DeConv2d(Module):
            self.b_init_flag = True

        self.conv2d_transpose = tl.ops.Conv2d_transpose(
            strides=self._strides, padding=self.padding, data_format=self.data_format, dilations=self._dilation_rate,
            strides=self.strides, padding=self.padding, data_format=self.data_format, dilations=self.dilation_rate,
            out_channel=self.n_filter, k_size=(self.filter_size[0], self.filter_size[1]), in_channels=self.in_channels
        )

@@ -781,7 +773,7 @@ class DeConv3d(Module):
    >>> net = tl.layers.Input([8, 20, 20, 20, 3], name='input')
    >>> deconv3d = tl.layers.DeConv3d(n_filter=32, filter_size=(3, 3, 3), strides=(2, 2, 2), b_init=None, in_channels=3, name='deconv3d_1')
    >>> print(deconv3d)
    >>> tensor = tl.layers.DeConv3d(n_filter=32, filter_size=(3, 3, 3), strides=(2, 2, 2), act=tl.ops.relu, name='deconv3d_2')(net)
    >>> tensor = tl.layers.DeConv3d(n_filter=32, filter_size=(3, 3, 3), strides=(2, 2, 2), act=tl.ReLU, name='deconv3d_2')(net)
    >>> print(tensor)

    """
@@ -803,10 +795,10 @@ class DeConv3d(Module):
        super(DeConv3d, self).__init__(name, act=act)
        self.n_filter = n_filter
        self.filter_size = filter_size
        self._strides = self.strides = strides
        self.strides = strides
        self.padding = padding
        self.data_format = data_format
        self._dilation_rate = self.dilation_rate = dilation_rate
        self.dilation_rate = dilation_rate
        self.W_init = W_init
        self.b_init = b_init
        self.in_channels = in_channels
@@ -843,14 +835,10 @@ class DeConv3d(Module):
            self.data_format = 'NDHWC'
            if self.in_channels is None:
                self.in_channels = inputs_shape[-1]
            self._strides = [1, self._strides[0], self._strides[1], self._strides[2], 1]
            self._dilation_rate = [1, self.dilation_rate[0], self.dilation_rate[1], self.dilation_rate[2], 1]
        elif self.data_format == 'channels_first':
            self.data_format = 'NCDHW'
            if self.in_channels is None:
                self.in_channels = inputs_shape[1]
            self._strides = [1, 1, self._strides[0], self._strides[1], self._strides[2]]
            self._dilation_rate = [1, 1, self._dilation_rate[0], self._dilation_rate[1], self._dilation_rate[2]]
        else:
            raise Exception("data_format should be either channels_last or channels_first")

@@ -858,7 +846,7 @@ class DeConv3d(Module):
            self.filter_size[0], self.filter_size[1], self.filter_size[2], self.n_filter, self.in_channels
        )

        self.W = self._get_weights("filters", shape=self.filter_shape, init=self.W_init)
        self.W = self._get_weights("filters", shape=self.filter_shape, init=self.W_init, transposed=True)

        if self.b_init:
            self.b = self._get_weights("biases", shape=(self.n_filter, ), init=self.b_init)
@@ -870,8 +858,9 @@ class DeConv3d(Module):
            self.b_init_flag = True

        self.conv3d_transpose = tl.ops.Conv3d_transpose(
            strides=self._strides, padding=self.padding, data_format=self.data_format, dilations=self._dilation_rate,
            out_channel=self.n_filter, k_size=(self.filter_size[0], self.filter_size[1], self.filter_size[2])
            strides=self.strides, padding=self.padding, data_format=self.data_format, dilations=self.dilation_rate,
            out_channel=self.n_filter, k_size=(self.filter_size[0], self.filter_size[1], self.filter_size[2]),
            in_channels=self.in_channels
        )

        self.act_init_flag = False
--- a/tensorlayer/layers/convolution/super_resolution.py
+++ b/tensorlayer/layers/convolution/super_resolution.py
@@ -61,11 +61,11 @@ class SubpixelConv1d(Module):

        logging.info(
            "SubpixelConv1d  %s: scale: %d act: %s" %
            (self.name, scale, self.act.__name__ if self.act is not None else 'No Activation')
            (self.name, scale, self.act.__class__.__name__ if self.act is not None else 'No Activation')
        )

    def __repr__(self):
        actstr = self.act.__name__ if self.act is not None else 'No Activation'
        actstr = self.act.__class__.__name__ if self.act is not None else 'No Activation'
        s = ('{classname}(in_channels={in_channels}, out_channels={out_channels}')
        s += (', ' + actstr)
        if self.name is not None:
@@ -163,11 +163,11 @@ class SubpixelConv2d(Module):
            self._built = True
        logging.info(
            "SubpixelConv2d  %s: scale: %d act: %s" %
            (self.name, scale, self.act.__name__ if self.act is not None else 'No Activation')
            (self.name, scale, self.act.__class__.__name__ if self.act is not None else 'No Activation')
        )

    def __repr__(self):
        actstr = self.act.__name__ if self.act is not None else 'No Activation'
        actstr = self.act.__class__.__name__ if self.act is not None else 'No Activation'
        s = ('{classname}(in_channels={in_channels}, out_channels={n_out_channels}')
        s += (', ' + actstr)
        if self.name is not None:
--- a/tensorlayer/layers/convolution/ternary_conv.py
+++ b/tensorlayer/layers/convolution/ternary_conv.py
@@ -50,7 +50,7 @@ class TernaryConv2d(Module):

    >>> net = tl.layers.Input([8, 12, 12, 32], name='input')
    >>> ternaryconv2d = tl.layers.TernaryConv2d(
    ...     n_filter=64, filter_size=(5, 5), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='ternaryconv2d'
    ...     n_filter=64, filter_size=(5, 5), strides=(1, 1), act=tl.ReLU, padding='SAME', name='ternaryconv2d'
    ... )(net)
    >>> print(ternaryconv2d)
    >>> output shape : (8, 12, 12, 64)
@@ -140,9 +140,9 @@ class TernaryConv2d(Module):
            self.b = self._get_weights("biases", shape=(self.n_filter, ), init=self.b_init)
            self.bias_add = tl.ops.BiasAdd(data_format=self.data_format)

        self.conv2d = tl.ops.Conv2D(strides=self._strides, padding=self.padding, data_format=self.data_format,
            dilations=self._dilation_rate)

        self.conv2d = tl.ops.Conv2D(
            strides=self._strides, padding=self.padding, data_format=self.data_format, dilations=self._dilation_rate
        )

    def forward(self, inputs):
        if self._forward_state == False:
--- a/tensorlayer/layers/core/init.py
+++ b/tensorlayer/layers/core/init.py
@@ -9,7 +9,5 @@ elif BACKEND == 'tensorflow':
    from .core_tensorflow import *
 elif BACKEND == 'paddle':
    from .core_paddle import *
 elif BACKEND == 'dragon':
    from .core_dragon import *
 else:
    raise ("Unsupported backend:", BACKEND)
--- a/tensorlayer/layers/core/common.py
+++ b/tensorlayer/layers/core/common.py
@@ -37,44 +37,46 @@ def str2act(act):
        raise Exception("Unsupported act: {}".format(act))
    return _act_dict[act]

 def _save_weights(self, file_path, format=None):

 def _save_weights(net, file_path, format=None):
    """Input file_path, save model weights into a file of given format.
                Use self.load_weights() to restore.

            Parameters
            ----------
            file_path : str
                Filename to which the model weights will be saved.
            format : str or None
                Saved file format.
                Value should be None, 'hdf5', 'npz', 'npz_dict' or 'ckpt'. Other format is not supported now.
                1) If this is set to None, then the postfix of file_path will be used to decide saved format.
                If the postfix is not in ['h5', 'hdf5', 'npz', 'ckpt'], then file will be saved in hdf5 format by default.
                2) 'hdf5' will save model weights name in a list and each layer has its weights stored in a group of
                the hdf5 file.
                3) 'npz' will save model weights sequentially into a npz file.
                4) 'npz_dict' will save model weights along with its name as a dict into a npz file.
                5) 'ckpt' will save model weights into a tensorflow ckpt file.

                Default None.

            Examples
            --------
            1) Save model weights in hdf5 format by default.
            >>> net = vgg16()
            >>> net.save_weights('./model.h5')
            ...
            >>> net.load_weights('./model.h5')

            2) Save model weights in npz/npz_dict format
            >>> net = vgg16()
            >>> net.save_weights('./model.npz')
            >>> net.save_weights('./model.npz', format='npz_dict')

            """

    # self.all_weights = self.network.all_weights
    if self.all_weights is None or len(self.all_weights) == 0:
                Use net.load_weights() to restore.

    Parameters
    ----------
    file_path : str
        Filename to which the model weights will be saved.
    format : str or None
        Saved file format.
        Value should be None, 'hdf5', 'npz', 'npz_dict' or 'ckpt'. Other format is not supported now.
        1) If this is set to None, then the postfix of file_path will be used to decide saved format.
        If the postfix is not in ['h5', 'hdf5', 'npz', 'ckpt'], then file will be saved in hdf5 format by default.
        2) 'hdf5' will save model weights name in a list and each layer has its weights stored in a group of
        the hdf5 file.
        3) 'npz' will save model weights sequentially into a npz file.
        4) 'npz_dict' will save model weights along with its name as a dict into a npz file.
        5) 'ckpt' will save model weights into a tensorflow ckpt file.

        Default None.

    Examples
    --------
    1) Save model weights in hdf5 format by default.
    >>> net = vgg16()
    >>> optimizer = tl.optimizers.Adam(learning_rate=0.001)
    >>> metric = tl.metric.Accuracy()
    >>> model = tl.models.Model(network=net, loss_fn=tl.cost.cross_entropy, optimizer=optimizer, metrics=metric)
    >>> model.save_weights('./model.h5')
    ...
    >>> model.load_weights('./model.h5')

    2) Save model weights in npz/npz_dict format
    >>> model.save_weights('./model.npz')
    >>> model.save_weights('./model.npz', format='npz_dict')

    """

    if net.all_weights is None or len(net.all_weights) == 0:
        logging.warning("Model contains no weights or layers haven't been built, nothing will be saved")
        return

@@ -86,11 +88,12 @@ def _save_weights(self, file_path, format=None):
            format = 'hdf5'

    if format == 'hdf5' or format == 'h5':
        utils.save_weights_to_hdf5(file_path, self)
        raise NotImplementedError("hdf5 load/save is not supported now.")
        # utils.save_weights_to_hdf5(file_path, net)
    elif format == 'npz':
        utils.save_npz(self.all_weights, file_path)
        utils.save_npz(net.all_weights, file_path)
    elif format == 'npz_dict':
        utils.save_npz_dict(self.all_weights, file_path)
        utils.save_npz_dict(net.all_weights, file_path)
    elif format == 'ckpt':
        # TODO: enable this when tf save ckpt is enabled
        raise NotImplementedError("ckpt load/save is not supported now.")
@@ -100,8 +103,9 @@ def _save_weights(self, file_path, format=None):
            "Other format is not supported now."
        )

 def _load_weights(self, file_path, format=None, in_order=True, skip=False):
    """Load model weights from a given file, which should be previously saved by self.save_weights().

 def _load_weights(net, file_path, format=None, in_order=True, skip=False):
    """Load model weights from a given file, which should be previously saved by net.save_weights().

    Parameters
    ----------
@@ -110,7 +114,7 @@ def _load_weights(self, file_path, format=None, in_order=True, skip=False):
    format : str or None
        If not specified (None), the postfix of the file_path will be used to decide its format. If specified,
        value should be 'hdf5', 'npz', 'npz_dict' or 'ckpt'. Other format is not supported now.
        In addition, it should be the same format when you saved the file using self.save_weights().
        In addition, it should be the same format when you saved the file using net.save_weights().
        Default is None.
    in_order : bool
        Allow loading weights into model in a sequential way or by name. Only useful when 'format' is 'hdf5'.
@@ -122,7 +126,7 @@ def _load_weights(self, file_path, format=None, in_order=True, skip=False):
    skip : bool
        Allow skipping weights whose name is mismatched between the file and model. Only useful when 'format' is
        'hdf5' or 'npz_dict'. If 'skip' is True, 'in_order' argument will be ignored and those loaded weights
        whose name is not found in model weights (self.all_weights) will be skipped. If 'skip' is False, error will
        whose name is not found in model weights (net.all_weights) will be skipped. If 'skip' is False, error will
        occur when mismatch is found.
        Default is False.

@@ -130,14 +134,17 @@ def _load_weights(self, file_path, format=None, in_order=True, skip=False):
    --------
    1) load model from a hdf5 file.
    >>> net = vgg16()
    >>> net.load_weights('./model_graph.h5', in_order=False, skip=True) # load weights by name, skipping mismatch
    >>> net.load_weights('./model_eager.h5') # load sequentially
    >>> optimizer = tl.optimizers.Adam(learning_rate=0.001)
    >>> metric = tl.metric.Accuracy()
    >>> model = tl.models.Model(network=net, loss_fn=tl.cost.cross_entropy, optimizer=optimizer, metrics=metric)
    >>> model.load_weights('./model_graph.h5', in_order=False, skip=True) # load weights by name, skipping mismatch
    >>> model.load_weights('./model_eager.h5') # load sequentially

    2) load model from a npz file
    >>> net.load_weights('./model.npz')
    >>> model.load_weights('./model.npz')

    2) load model from a npz file, which is saved as npz_dict previously
    >>> net.load_weights('./model.npz', format='npz_dict')
    3) load model from a npz file, which is saved as npz_dict previously
    >>> model.load_weights('./model.npz', format='npz_dict')

    Notes
    -------
@@ -154,16 +161,17 @@ def _load_weights(self, file_path, format=None, in_order=True, skip=False):
        format = file_path.split('.')[-1]

    if format == 'hdf5' or format == 'h5':
        if skip ==True or in_order == False:
            # load by weights name
            utils.load_hdf5_to_weights(file_path, self, skip)
        else:
            # load in order
            utils.load_hdf5_to_weights_in_order(file_path, self)
        raise NotImplementedError("hdf5 load/save is not supported now.")
        # if skip ==True or in_order == False:
        #     # load by weights name
        #     utils.load_hdf5_to_weights(file_path, net, skip)
        # else:
        #     # load in order
        #     utils.load_hdf5_to_weights_in_order(file_path, net)
    elif format == 'npz':
        utils.load_and_assign_npz(file_path, self)
        utils.load_and_assign_npz(file_path, net)
    elif format == 'npz_dict':
        utils.load_and_assign_npz_dict(file_path, self, skip)
        utils.load_and_assign_npz_dict(file_path, net, skip)
    elif format == 'ckpt':
        # TODO: enable this when tf save ckpt is enabled
        raise NotImplementedError("ckpt load/save is not supported now.")
--- a/tensorlayer/layers/core/core_dragon.py
+++ b/tensorlayer/layers/core/core_dragon.py
@@ -1,765 +0,0 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 #TODO Dragon Module needs a better implementation

 import time
 import dragon as dg
 import tensorlayer as tl
 from tensorlayer.layers.utils import (get_variable_with_initializer)
 from .common import str2act, _save_weights, _load_weights
 from collections import OrderedDict
 from tensorlayer import logging

 # Names exported by `from <this module> import *`.
 __all__ = ['Module', 'SequentialLayer', 'LayerList']

 # Process-wide registry used by `Module.__init__` to hand out layer names:
 # maps a name (or a lower-cased class-name prefix) to an integer counter.
 _global_layer_name_dict = {}
 # Backend tensor type; `Module.__setattr__` registers instances of it as parameters.
 Parameter_ = dg.Tensor

 class Module(object):
    """Base class of layers and networks for the Dragon backend.

    A ``Module`` keeps three ordered registries that are filled through attribute
    assignment (see ``__setattr__``):

    - ``_params``: parameters (``Parameter_`` instances) owned directly by this layer;
    - ``_params_status``: the trainable flag recorded for each registered parameter;
    - ``_layers``: child ``Module`` instances.

    Parameters
    ----------
    name : str or None
        Layer name. If None, a name is generated from the lower-cased class name.
    act : str, callable or None
        Activation. A string is resolved through ``str2act``; any other truthy value is
        called without arguments and the result is stored in ``self.act``.
    """

    def __init__(self, name=None, act=None, *args, **kwargs):
        self._params = OrderedDict()
        self._layers = OrderedDict()
        self._params_status = OrderedDict()
        self._parameter_layout_dict = {}
        self._create_time = int(time.time() * 1e9)

        global _global_layer_name_dict
        if name is None:
            prefix = self.__class__.__name__.lower()

            if _global_layer_name_dict.get(prefix) is not None:
                _global_layer_name_dict[prefix] += 1
                name = prefix + '_' + str(_global_layer_name_dict[prefix])
            else:
                _global_layer_name_dict[prefix] = 0
                name = prefix
            # Keep bumping the prefix counter until the candidate name is not registered.
            while _global_layer_name_dict.get(name) is not None:
                _global_layer_name_dict[prefix] += 1
                name = prefix + '_' + str(_global_layer_name_dict[prefix])
        elif _global_layer_name_dict.get(name) is None:
            # Explicit names are registered once; re-using a registered name is tolerated.
            _global_layer_name_dict[name] = 0

        self.name = name

        if isinstance(act, str):
            str_act = str2act(act)

        if act:
            if isinstance(act, str) and (len(act) > 5 and act[0:5] == "lrelu" or len(act) > 10 and act[0:10] == "leaky_relu"):
                # "lrelu..."/"leaky_relu..." strings with a suffix: the object returned by
                # str2act is stored as-is instead of being called.
                self.act = str_act
            elif isinstance(act, str):
                self.act = str_act()
            else:
                self.act = act()
        else:
            self.act = act

        # Layer building state
        self._built = False

        # Layer nodes state
        self._nodes = []
        self._nodes_fixed = False

        # Layer weight state
        self._all_weights = []
        self._trainable_weights = []
        self._nontrainable_weights = []

        # layer forward  state
        self._forward_state = False

        # Layer training state
        self.is_train = True

    def extend_repr(self):
        """
        Sets the extended representation of the Module.

        To print customized extended information, re-implement this method in your own Layers.
        """
        return ''

    def __repr__(self):
        """Return ``ClassName<...>`` with the extended info and, if any, one indented line per child layer."""
        extra_str = self.extend_repr()
        info_str = self.__class__.__name__ + '<'
        if self._layers:
            sub_str = '\n'
            if extra_str:
                sub_str += '{}\n'.format(extra_str)
            for key, value in self._layers.items():
                sub_str += '({}): {}\n'.format(key, repr(value))
            sub_str = sub_str.replace('\n', '\n  ') + '>'
            info_str += sub_str
        else:
            info_str += extra_str + '>'
        return info_str

    def __setattr__(self, name, value):
        """Route attribute assignment into the parameter / child-layer registries.

        ``Parameter_`` values are stored in ``_params`` (through ``insert_param_to_layer``),
        ``Module`` values in ``_layers``; everything else becomes a plain instance attribute.

        Raises:
            AttributeError: If a parameter or layer is assigned before ``Module.__init__`` ran.
            TypeError: If the name is already bound to a value of the other kind.
        """
        layers = self.__dict__.get('_layers')
        params = self.__dict__.get('_params')

        if isinstance(value, Parameter_):
            if params is None:
                raise AttributeError("Can not assign params before Module.__init__() call.")
            if name in self.__dict__:
                if self.__dict__[name] is not None:
                    raise TypeError("Expected type is not in (Parameter, Module), but got Parameter.")
                # A `None` placeholder attribute may be replaced by a real parameter.
                del self.__dict__[name]
            if layers and name in layers:
                raise TypeError("Expected type is Module, but got Parameter.")
            self.insert_param_to_layer(name, value)

        elif isinstance(value, Module):
            if layers is None:
                raise AttributeError("Can not assign layers before Module.__init__() call.")
            if name in self.__dict__:
                del self.__dict__[name]
            if params and name in params:
                raise TypeError("Expected type is Parameter, but got Module.")
            # TODO How to prompt the user, enter the in_channels.
            # TODO Automatic shape inference when the user does not enter inchannels.
            # if value._built is False:
            #     raise AttributeError(
            #         "The registered layer `{}` should be built in advance. "
            #         "Do you forget to pass the keyword argument 'in_channels'? ".format(value.name)
            #     )
            layers[name] = value
        else:
            object.__setattr__(self, name, value)

    def __call__(self, inputs, *args, **kwargs):
        """Calling the layer simply delegates to ``forward``."""
        output = self.forward(inputs, *args, **kwargs)

        return output

    def forward(self, *inputs, **kwargs):
        """Compute the layer output; must be overridden by subclasses."""
        raise Exception("The forward method must be implemented by inherited class")

    def build(self, inputs_shape):
        """Create the layer weights for `inputs_shape`; must be overridden by subclasses."""
        raise Exception("The build(self, inputs_shape) method must be implemented by inherited class")

    def _get_weights(self, var_name, shape, init=tl.initializers.random_normal(), trainable=True):
        """Create a variable scoped under this layer's name.

        Also records `trainable` on ``self.trainable``; ``insert_param_to_layer`` reads that
        attribute when the returned variable is later assigned to the layer.
        """
        weight = get_variable_with_initializer(
            scope_name=self.name, var_name=var_name, shape=shape, init=init, trainable=trainable
        )
        self.trainable = trainable
        return weight

    def save_weights(self, file_path, format=None):
        """Input file_path, save model weights into a file of given format."""
        _save_weights(self, file_path, format)

    def load_weights(self, file_path, format=None, in_order=True, skip=False):
        """Load model weights from a given file, which should be previously saved by self.save_weights()."""
        _load_weights(self, file_path, format, in_order, skip)

    def _set_mode_for_layers(self, is_train):
        """Set all layers of this network to a given mode.

        Parameters
        ----------
        is_train : boolean
            Network's mode. True means training mode while False means evaluation mode.

        """
        for _, layer in self.layers_and_names(name_prefix=''):
            if isinstance(layer, Module):
                layer.is_train = is_train

    def set_train(self):
        """Set this network in training mode. After calling this method,
        all layers in network are in training mode, in particular, BatchNorm, Dropout, etc.
        TODO It is not possible to modify the parameter state after initialization, and a better way needs to be found.
        Examples
        --------
        >>> import tensorlayer as tl
        >>> net = tl.vgg16()
        >>> net.set_train()

        """
        if self.is_train is not True:
            self.is_train = True
            self._set_mode_for_layers(True)

    def set_eval(self):
        """Set this network in evaluation mode. After calling this method,
        all layers in network are in evaluation mode, in particular, BatchNorm, Dropout, etc.
        TODO It is not possible to modify the parameter state after initialization, and a better way needs to be found.
        Examples
        --------
        >>> import tensorlayer as tl
        >>> net = tl.vgg16()
        >>> net.set_eval()
        # do evaluation

        """
        if self.is_train is not False:
            self.is_train = False
            self._set_mode_for_layers(False)

    def test(self):
        """Set this network in evaluation mode."""
        # This class defines no `eval` method; `set_eval` is the evaluation switch.
        self.set_eval()

    def infer(self):
        """Set this network in evaluation mode."""
        self.set_eval()

    @staticmethod
    def _compute_shape(tensors):
        """Return the shape of `tensors`, or a list of shapes when `tensors` is a list."""
        if isinstance(tensors, list):
            shape_mem = [tl.get_tensor_shape(t) for t in tensors]
        else:
            shape_mem = tl.get_tensor_shape(tensors)
        return shape_mem

    def insert_param_to_layer(self, param_name, param, check_name=True):
        """
        Adds a parameter to the current layer.

        Inserts a parameter with given name to the layer. Please refer to the usage in
        source code of `tensorlayer.layer.Module.__setattr__`.

        Args:
            param_name (str): Name of the parameter.
            param (Parameter): Parameter to be inserted to the layer.
            check_name (bool): Determines whether the name input is compatible. Default: True.

        Raises:
            KeyError: If the name of parameter is null or contains dot.
            AttributeError: If user did not call init() first.
            TypeError: If the type of parameter is not Parameter_.
        """
        if not param_name:
            raise KeyError("The name of parameter should not be null.")
        if check_name and '.' in param_name:
            raise KeyError("The name of parameter should not contain \".\"")
        if '_params' not in self.__dict__:
            raise AttributeError("You need call init() first.")
        if hasattr(self, param_name) and param_name not in self._params:
            raise KeyError("Duplicated parameter name '{}'.".format(param_name))
        if not isinstance(param, Parameter_) and param is not None:
            raise TypeError("The type of parameter should be 'Parameter' if not None.")
        self._params[param_name] = param
        try:
            # `self.trainable` only exists once `_get_weights` has run; without it the
            # parameter is registered with no trainable status.
            self._params_status[param_name] = self.trainable
        except AttributeError:
            pass

    def _add_node(self, input_tensors, output_tensors):
        """Add a LayerNode for this layer given input_tensors, output_tensors.

        WARNING: This function should not be called from outside, it should only be called
        in layer.__call__ when building static model.

        Parameters
        ----------
        input_tensors : Tensor or a list of tensors
            Input tensors to this layer.
        output_tensors : Tensor or a list of tensors
            Output tensors to this layer.

        """
        raise NotImplementedError

    @property
    def create_time(self):
        """Creation timestamp in nanoseconds, taken from ``time.time()`` in ``__init__``."""
        return self._create_time

    def __getattr__(self, name):
        """Fallback lookup: resolve `name` in ``_params``, then ``_layers``, then ``_params_status``."""
        if '_params' in self.__dict__:
            params = self.__dict__['_params']
            if name in params:
                return params[name]
        if '_layers' in self.__dict__:
            layers = self.__dict__['_layers']
            if name in layers:
                return layers[name]
        if '_params_status' in self.__dict__:
            params_status = self.__dict__['_params_status']
            if name in params_status:
                return params_status[name]
        raise AttributeError("'{}' object has no attribute '{}'.".format(type(self).__name__, name))

    def __delattr__(self, name):
        """Delete a parameter, a child layer or a plain attribute called `name`."""
        if name in self._params:
            del self._params[name]
            # Drop the matching status so no stale entry outlives the parameter.
            self._params_status.pop(name, None)
        elif name in self._layers:
            del self._layers[name]
        else:
            object.__delattr__(self, name)

    def _collect_weights(self, trainable):
        """Return parameters of this layer and its sublayers whose recorded status equals `trainable`."""
        weights = []
        for _, layer in self.layers_and_names(name_prefix=''):
            for param_name, param in layer._params.items():
                # Status is looked up by name so a parameter without a recorded
                # status cannot shift the pairing of the others.
                if layer._params_status.get(param_name) == trainable:
                    weights.append(param)
        return weights

    @property
    def trainable_weights(self):
        """
        Returns all trainable weights.

        The list is rebuilt on every access from this layer and all of its sublayers,
        so repeated accesses do not accumulate duplicates.

        Returns:
            List, the list of trainable weights.
        """
        self._trainable_weights = self._collect_weights(True)
        return self._trainable_weights

    @property
    def nontrainable_weights(self):
        """
        Returns all untrainable weights.

        The list is rebuilt on every access from this layer and all of its sublayers,
        so repeated accesses do not accumulate duplicates.

        Returns:
            List, the list of untrainable weights.
        """
        self._nontrainable_weights = self._collect_weights(False)
        return self._nontrainable_weights

    @property
    def all_weights(self):
        """
        Returns every registered parameter of this layer and all of its sublayers.

        The list is rebuilt on every access.

        Returns:
            List, the list of all weights.
        """
        self._all_weights = [
            param for _, layer in self.layers_and_names(name_prefix='') for param in layer._params.values()
        ]
        return self._all_weights

    def get_weights(self, expand=True):
        """
        Returns an iterator over layer weights.

        Yields weights of this layer. If `expand` is True, yield parameters of this layer and all sublayers.

        Args:
            expand (bool): If True, yields parameters of this layer and all sublayers. Otherwise, yields only parameters
                           that are direct members of this layer. Default: True.

        Examples:
            >>> net = Net()
            >>> for item in net.get_weights():
            >>>     print(item)
        """
        for _, param in self.parameters_and_names(expand=expand):
            yield param

    def check_names(self):
        """Raise ValueError if two parameters reachable from this layer share the same ``name``."""
        names = set()
        for value, param in self.parameters_and_names():
            if param.name in names:
                raise ValueError(
                    "The value of {} is {}, its name '{}' already exists.".format(value, param, param.name)
                )
            names.add(param.name)

    def insert_child_to_layer(self, child_name, child):
        """
        Adds a child layer to the current layer.

        Args:
            child_name (str): Name of the child layer.
            child (Module): The child layer to be inserted.

        Raises:
            KeyError: Child Module's name is incorrect or duplicated with the other child name.
            TypeError: Child Module's type is incorrect.
        """
        if not child_name or '.' in child_name:
            raise KeyError("Child layer name is incorrect.")
        if hasattr(self, child_name) and child_name not in self._layers:
            raise KeyError("Duplicate child name '{}'.".format(child_name))
        if not isinstance(child, Module) and child is not None:
            raise TypeError("Child layer type is incorrect.")
        self._layers[child_name] = child

    def parameters_and_names(self, name_prefix='', expand=True):
        """
        Returns an iterator over layer parameters.

        Includes the parameter's name  and itself. Parameters registered as None are
        skipped, and each parameter object is yielded at most once.

        Args:
            name_prefix (str): Namespace. Default: ''.
            expand (bool): If True, yields parameters of this layer and all sublayers. Otherwise, yields only parameters
                           that are direct members of this layer. Default: True.

        Examples:
            >>> n = Net()
            >>> names = []
            >>> for m in n.parameters_and_names():
            >>>     if m[0]:
            >>>         names.append(m[0])
        """
        if expand:
            layers = self.layers_and_names(name_prefix=name_prefix)
        else:
            layers = [(name_prefix, self)]

        seen_ids = set()
        for layer_name, layer in layers:
            for par_name, par in layer._params.items():
                # `insert_param_to_layer` accepts None, so guard before touching attributes.
                if par is None:
                    continue
                # Prefer the `inited_param` replacement when the parameter carries one;
                # tensors without that attribute are used as they are.
                inited = getattr(par, 'inited_param', None)
                if inited is not None:
                    par = inited
                if id(par) in seen_ids:
                    continue
                seen_ids.add(id(par))
                par_new_name = par_name
                if layer_name:
                    par_new_name = layer_name + '.' + par_new_name

                yield par_new_name, par

    def layers_and_names(self, layers=None, name_prefix=''):
        """
        Returns an iterator over all layers in the network.

        Includes the layer's name and itself. The traversal is depth-first, starts with
        this layer and visits every layer object only once.

        Args:
            layers (set): Layers already visited; used by the recursion. Default: None.
            name_prefix (str): Namespace. Default: ''.

        Examples:
            >>> n = Net()
            >>> names = []
            >>> for m in n.layers_and_names():
            >>>     if m[0]:
            >>>         names.append(m[0])
        """
        t_layers = layers if layers else set()
        if self in t_layers:
            return

        t_layers.add(self)
        yield name_prefix, self

        for name, layer in self._layers.items():
            if layer:
                layers_name_prefix = name
                if name_prefix:
                    layers_name_prefix = name_prefix + '.' + layers_name_prefix
                for ele in layer.layers_and_names(t_layers, layers_name_prefix):
                    yield ele

    def layers(self):
        """Returns an iterator over immediate layers."""
        return self.name_layers().values()

    def name_layers(self):
        """
        Returns an ordered dict of the immediate child layers, keyed by name.

        None entries are skipped and a layer registered under several names is
        reported only under the first one.
        """
        value_set = set()
        layers = OrderedDict()
        for name, layer in self._layers.items():
            if layer is not None and layer not in value_set:
                value_set.add(layer)
                layers[name] = layer
        return layers

    def init_build(self, *inputs, **kwargs):
        """
        (1) This method must be called when the Layer has no input in_channels.
        (2) Automatic shape inference when the user does not enter inchannels.
        """

        self.forward(*inputs, **kwargs)


 class SequentialLayer(Module):
    """
    Sequential layer container.

    A list of Layers will be added to it in the order they are passed in the constructor.
    Alternatively, an ordered dict of layers can also be passed in.

    Args:
        args (list, OrderedDict): List of subclass of Module.

    Raises:
        TypeError: If the type of the argument is not list or OrderedDict.

    Inputs:
        - **input** (Tensor) - Tensor with shape according to the first Module in the sequence.

    Outputs:
        Tensor, the output Tensor with shape depending on the input and defined sequence of Layers.

    Examples:
        >>> conv = tl.layers.Conv2d(3, 2, 3, pad_mode='valid')
        >>> bn = tl.layers.BatchNorm2d(2)
        >>> seq = tl.layers.SequentialLayer([conv, bn])
        >>>
        >>> x = tl.layers.Input((1, 3, 4, 4))
        >>> seq(x)
    """
    def __init__(self, *args):
        super(SequentialLayer, self).__init__()
        self._built = True
        if len(args) == 1:
            layers = args[0]
            if isinstance(layers, list):
                for index, layer in enumerate(layers):
                    self.insert_child_to_layer(str(index), layer)
            elif isinstance(layers, OrderedDict):
                for name, layer in layers.items():
                    self.insert_child_to_layer(name, layer)
            else:
                raise TypeError('Layers must be list or orderedDict')
        else:
            # Zero or several positional arguments: each one is a layer.
            for index, layer in enumerate(args):
                self.insert_child_to_layer(str(index), layer)
        # Cached execution order, refreshed by every mutating method.
        self.layer_list = list(self._layers.values())

    def __getitem__(self, index):
        """Return the layer at `index`, or a new container of the same class for a slice."""
        if isinstance(index, slice):
            return self.__class__(
                OrderedDict(list(self._layers.items())[index]))
        index = self._valid_index(len(self), index)
        return list(self._layers.values())[index]

    def __setitem__(self, index, layer):
        """Replace the layer at position `index`, keeping its key."""
        if self._valid_module(layer):
            index = self._valid_index(len(self), index)
            key = list(self._layers.keys())[index]
            self._layers[key] = layer
            self.layer_list = list(self._layers.values())

    def __delitem__(self, index):
        """Remove the layer(s) selected by an int or slice; remaining keys are not renumbered."""
        if isinstance(index, int):
            index = self._valid_index(len(self), index)
            key = list(self._layers.keys())[index]
            del self._layers[key]
        elif isinstance(index, slice):
            keys = list(self._layers.keys())[index]
            for key in keys:
                del self._layers[key]
        else:
            raise TypeError('Index {} is not int type or slice type'.format(index))
        self.layer_list = list(self._layers.values())

    def __len__(self):
        return len(self._layers)

    def append(self, layer):
        """Append `layer` at the end of the sequence and return the container itself."""
        if self._valid_module(layer):
            # Keys are not renumbered after deletions, so `str(len(self))` may already
            # be taken; pick the next free numeric key instead of overwriting a layer.
            key = len(self)
            while str(key) in self._layers:
                key += 1
            self._layers[str(key)] = layer
        self.layer_list = list(self._layers.values())
        return self

    def build(self, inputs_shape):
        """Nothing to build: the container owns no weights of its own."""
        pass

    def forward(self, input_data):
        """Feed `input_data` through every layer in order and return the final output."""
        for layer in self.layer_list:
            input_data = layer(input_data)
        return input_data

    def _valid_index(self, layer_num, index):
        """Validate an int index against `layer_num` and return it normalised to be non-negative.

        Raises:
            TypeError: If `index` is not an int.
            IndexError: If `index` is outside ``[-layer_num, layer_num)``.
        """
        if not isinstance(index, int):
            raise TypeError("Index {} is not int type".format(index))
        if not -layer_num <= index < layer_num:
            raise IndexError("Index should be a number in range [{}, {}), but got {}"
                             .format(-layer_num, layer_num, index))
        return index % layer_num

    def _valid_module(self, layer):
        """Return True if `layer` is a Module instance, otherwise raise TypeError."""
        if issubclass(layer.__class__, Module):
            return True
        raise TypeError('Module {} is not subclass of Module'.format(layer))


 class LayerList(Module):
    """
    The class :class:`LayerList` is a linear stack of layers.

    The :class:`LayerList` is created from a list of layer instances; `forward`
    runs the input through them one after another.

    Parameters
    ----------
    layers: list of Layer
        A list of layers.
    name : str or None
        A unique layer name. If None, a unique name will be automatically assigned.

    Methods
    ---------
    __init__()
        Initializing the LayerList.
    forward()
        Forward the computation. The computation will go through all layer instances.
    """

    def __init__(self, layers, name=None):
        """
        Initializing the LayerList given a list of Layer.

        :param layers: list of Layer
        :param name: str or None
        """

        super(LayerList, self).__init__(name=name)
        # A plain list is stored as an ordinary instance attribute by Module.__setattr__.
        self.layers = layers
        every_layer_built = True
        for member in self.layers:
            self._trainable_weights.extend(member.trainable_weights)
            self._nontrainable_weights.extend(member.nontrainable_weights)
            if member._built is False:
                every_layer_built = False
        # The list counts as built only when none of its members is unbuilt.
        if every_layer_built:
            self._built = True

        member_names = ', '.join([member.name for member in self.layers])
        logging.info("LayerList %s including layers [%s]" % (self.name, member_names))

        # check layer name uniqueness in LayerList
        used_names = set()
        for member in self.layers:
            if member.name in used_names:
                raise ValueError(
                    'Layer name \'%s\' has already been used by another layer. Please change the layer name.' %
                    member.name
                )
            used_names.add(member.name)

    def __getitem__(self, idx):
        """Return one layer for an index, or a new LayerList for a slice."""
        if not isinstance(idx, slice):
            return self.layers[idx]
        return LayerList(list(self.layers)[idx])

    def __len__(self):
        return len(self.layers)

    def __repr__(self):
        """Render as ``LayerList(`` followed by one indexed, indented entry per layer."""
        pieces = ['LayerList' + '(\n']
        for position, member in enumerate(self.layers):
            rendered = _addindent(member.__repr__(), 2)
            pieces.append('  (' + str(position) + '): ' + rendered + '\n')
        pieces.append(')')
        return ''.join(pieces)

    @property
    def trainable_weights(self):
        """Trainable weights gathered from the member layers at construction time."""
        return self._trainable_weights

    @property
    def nontrainable_weights(self):
        """Non-trainable weights gathered from the member layers at construction time."""
        return self._nontrainable_weights

    @property
    def all_weights(self):
        """Trainable weights followed by non-trainable weights, as a new list."""
        return self._trainable_weights + self._nontrainable_weights

    # NOTE: no `build` is defined here; the base-class `build` applies.

    def forward(self, inputs):
        """
        Forward the computation. The computation will go through all layer instances.
        """
        result = inputs
        for member in self.layers:
            result = member.forward(result)
        return result

    def _set_mode_for_layers(self, is_train):
        """Set training/evaluation mode for all layer instances."""
        self.is_train = is_train
        for member in self.layers:
            if not isinstance(member, LayerList):
                member.is_train = is_train
            else:
                # Nested lists propagate the mode to their own members.
                member._set_mode_for_layers(is_train)

    def get_args(self):
        """Return a dict with each layer's ``config`` plus a ``layer_type`` marker.

        NOTE(review): reads ``self.layer_args`` and ``layer.config``, neither of which is
        defined in the visible part of this file - confirm they exist before relying on this.
        """
        members = self.layer_args["layers"]
        return {
            "layers": [member.config for member in members],
            "layer_type": "layerlist",
        }

 def tolist(tensors):
    """Flatten arbitrarily nested lists/tuples into one flat list.

    Anything that is not a list or tuple is treated as a leaf and returned
    wrapped in a single-element list.
    """
    if not isinstance(tensors, (list, tuple)):
        return [tensors]
    flattened = []
    for item in tensors:
        flattened.extend(tolist(item))
    return flattened

 def _addindent(s_, numSpaces):
    """Indent every line of `s_` except the first by `numSpaces` spaces.

    Single-line strings are returned unchanged.
    """
    head, newline, remainder = s_.partition('\n')
    if not newline:
        # don't do anything for single-line stuff
        return s_
    pad = numSpaces * ' '
    indented = '\n'.join(pad + row for row in remainder.split('\n'))
    return head + '\n' + indented
--- a/tensorlayer/layers/core/core_mindspore.py
+++ b/tensorlayer/layers/core/core_mindspore.py
@@ -4,10 +4,17 @@
 from .common import str2act, _save_weights, _load_weights
 from mindspore.nn import Cell
 import tensorlayer as tl
 from tensorlayer.layers.utils import (get_variable_with_initializer)
 from collections import OrderedDict

 __all__ = ['Module', 'SequentialLayer', 'LayerList']
 from mindspore import log as logger
 import inspect
 from mindspore import context
 import numpy
 import mindspore as ms
 from mindspore.common.api import _pynative_exec
 from mindspore.common.parameter import Parameter

 __all__ = ['Module', 'SequentialLayer']

 _global_layer_name_dict = {}  # TODO: better implementation?

@@ -44,7 +51,8 @@ class Module(Cell):
            str_act = str2act(act)

        if act:
            if isinstance(act, str) and (len(act) > 5 and act[0:5] == "lrelu" or len(act) > 10 and act[0:10] == "leaky_relu"):
            if isinstance(act, str) and (len(act) > 5 and act[0:5] == "lrelu" or
                                         len(act) > 10 and act[0:10] == "leaky_relu"):
                self.act = str_act
            elif isinstance(act, str):
                self.act = str_act()
@@ -68,10 +76,12 @@ class Module(Cell):
        # Layer training state
        self.is_train = True


        # layer forward  state
        self._forward_state = False

        # data_format
        self.data_format = "NCHW"

    def forward(self, *inputs, **kwargs):
        """Compute the layer output; subclasses must override this method."""
        message = "The forward method must be implemented by inherited class"
        raise Exception(message)

@@ -81,13 +91,25 @@ class Module(Cell):
    def build(self, inputs_shape):
        """Create the layer weights for `inputs_shape`; subclasses must override this method."""
        message = "The build(self, inputs_shape) method must be implemented by inherited class"
        raise Exception(message)

    def _get_weights(self, var_name, shape, init=tl.initializers.random_normal(), trainable=True):
    def _get_weights(self, var_name, shape, init=tl.initializers.random_normal(), trainable=True, transposed=False):
        """ Get trainable variables. """
        weight = get_variable_with_initializer(
            scope_name=self.name, var_name=var_name, shape=shape, init=init, trainable=trainable
        )
        var_name = self.name + "/" + var_name
        # TODO 2D mindspore weights shape : [out_channel, in_channel, kernel_h, kernel_w]
        # TODO 2D mindspore transposed shape [in_channel, out_channel, kernel_h, kernel_w]
        if len(shape) == 3:
            shape = shape[::-1]
        if len(shape) == 4:
            if not transposed and self.data_format == 'NHWC':
                shape = (shape[3], shape[0], shape[1], shape[2])
            else:
                shape = (shape[3], shape[2], shape[0], shape[1])
        if len(shape) == 5:
            shape = (shape[4], shape[3], shape[0], shape[1], shape[2])

        initial_value = init(shape=shape)
        var = tl.Variable(initial_value=initial_value, name=var_name, trainable=trainable)
        self.trainable = trainable
        return weight
        return var

    def save_weights(self, file_path, format=None):
        """Input file_path, save model weights into a file of given format."""
@@ -105,6 +127,59 @@ class Module(Cell):
            shape_mem = tl.get_tensor_shape(tensors)
        return shape_mem

    def __call__(self, *inputs, **kwargs):
        """Run the cell on `inputs`; only PyNative mode is supported.

        Keyword arguments are bound against the signature of ``construct``. When
        ``requires_grad`` is True, the grad flag of each cell returned by ``cells()``
        is forced on for the duration of the call and restored afterwards.

        Raises
        ------
        NotImplementedError
            If the MindSpore context is set to GRAPH_MODE.
        TypeError
            If any positional input is a numpy array.
        """
        if self.__class__.construct is Cell.construct:
            logger.warning(
                f"The '{self.__class__}' does not override the method 'construct', "
                f"will call the super class(Cell) 'construct'."
            )
        if kwargs:
            bound_args = inspect.signature(self.construct).bind(*inputs, **kwargs)
            inputs = bound_args.args
            kwargs = bound_args.kwargs

        if context.get_context("mode") == context.GRAPH_MODE:
            # `NotImplemented` is a constant, not an exception class; calling it would
            # raise an unrelated TypeError instead of reporting the real problem.
            raise NotImplementedError("GRAPH MODE is not supported, please select PYNATIVE MODE.")

        # if context.get_context("mode") == context.GRAPH_MODE:
        #     if kwargs:
        #         raise ValueError("For 'graph' mode, the outermost network does not support passing "
        #                          "variable key-value pair parameters.")
        #     if self.enable_hook:
        #         raise ValueError("The graph mode does not support hook function.")
        #     out = self.compile_and_run(*inputs)
        #     return out

        self.do_parameter_broadcast()
        for item in inputs:
            if isinstance(item, numpy.ndarray):
                raise TypeError("cell inputs should not be numpy array.")
        # Grad flags of the cells before the call, so they can be restored afterwards.
        origin_grad = []
        if self.requires_grad is True:
            _pynative_exec.set_grad_flag(True)
            _pynative_exec.new_graph(self, *inputs, **kwargs)
            for cell in self.cells():
                origin_grad.append(cell.requires_grad)
                cell.set_grad(True)
        else:
            _pynative_exec.set_grad_flag(False)
        cast_inputs = list()
        if hasattr(self, "_mindspore_flags"):
            if self._mindspore_flags.get('fp16'):
                cast_inputs = self._cast_mixed_precision_inputs(inputs, ms.float16)
            if self._mindspore_flags.get('fp32'):
                cast_inputs = self._cast_mixed_precision_inputs(inputs, ms.float32)
        # No mixed-precision cast happened (or it produced nothing): use the inputs as given.
        if not cast_inputs:
            cast_inputs = inputs
        output = self.run_construct(cast_inputs, kwargs)
        if isinstance(output, Parameter):
            output = output.data
        if self.requires_grad is True:
            _pynative_exec.end_graph(self, output, *inputs, **kwargs)
            for i, cell in enumerate(self.cells()):
                cell.set_grad(origin_grad[i])
        return output

    def _add_node(self, input_tensors, output_tensors):
        """Add a LayerNode for this layer given input_tensors, output_tensors.

@@ -197,32 +272,36 @@ class Module(Cell):

 class SequentialLayer(Module):
    """
    Sequential layer container.

    A list of Layers will be added to it in the order they are passed in the constructor.
    Alternatively, an ordered dict of layers can also be passed in.

    Args:
        args (list, OrderedDict): List of subclass of Module.

    Raises:
        TypeError: If the type of the argument is not list or OrderedDict.

    Inputs:
        - **input** (Tensor) - Tensor with shape according to the first Module in the sequence.
    The class :class:`SequentialLayer` is a linear stack of layers.
    The :class:`SequentialLayer` can be created by passing a list of layer instances.
    The given layer instances will be automatically connected one by one.
    Parameters
    ----------
    layers: list of Layer
        A list of layers.
    name : str or None
        A unique layer name. If None, a unique name will be automatically assigned.
    Methods
    ---------
    __init__()
        Initializing the LayerList.
    weights()
        A collection of weights of all the layer instances.
    build()
        Build the LayerList. The layer instances will be connected automatically one by one.
    forward()
        Forward the computation. The computation will go through all layer instances.

    Outputs:
        Tensor, the output Tensor with shape depending on the input and defined sequence of Layers.
    Examples
    ---------
    >>> conv = tl.layers.Conv2d(3, 2, 3, pad_mode='valid')
    >>> bn = tl.layers.BatchNorm2d(2)
    >>> seq = tl.layers.SequentialLayer([conv, bn])
    >>> x = tl.layers.Input((1, 3, 4, 4))
    >>> seq(x)

    Examples:
        >>> conv = tl.layers.Conv2d(3, 2, 3, pad_mode='valid')
        >>> bn = tl.layers.BatchNorm2d(2)
        >>> relu = tl.ReLU()
        >>> seq = tl.layers.SequentialLayer([conv, bn, relu])
        >>>
        >>> x = tl.layers.Input((1, 3, 4, 4))
        >>> seq(x)
    """

    def __init__(self, *args):
        super(SequentialLayer, self).__init__()
        # self._built = True
@@ -243,8 +322,7 @@ class SequentialLayer(Module):

    def __getitem__(self, index):
        if isinstance(index, slice):
            return self.__class__(
                OrderedDict(list(self._layers.items())[index]))
            return self.__class__(OrderedDict(list(self._layers.items())[index]))
        index = self._valid_index(len(self), index)
        return list(self._layers.values())[index]

@@ -294,62 +372,12 @@ class SequentialLayer(Module):
        if not isinstance(index, int):
            raise TypeError("Index {} is not int type")
        if not -layer_num <= index < layer_num:
            raise IndexError("Index should be a number in range [{}, {}), but got {}"
                             .format(-layer_num, layer_num, index))
            raise IndexError(
                "Index should be a number in range [{}, {}), but got {}".format(-layer_num, layer_num, index)
            )
        return index % layer_num

    def _valid_module(self, layer):
        """Confirm that ``layer`` is an instance of a ``Module`` subclass.

        Parameters
        ----------
        layer : object
            Candidate object to be stored in the container.

        Returns
        -------
        bool
            Always True when the check passes.

        Raises
        ------
        TypeError
            If the class of ``layer`` is not ``Module`` or a subclass of it.
        """
        layer_cls = layer.__class__
        # Guard clause: reject anything that is not derived from Module.
        if not issubclass(layer_cls, Module):
            raise TypeError('Module {} is not subclass of Module'.format(layer))
        return True


 class LayerList(Module):
    """
    Skeleton of a linear stack of layers.

    The class is intended to hold a list of layer instances that are chained
    one after another. In this revision only the constructor and ``__len__``
    contain code; ``__getitem__``, ``__repr__`` and ``forward`` are empty
    placeholders that return None.

    Parameters
    ----------
    layers: list of Layer
        A list of layers. Accepted by the constructor but not stored by it.
    name : str or None
        A unique layer name. If None, a unique name will be automatically assigned.

    Methods
    ---------
    __init__()
        Initializing the LayerList.
    forward()
        Placeholder for the forward computation (currently returns None).
    """

    def __init__(self, layers, name=None):
        """
        Initializing the LayerList given a list of Layer.

        Only ``name`` is used: it is forwarded to the ``Module`` constructor.

        :param layers: list of Layer
        :param name: str or None
        """
        super(LayerList, self).__init__(name=name)

    def __getitem__(self, idx):
        """Placeholder: indexing is not implemented and yields None."""
        return None

    def __len__(self):
        """Return ``len(self.layers)``."""
        # NOTE(review): ``self.layers`` is not assigned anywhere in this class;
        # presumably it is supplied by ``Module`` - confirm it supports len().
        return len(self.layers)

    def __repr__(self):
        """Placeholder: no representation is built and None is returned."""
        return None

    def forward(self, inputs):
        """Placeholder: ``inputs`` is ignored and None is returned."""
        return None

--- a/tensorlayer/layers/core/core_paddle.py
+++ b/tensorlayer/layers/core/core_paddle.py
@@ -3,11 +3,13 @@

 import copy, six
 from .common import str2act
 from .common import _save_weights, _load_weights
 from paddle.fluid import framework
 from paddle.fluid.dygraph import Layer
 from paddle.fluid.framework import in_dygraph_mode
 from paddle.fluid.dygraph.base import program_desc_tracing_guard, param_guard
 from paddle.fluid.dygraph import parallel_helper
 import paddle as pd

 _global_layer_name_dict = {}

@@ -44,7 +46,8 @@ class Module(Layer):
            str_act = str2act(act)

        if act:
            if isinstance(act, str) and (len(act) > 5 and act[0:5] == "lrelu" or len(act) > 10 and act[0:10] == "leaky_relu"):
            if isinstance(act, str) and (len(act) > 5 and act[0:5] == "lrelu" or
                                         len(act) > 10 and act[0:10] == "leaky_relu"):
                self.act = str_act
            elif isinstance(act, str):
                self.act = str_act()
@@ -176,8 +179,7 @@ class Module(Layer):
                with program_desc_tracing_guard(False):
                    self._build_once(*inputs, **kwargs)
                    if parallel_helper._is_data_parallel_mode():
                        parallel_helper._broadcast_parameters(
                            self._parameters.values())
                        parallel_helper._broadcast_parameters(self._parameters.values())
                self._paddle_built = True

            outputs = self.forward(*inputs, **kwargs)
@@ -189,36 +191,45 @@ class Module(Layer):

            return outputs

    def _get_weights(self, var_name, shape, init=None, trainable=True):
        if var_name in ["filters", "weights"]:
            w_tmp = self.create_parameter(shape=shape, attr=init, is_bias=False)
        elif var_name in ["biases"]:
            w_tmp = self.create_parameter(shape=shape, attr=init, is_bias=True)
        else:
            w_tmp = self.create_parameter(shape=shape, attr=init)
    def _get_weights(self, var_name, shape, init=None, trainable=True, transposed=None):
        # TODO 2D mindspore weights shape : [out_channel, in_channel, kernel_h, kernel_w]
        # TODO 2D mindspore transposed shape [in_channel, out_channel, kernel_h, kernel_w]
        if len(shape) == 3:
            shape = shape[::-1]
        if len(shape) == 4:
            if transposed:
                shape = (shape[3], shape[0], shape[1], shape[2])
            else:
                shape = (shape[3], shape[2], shape[0], shape[1])
        if len(shape) == 5:
            shape = (shape[4], shape[3], shape[0], shape[1], shape[2])

        # if var_name in ["filters", "weights"]:
        #     var_name = self.name + "/" + var_name
        #     w_tmp = self.create_parameter(shape=shape, attr=init, is_bias=False, trainable=trainable, var_name=var_name)
        # elif var_name in ["biases"]:
        #     var_name = self.name + "/" + var_name
        #     w_tmp = self.create_parameter(shape=shape, attr=init, is_bias=True, trainable=trainable, var_name=var_name)
        # else:
        var_name = self.name + "/" + var_name
        w_tmp = self.create_parameter(shape=shape, attr=init, var_name=var_name, trainable=trainable)
        self.trainable = trainable

        return w_tmp

    def create_parameter(self,
                         shape,
                         attr=None,
                         dtype=None,
                         is_bias=False,
                         default_initializer=None):
    def create_parameter(
        self, shape, attr=None, dtype=None, is_bias=False, default_initializer=None, trainable=True, var_name=None
    ):
        """Create parameters for this layer."""
        temp_attr = copy.deepcopy(attr)
        init_attr = pd.ParamAttr(name=var_name, initializer=attr, trainable=trainable, do_model_average=True)
        temp_attr = copy.deepcopy(init_attr)
        if isinstance(temp_attr, six.string_types) and temp_attr == "":
            temp_attr = None
        return self._helper.create_parameter(temp_attr, shape, dtype, is_bias,
                                             default_initializer)
        return self._helper.create_parameter(temp_attr, shape, dtype, is_bias, default_initializer)

    @property
    def all_weights(self):
        """Return the parameters of this layer and of its sublayers.

        Returns
        -------
        list
            The parameter objects yielded by
            ``self.named_parameters(include_sublayers=True)``, in iteration
            order, with their names dropped.
        """
        # Build the list once; the same comprehension used to be evaluated twice,
        # walking the parameters a second time for an identical result.
        return [param for _, param in self.named_parameters(include_sublayers=True)]

    @property
@@ -231,4 +242,11 @@ class Module(Layer):
        (2) Automatic shape inference when the user does not enter inchannels.
        """

        self.forward(*inputs, **kwargs)
        self.forward(*inputs, **kwargs)

    def save_weights(self, file_path, format=None):
        """Save the weights of this network by delegating to ``_save_weights``.

        Parameters
        ----------
        file_path :
            Destination handed to ``_save_weights`` unchanged (presumably a
            path string - confirm against ``_save_weights``).
        format :
            Storage format selector, forwarded as-is. The accepted values and the
            meaning of None are decided by ``_save_weights``.
        """
        _save_weights(net=self, file_path=file_path, format=format)

    def load_weights(self, file_path, format=None, in_order=True, skip=False):
        """Load model weights from a given file, which should be previously saved by self.save_weights().

        All arguments are forwarded unchanged, by keyword, to ``_load_weights``
        together with this network as ``net``.

        Parameters
        ----------
        file_path :
            Source handed to ``_load_weights`` (presumably a path string - confirm).
        format :
            Storage format selector; interpretation is left to ``_load_weights``.
        in_order : bool
            Forwarded as-is. Default: True.
        skip : bool
            Forwarded as-is. Default: False.
        """
        _load_weights(net=self, file_path=file_path, format=format, in_order=in_order, skip=skip)
--- a/tensorlayer/layers/core/core_tensorflow.py
+++ b/tensorlayer/layers/core/core_tensorflow.py
@@ -16,6 +16,30 @@ Parameter_ = tf.Variable


 class Module(object):
    """The basic :class:`Module` class represents a single layer of a neural network.
        It should be subclassed when implementing new types of layers.
        Parameters
        ----------
        name : str or None
            A unique layer name. If None, a unique name will be automatically assigned.
        Methods
        ---------
        __init__()
            Initializing the Layer.
        __call__()
            Forwarding the computation.
        all_weights()
            Return a list of Tensor which are all weights of this Layer.
        trainable_weights()
            Return a list of Tensor which are all trainable weights of this Layer.
        nontrainable_weights()
            Return a list of Tensor which are all nontrainable weights of this Layer.
        build()
            Abstract method. Build the Layer. All trainable weights should be defined in this function.
        forward()
            Abstract method. Forward computation and return computation results.

        """

    def __init__(self, name=None, act=None, *args, **kwargs):
        self._params = OrderedDict()
@@ -51,7 +75,8 @@ class Module(object):
            str_act = str2act(act)

        if act:
            if isinstance(act, str) and (len(act) > 5 and act[0:5] == "lrelu" or len(act) > 10 and act[0:10] == "leaky_relu"):
            if isinstance(act, str) and (len(act) > 5 and act[0:5] == "lrelu" or
                                         len(act) > 10 and act[0:10] == "leaky_relu"):
                self.act = str_act
            elif isinstance(act, str):
                self.act = str_act()
@@ -68,9 +93,9 @@ class Module(object):
        self._nodes_fixed = False

        # Layer weight state
        self._all_weights = []
        self._trainable_weights = []
        self._nontrainable_weights = []
        self._all_weights = None
        self._trainable_weights = None
        self._nontrainable_weights = None

        # layer forward  state
        self._forward_state = False
@@ -83,7 +108,9 @@ class Module(object):
        Sets the extended representation of the Module.

        To print customized extended information, re-implement this method in your own Layers.

        """

        return ''

    def __repr__(self):
@@ -123,7 +150,6 @@ class Module(object):
                del self.__dict__[name]
            if params and name in params:
                raise TypeError("Expected type is Parameter, but got Module.")
            # TODO How to prompt the user, enter the in_channels.
            # TODO Automatic shape inference when the user does not enter inchannels.
            # if value._built is False:
            #     raise AttributeError(
@@ -146,8 +172,9 @@ class Module(object):
    def build(self, inputs_shape):
        """Abstract hook that every concrete layer has to override.

        Parameters
        ----------
        inputs_shape :
            Shape information supplied by the caller; unused in this base
            implementation.

        Raises
        ------
        Exception
            Always, because the base class provides no implementation.
        """
        message = "The build(self, inputs_shape) method must be implemented by inherited class"
        raise Exception(message)

    def _get_weights(self, var_name, shape, init=tl.initializers.random_normal(), trainable=True):
    def _get_weights(self, var_name, shape, init=tl.initializers.random_normal(), trainable=True, transposed=None):
        """ Get trainable variables. """

        weight = get_variable_with_initializer(
            scope_name=self.name, var_name=var_name, shape=shape, init=init, trainable=trainable
        )
@@ -156,10 +183,12 @@ class Module(object):

    def save_weights(self, file_path, format=None):
        """Input file_path, save model weights into a file of given format.

        Delegates to ``_save_weights``, passing this module, ``file_path`` and
        ``format`` positionally and unchanged.

        Parameters
        ----------
        file_path :
            Destination of the saved weights (presumably a path string - confirm
            against ``_save_weights``).
        format :
            Storage format selector. None is forwarded as-is; how it is
            interpreted is decided by ``_save_weights``.
        """

        _save_weights(self, file_path, format)

    def load_weights(self, file_path, format=None, in_order=True, skip=False):
        """Load model weights from a given file, which should be previously saved by self.save_weights().

        Delegates to ``_load_weights``, passing this module followed by all four
        arguments positionally and unchanged.

        Parameters
        ----------
        file_path :
            Source of the saved weights (presumably a path string - confirm).
        format :
            Storage format selector; interpretation is left to ``_load_weights``.
        in_order : bool
            Forwarded as-is. Default: True.
        skip : bool
            Forwarded as-is. Default: False.
        """

        _load_weights(self, file_path, format, in_order, skip)

    def _set_mode_for_layers(self, is_train):
@@ -171,12 +200,12 @@ class Module(object):
            Network's mode. True means training mode while False means evaluation mode.

        """

        layers = self.layers_and_names(name_prefix='')
        for layer_name, layer in layers:
            if isinstance(layer, Module):
                layer.is_train = is_train


    def set_train(self):
        """Set this network in training mode. After calling this method,
        all layers in network are in training mode, in particular, BatchNorm, Dropout, etc.
@@ -188,6 +217,7 @@ class Module(object):
        >>> net.set_train()

        """

        if self.is_train !=True:
            self.is_train = True
            self._set_mode_for_layers(True)
@@ -200,22 +230,15 @@ class Module(object):
        --------
        >>> import tensorlayer as tl
        >>> net = tl.vgg16()
        >>> net.eval()
        >>> net.set_eval()
        # do evaluation

        """

        if self.is_train != False:
            self.is_train = False
            self._set_mode_for_layers(False)

    def test(self):
        """Set this network in evaluation mode.

        Convenience alias: the body only calls ``self.eval()`` and returns None.
        """
        self.eval()

    def infer(self):
        """Set this network in evaluation mode.

        Convenience alias: the body only calls ``self.eval()`` and returns None.
        """
        self.eval()

    @staticmethod
    def _compute_shape(tensors):
        if isinstance(tensors, list):
@@ -231,16 +254,17 @@ class Module(object):
        Inserts a parameter with given name to the layer. Please refer to the usage in
        source code of `tensorlayer.layer.Module.__setattr__`.

        Args:
            param_name (str): Name of the parameter.
            param (Parameter): Parameter to be inserted to the layer.
            check_name (bool): Determines whether the name input is compatible. Default: True.
        Parameters
        ----------
        param_name : str
            Name of the parameter.
        param : Parameter
            Parameter to be inserted to the layer.
        check_name : bool
            Determines whether the name input is compatible. Default: True.

        Raises:
            KeyError: If the name of parameter is null or contains dot.
            AttributeError: If user did not call init() first.
            TypeError: If the type of parameter is not Parameter_.
        """

        if not param_name:
            raise KeyError("The name of parameter should not be null.")
        if check_name and '.' in param_name:
@@ -271,6 +295,7 @@ class Module(object):
            Output tensors to this layer.

        """

        raise NotImplementedError

    @property
@@ -304,73 +329,87 @@ class Module(object):
    def trainable_weights(self):
        """
        Returns all trainable weights.

        Returns a list of all trainable parmeters.

        Args:
            recurse (bool): Whether contains the trainable weights of sublayers. Default: True.

        Returns:
            List, the list of trainable weights.
        """
        self.get_weights()
        layers = self.layers_and_names(name_prefix='')
        for layer_name, layer in layers:
            params = layer._params.items()
            params_status = layer._params_status.items()
            params_zip = zip(params, params_status)
            for params, params_status in params_zip:
                if params_status[1] ==True:
                    self._trainable_weights.append(params[1])

        if self._trainable_weights is not None and len(self._trainable_weights) > 0:
            # self._trainable_weights already extracted, so do nothing
            pass
        else:
            self._trainable_weights = []
            layers = self.layers_and_names(name_prefix='')
            for layer_name, layer in layers:
                params = layer._params.items()
                params_status = layer._params_status.items()
                params_zip = zip(params, params_status)
                for params, params_status in params_zip:
                    if params_status[1] ==True:
                        self._trainable_weights.append(params[1])
        return self._trainable_weights

    @property
    def nontrainable_weights(self):
        """
        Returns all untrainable weights.

        Returns a list of all untrainable weights.

        Args:
            recurse (bool): Whether contains the untrainable weights of sublayers. Default: True.

        Returns:
            List, the list of untrainable weights.
        """
        layers = self.layers_and_names(name_prefix='')
        for layer_name, layer in layers:
            params = layer._params.items()
            params_status = layer._params_status.items()
            params_zip = zip(params, params_status)
            for params, params_status in params_zip:
                if params_status[1] == False:
                    self._nontrainable_weights.append(params[1])

        if self._nontrainable_weights is not None and len(self._nontrainable_weights) > 0:
            # self._nontrainable_weights already extracted, so do nothing
            pass
        else:
            self._nontrainable_weights = []
            layers = self.layers_and_names(name_prefix='')
            for layer_name, layer in layers:
                params = layer._params.items()
                params_status = layer._params_status.items()
                params_zip = zip(params, params_status)
                for params, params_status in params_zip:
                    if params_status[1] == False:
                        self._nontrainable_weights.append(params[1])
        return self._nontrainable_weights

    @property
    def all_weights(self):
        layers = self.layers_and_names(name_prefix='')
        for layer_name, layer in layers:
            params = layer._params.items()
            for par, val in params:
                self._all_weights.append(val)
        """
        Returns all weights.
        Returns a list of all weights.

        """

        if self._all_weights is not None and len(self._all_weights) > 0:
            # self._all_weights already extracted, so do nothing
            pass
        else:
            self._all_weights = []
            layers = self.layers_and_names(name_prefix='')
            for layer_name, layer in layers:
                params = layer._params.items()
                for par, val in params:
                    self._all_weights.append(val)
        return self._all_weights

    def get_weights(self, expand=True):
        """
        Returns an iterator over layer weights.

        Yields weights of this layer. If `expand` is True, yield parameters of this layer and all sublayers.
        This is a generator: it walks ``self.parameters_and_names(expand=expand)``
        and yields only the parameter of each ``(name, parameter)`` pair.

        Parameters
        ----------
        expand : bool
            If True, yields parameters of this layer and all sublayers. Otherwise, yields only parameters
            that are direct members of this layer. Default: True.

        Yields
        ------
        The parameter objects, with their names dropped.

        Examples
        ---------
        >>> net = Net()
        >>> for item in net.get_weights():
        >>>     print(item)

        """

        for _, param in self.parameters_and_names(expand=expand):
            yield param

@@ -387,14 +426,15 @@ class Module(object):
        """
        Adds a child layer to the current layer.

        Args:
            child_name (str): Name of the child layer.
            child (Module): The child layer to be inserted.
        Parameters
        ----------
        child_name : str
            Name of the child layer.
        child : Module
            The child layer to be inserted.

        Raises:
            KeyError: Child Module's name is incorrect or duplicated with the other child name.
            TypeError: Child Module's type is incorrect.
        """

        if not child_name or '.' in child_name:
            raise KeyError("Child layer name is incorrect.")
        if hasattr(self, child_name) and child_name not in self._layers:
@@ -409,18 +449,24 @@ class Module(object):

        Includes the parameter's name  and itself.

        Args:
            name_prefix (str): Namespace. Default: ''.
            expand (bool): If True, yields parameters of this layer and all sublayers. Otherwise, yields only parameters
                           that are direct members of this layer. Default: True.
        Parameters
        ----------
        name_prefix : str
            Namespace. Default: ''.
        expand : bool
            If True, yields parameters of this layer and all sublayers. Otherwise, yields only parameters
            that are direct members of this layer. Default: True.

        Examples
        ---------
        >>> n = Net()
        >>> names = []
        >>> for m in n.parameters_and_names():
        >>>     if m[0]:
        >>>         names.append(m[0])

        Examples:
            >>> n = Net()
            >>> names = []
            >>> for m in n.parameters_and_names():
            >>>     if m[0]:
            >>>         names.append(m[0])
        """

        layers = []
        if expand:
            layers = self.layers_and_names(name_prefix=name_prefix)
@@ -447,17 +493,23 @@ class Module(object):

        Includes the layer's name and itself.

        Args:
            layers (str): layers to iterate over. Default: None.
            name_prefix (str): Namespace. Default: ''.
        Parameters
        ----------
        layers : str
            layers to iterate over. Default: None.
        name_prefix : str
            Namespace. Default: ''.

        Examples
        ---------
        >>> n = Net()
        >>> names = []
        >>> for m in n.layers_and_names():
        >>>     if m[0]:
        >>>         names.append(m[0])

        Examples:
            >>> n = Net()
            >>> names = []
            >>> for m in n.layers_and_names():
            >>>     if m[0]:
            >>>         names.append(m[0])
        """

        t_layers = layers if layers else set()
        if self in t_layers:
            return
@@ -475,6 +527,7 @@ class Module(object):

    def layers(self):
        """Return the immediate layers, i.e. the values of ``self.name_layers()``."""
        named_layers = self.name_layers()
        # Only the layer objects are exposed; their names are dropped.
        return named_layers.values()

    def name_layers(self):
@@ -483,6 +536,7 @@ class Module(object):

        Include name of the layer and layer itself.
        """

        value_set = set()
        layers = OrderedDict()
        for name, layer in self._layers.items():
@@ -494,7 +548,7 @@ class Module(object):
    def init_build(self, *inputs, **kwargs):
        """
        Run one forward pass with the given arguments and discard its result.

        (1) This method must be called when the Layer has no input in_channels.
        (2) Automatic shape inference when the user does not enter in_channels.

        Parameters
        ----------
        *inputs, **kwargs
            Forwarded unchanged to ``self.forward``.
        """

        self.forward(*inputs, **kwargs)
@@ -502,31 +556,35 @@ class Module(object):

 class SequentialLayer(Module):
    """
    Sequential layer container.

    A list of Layers will be added to it in the order they are passed in the constructor.
    Alternatively, an ordered dict of layers can also be passed in.

    Args:
        args (list, OrderedDict): List of subclass of Module.

    Raises:
        TypeError: If the type of the argument is not list or OrderedDict.

    Inputs:
        - **input** (Tensor) - Tensor with shape according to the first Module in the sequence.

    Outputs:
        Tensor, the output Tensor with shape depending on the input and defined sequence of Layers.
    The class :class:`SequentialLayer` is a linear stack of layers.
    The :class:`SequentialLayer` can be created by passing a list of layer instances.
    The given layer instances will be automatically connected one by one.
    Parameters
    ----------
    layers: list of Layer
        A list of layers.
    name : str or None
        A unique layer name. If None, a unique name will be automatically assigned.
    Methods
    ---------
    __init__()
        Initializing the LayerList.
    weights()
        A collection of weights of all the layer instances.
    build()
        Build the LayerList. The layer instances will be connected automatically one by one.
    forward()
        Forward the computation. The computation will go through all layer instances.

    Examples:
        >>> conv = tl.layers.Conv2d(3, 2, 3, pad_mode='valid')
        >>> bn = tl.layers.BatchNorm2d(2)
        >>> seq = tl.layers.SequentialLayer([conv, bn])
        >>>
        >>> x = tl.layers.Input((1, 3, 4, 4))
        >>> seq(x)
    Examples
    ---------
    >>> conv = tl.layers.Conv2d(3, 2, 3, pad_mode='valid')
    >>> bn = tl.layers.BatchNorm2d(2)
    >>> seq = tl.layers.SequentialLayer([conv, bn])
    >>> x = tl.layers.Input((1, 3, 4, 4))
    >>> seq(x)
    """

    def __init__(self, *args):
        super(SequentialLayer, self).__init__()
        self._built = True
@@ -547,21 +605,20 @@ class SequentialLayer(Module):

    def __getitem__(self, index):
        if isinstance(index, slice):
            return self.__class__(
                OrderedDict(list(self._layers.items())[index]))
        index = self._valid_index(len(self), index)
            return self.__class__(OrderedDict(list(self._layers.items())[index]))
        index = _valid_index(len(self), index)
        return list(self._layers.values())[index]

    def __setitem__(self, index, layer):
        if self._valid_module(layer):
            index = self._valid_index(len(self), index)
        if _valid_module(layer):
            index = _valid_index(len(self), index)
            key = list(self._layers.keys())[index]
            self._layers[key] = layer
            self.layer_list = list(self._layers.values())

    def __delitem__(self, index):
        if isinstance(index, int):
            index = self._valid_index(len(self), index)
            index = _valid_index(len(self), index)
            key = list(self._layers.keys())[index]
            del self._layers[key]
        elif isinstance(index, slice):
@@ -575,9 +632,8 @@ class SequentialLayer(Module):
    def __len__(self):
        """Return the number of entries held in ``self._layers``."""
        return len(self._layers)


    def append(self, layer):
        if self._valid_module(layer):
        if _valid_module(layer):
            self._layers[str(len(self))] = layer
        self.layer_list = list(self._layers.values())
        return self
@@ -590,176 +646,131 @@ class SequentialLayer(Module):
            input_data = layer(input_data)
        return input_data

    def _valid_index(self, layer_num, index):
        if not isinstance(index, int):
            raise TypeError("Index {} is not int type")
        if not -layer_num <= index < layer_num:
            raise IndexError("Index should be a number in range [{}, {}), but got {}"
                             .format(-layer_num, layer_num, index))
        return index % layer_num

    def _valid_module(self, layer):
        """
        Check that ``layer`` is an instance of a ``Module`` subclass.

        Returns
        -------
        bool
            ``True`` when the class of ``layer`` is ``Module`` or a subclass of it.

        Raises
        ------
        TypeError
            Otherwise.
        """
        if not issubclass(layer.__class__, Module):
            raise TypeError('Module {} is not subclass of Module'.format(layer))
        return True


 class LayerList(Module):
    """
    The class :class:`LayerList` is a linear stack of layers.
    Holds Modules in a list.

    The :class:`LayerList` can be created by passing a list of layer instances.
    The given layer instances will be automatically connected one by one.
    LayerList can be used like a regular Python list, support
    '__getitem__', '__setitem__', '__delitem__', '__len__', '__iter__' and '__iadd__',
    but module it contains are properly registered, and will be visible by all Modules methods.

    Parameters
    ----------
    layers: list of Layer
        A list of layers.
    name : str or None
        A unique layer name. If None, a unique name will be automatically assigned.

        args : list
            List of subclass of Module.
    Methods
    ---------
    __init__()
        Initializing the LayerList.
    weights()
        A collection of weights of all the layer instances.
    build()
        Build the LayerList. The layer instances will be connected automatically one by one.
    forward()
        Forward the computation. The computation will go through all layer instances.
        Initializing the Layer.
    insert()
        Inserts a given layer before a given index in the list.
    extend()
        Appends layers from a Python iterable to the end of the list.
    append()
        Appends a given layer to the end of the list.

    Examples
    ---------
    Args:
        args (list, optional): List of subclass of Module.

    Examples:

    """

    def __init__(self, layers, name=None):
        """
        Initializing the LayerList given a list of Layer.

        :param layers: list of Layer
        :param name: str or None
        """

        super(LayerList, self).__init__(name=name)
        self.layers = layers
        is_built = True
        for layer in self.layers:
            self._trainable_weights.extend(layer.trainable_weights)
            self._nontrainable_weights.extend(layer.nontrainable_weights)
            if layer._built is False:
                is_built = False
        #     if layer._built and layer.all_weights is not None:
        #         # some layers in the list passed in have already been built
        #         # e.g. using input shape to construct layers in dynamic eager
        #         if self._all_weights is None:
        #             self._all_weights = list()
        #         self._all_weights.extend(layer.all_weights)
        if is_built:
            self._built = True

        logging.info(
            "LayerList %s including layers [%s]" % (self.name, ', '.join([layer.name for layer in self.layers]))
        )
    def __init__(self, args):
        super(LayerList, self).__init__()
        self.extend(args)

        # check layer name uniqueness in LayerList
        local_layer_name_set = set()
        for layer in self.layers:
            if layer.name not in local_layer_name_set:
                local_layer_name_set.add(layer.name)
            else:
                raise ValueError(
                    'Layer name \'%s\' has already been used by another layer. Please change the layer name.' %
                    layer.name
                )
    def __getitem__(self, index):
        """
        Fetch one layer by integer position, or a sub-container by slice.

        Parameters
        ----------
        index : int or slice
            An integer is checked and normalised by ``_valid_index`` and then
            used (as a string) to look up ``self._layers``. A slice selects from
            the stored layers in order.

        Returns
        -------
        object
            The stored layer for an integer index; a new instance of
            ``self.__class__`` built from the selected layers for a slice.

        Raises
        ------
        TypeError
            If ``index`` is neither an ``int`` nor a ``slice``.
        """
        if isinstance(index, int):
            position = _valid_index(len(self), index)
            return self._layers[str(position)]
        if isinstance(index, slice):
            selected = list(self._layers.values())[index]
            return self.__class__(selected)
        raise TypeError('Index {} is not int type or slice type'.format(index))

    def __setitem__(self, index, layer):
        """
        Replace the layer stored at an integer position.

        Parameters
        ----------
        index : int
            Position to overwrite; checked and normalised by ``_valid_index``.
        layer : Module
            Replacement layer; checked by ``_valid_module``.

        Raises
        ------
        TypeError
            If ``index`` is not an ``int`` or ``layer`` fails ``_valid_module``.
        IndexError
            If ``index`` is rejected by ``_valid_index``.
        """
        if not isinstance(index, int):
            raise TypeError('Index {} is not int type'.format(index))
        # Validate the layer unconditionally. The previous combined condition
        # ``not isinstance(index, int) and _valid_module(layer)`` short-circuited
        # for every int index, so the layer was never checked on the path that
        # actually stores it.
        _valid_module(layer)
        index = _valid_index(len(self), index)
        self._layers[str(index)] = layer

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            return LayerList(list(self.layers)[idx])
    def __delitem__(self, index):
        if isinstance(index, int):
            index = _valid_index(len(self), index)
            del self._layers[str(index)]
        elif isinstance(index, slice):
            keys = list(self._layers.keys())[index]
            for key in keys:
                del self._layers[key]
        else:
            return self.layers[idx]
            raise TypeError('Index {} is not int type or slice type'.format(index))
        temp_dict = OrderedDict()
        for idx, layer in enumerate(self._layers.values()):
            temp_dict[str(idx)] = layer
        self._layers = temp_dict

    def __len__(self):
        return len(self.layers)
        return len(self._layers)

    def __repr__(self):
        tmpstr = 'LayerList' + '(\n'
        for idx, layer in enumerate(self.layers):
            modstr = layer.__repr__()
            modstr = _addindent(modstr, 2)
            tmpstr = tmpstr + '  (' + str(idx) + '): ' + modstr + '\n'
    def __iter__(self):
        """Return an iterator over the values of ``self._layers`` (the stored layers, not their keys)."""
        return iter(self._layers.values())

        tmpstr = tmpstr + ')'
        return tmpstr
    def __iadd__(self, layers):
        """Implement ``self += layers`` by passing ``layers`` to ``self.extend`` and returning ``self``."""
        self.extend(layers)
        return self

    @property
    def trainable_weights(self):
        return self._trainable_weights
    def insert(self, index, layer):
        """
            Inserts a given layer before a given index in the list.

    @property
    def nontrainable_weights(self):
        return self._nontrainable_weights
        """

    @property
    def all_weights(self):
        return self._trainable_weights + self._nontrainable_weights

    # def build(self, inputs_shape):
    #     """
    #     Build the LayerList. The layer instances will be connected automatically one by one.
    #     """
    #     in_tensor = self._input_tensors
    #     # in_layer = self._input_layer
    #     for layer in self.layers:
    #         is_build = layer._built
    #         out_tensor = layer(in_tensor)
    #         # nlayer = layer(in_layer)
    #         if is_build is False and layer.all_weights is not None:
    #             if self._all_weights is None:
    #                 self._all_weights = list()
    #             self._all_weights.extend(layer.all_weights)
    #         layer._built = True
    #         in_tensor = out_tensor
    #         # in_layer = nlayer

    def forward(self, inputs):
        idx = _valid_index(len(self), index)
        _valid_module(layer)
        length = len(self)
        while length > idx:
            self._layers[str(length)] = self._layers[str(length - 1)]
            length -= 1
        self._layers[str(idx)] = layer

    def extend(self, layers):
        """
        Forward the computation. The computation will go through all layer instances.
            Appends layers from a Python iterable to the end of the list.

        """
        z = inputs
        for layer in self.layers:
            z = layer.forward(z)
        return z

    def _set_mode_for_layers(self, is_train):
        """Set training/evaluation mode for all layer instances."""
        self.is_train = is_train
        for layer in self.layers:
            if isinstance(layer, LayerList):
                layer._set_mode_for_layers(is_train)
            else:
                layer.is_train = is_train
        if not isinstance(layers, list):
            raise TypeError('Modules {} should be list of sublayers'.format(layers))
        for layer in layers:
            if _valid_module(layer):
                self._layers[str(len(self))] = layer
        return self

    def append(self, layer):
        """
        Append a single layer at the end of the list.

        Parameters
        ----------
        layer : Module
            Layer to add. It is checked with ``_valid_module`` first; the layer
            is stored under the key ``str(len(self))`` only when that check
            returns a truthy value.
        """
        if not _valid_module(layer):
            return
        next_key = str(len(self))
        self._layers[next_key] = layer

    def forward(self, *inputs):
        """Always raise ``NotImplementedError``; no forward computation is defined here."""
        raise NotImplementedError


 def _valid_index(layer_num, index):
    if not isinstance(index, int):
        raise TypeError("Index {} is not int type")
    if not -layer_num <= index < layer_num:
        raise IndexError("Index should be a number in range [{}, {}), but got {}".format(-layer_num, layer_num, index))
    return index % layer_num


    def get_args(self):
        """
        Build the init-argument dictionary describing this layer list.

        Returns
        -------
        dict
            ``"layers"`` maps to the ``config`` attribute of every entry in
            ``self.layer_args["layers"]`` (in order); ``"layer_type"`` is the
            string ``"layerlist"``.
        """
        member_configs = [member.config for member in self.layer_args["layers"]]
        return {"layers": member_configs, "layer_type": "layerlist"}

 def tolist(tensors):
    """
    Flatten arbitrarily nested lists/tuples into a single flat list.

    Parameters
    ----------
    tensors : object
        A list or tuple (possibly nested), or any other single object.

    Returns
    -------
    list
        The non-list/tuple leaves in left-to-right order. An argument that is
        neither a list nor a tuple comes back wrapped as a one-element list.
    """
    if not isinstance(tensors, (list, tuple)):
        return [tensors]
    flattened = []
    for item in tensors:
        flattened.extend(tolist(item))
    return flattened

 def _addindent(s_, numSpaces):
    s = s_.split('\n')
    # don't do anything for single-line stuff
    if len(s) == 1:
        return s_
    first = s.pop(0)
    s = [(numSpaces * ' ') + line for line in s]
    s = '\n'.join(s)
    s = first + '\n' + s
    return s
 def _valid_module(layer):
    """
    Check that ``layer`` is an instance of a ``Module`` subclass.

    Returns
    -------
    bool
        ``True`` when the class of ``layer`` is ``Module`` or a subclass of it.

    Raises
    ------
    TypeError
        Otherwise.
    """
    if not issubclass(layer.__class__, Module):
        raise TypeError('Module {} is not subclass of Module'.format(layer))
    return True
--- a/tensorlayer/layers/dense/base_dense.py
+++ b/tensorlayer/layers/dense/base_dense.py
@@ -34,10 +34,10 @@ class Dense(Module):
    With TensorLayer

    >>> net = tl.layers.Input([100, 50], name='input')
    >>> dense = tl.layers.Dense(n_units=800, act=tl.ops.relu, in_channels=50, name='dense_1')
    >>> dense = tl.layers.Dense(n_units=800, act=tl.ReLU, in_channels=50, name='dense_1')
    >>> print(dense)
    Dense(n_units=800, relu, in_channels='50', name='dense_1')
    >>> tensor = tl.layers.Dense(n_units=800, act=tl.ops.relu, name='dense_2')(net)
    >>> tensor = tl.layers.Dense(n_units=800, act=tl.ReLU, name='dense_2')(net)
    >>> print(tensor)
    tf.Tensor([...], shape=(100, 800), dtype=float32)

@@ -47,7 +47,6 @@ class Dense(Module):

    """

    # @cell_attr_register
    def __init__(
        self,
        n_units,
--- a/tensorlayer/layers/dense/binary_dense.py
+++ b/tensorlayer/layers/dense/binary_dense.py
@@ -34,6 +34,14 @@ class BinaryDense(Module):
    name : None or str
        A unique layer name.

    Examples
    --------
    >>> net = tl.layers.Input([10, 784], name='input')
    >>> net = tl.layers.BinaryDense(n_units=800, act=tl.ReLU, name='relu1')(net)
    >>> output shape :(10, 800)
    >>> net = tl.layers.BinaryDense(n_units=10, name='output')(net)
    >>> output shape : (10, 10)

    """

    def __init__(
@@ -90,7 +98,6 @@ class BinaryDense(Module):

        self.matmul = tl.ops.MatMul()


    def forward(self, inputs):
        if self._forward_state == False:
            if self._built == False: