From 9241541fa9092f0f59d4c8fc9a7b5035de62e836 Mon Sep 17 00:00:00 2001 From: Eric Lai Date: Wed, 12 May 2021 17:56:45 +0800 Subject: [PATCH] update dataflow --- docs/conf.py | 45 +- docs/index.rst | 2 +- docs/modules/activation.rst | 5 + docs/modules/app.rst | 10 + docs/modules/layers.rst | 13 + docs/modules/rein.rst | 5 +- docs/modules/visualize.rst | 4 + docs/user/contributing.rst | 2 + docs/user/get_involved.rst | 5 +- docs/user/installation.rst | 20 +- .../tutorial_paddle_tensorlayer_mlp.py | 44 +- tensorlayer/__init__.py | 1 + tensorlayer/backend/ops/__init__.py | 12 +- tensorlayer/backend/ops/dragon_backend.py | 14 +- tensorlayer/backend/ops/mindspore_backend.py | 23 +- tensorlayer/backend/ops/mindspore_nn.py | 380 +++- tensorlayer/backend/ops/paddle_backend.py | 19 +- tensorlayer/backend/ops/paddle_nn.py | 104 ++ tensorlayer/backend/ops/tensorflow_backend.py | 14 +- tensorlayer/backend/ops/tensorflow_nn.py | 329 +++- tensorlayer/cost/paddle_cost.py | 83 +- tensorlayer/dataflow/__init__.py | 10 +- tensorlayer/dataflow/image/__init__.py | 15 + tensorlayer/dataflow/image/mindspore_image.py | 1539 +++++++++++++++++ tensorlayer/dataflow/image/paddle_image.py | 19 + .../dataflow/image/tensorflow_image.py | 760 ++++++++ tensorlayer/dataflow/mindspore_data.py | 14 +- tensorlayer/dataflow/mindspore_image.py | 305 ---- tensorlayer/dataflow/paddle_data.py | 131 ++ tensorlayer/dataflow/tensorflow_data.py | 12 + tensorlayer/dataflow/tensorflow_image.py | 200 --- tensorlayer/layers/convolution/__init__.py | 18 +- tensorlayer/layers/convolution/binary_conv.py | 155 ++ tensorlayer/layers/convolution/dorefa_conv.py | 168 ++ tensorlayer/layers/convolution/group_conv.py | 164 ++ .../layers/convolution/separable_conv.py | 319 ++++ tensorlayer/layers/pooling.py | 359 +++- tensorlayer/metric/__init__.py | 15 + tensorlayer/metric/mindspore_metric.py | 88 + tensorlayer/metric/paddle_metric.py | 89 + tensorlayer/metric/tensorflow_metric.py | 98 ++ tensorlayer/models/core.py | 108 +- tensorlayer/optimizers/__init__.py | 4 +- .../optimizers/mindspore_optimizers.py | 6 +- tensorlayer/optimizers/paddle_optimizers.py | 355 +++- .../optimizers/tensorflow_optimizers.py | 4 +- tests/dataflow/__init__.py | 0 tests/dataflow/test_dataflow_image.py | 279 +++ tests/layers/test_layers_convolution.py | 99 +- tests/layers/test_layers_pooling.py | 34 +- 50 files changed, 5793 insertions(+), 708 deletions(-) create mode 100644 docs/modules/app.rst create mode 100644 tensorlayer/dataflow/image/mindspore_image.py create mode 100644 tensorlayer/dataflow/image/paddle_image.py create mode 100644 tensorlayer/dataflow/image/tensorflow_image.py delete mode 100644 tensorlayer/dataflow/mindspore_image.py create mode 100644 tensorlayer/dataflow/paddle_data.py delete mode 100644 tensorlayer/dataflow/tensorflow_image.py create mode 100644 tensorlayer/layers/convolution/binary_conv.py create mode 100644 tensorlayer/layers/convolution/dorefa_conv.py create mode 100644 tensorlayer/layers/convolution/group_conv.py create mode 100644 tensorlayer/layers/convolution/separable_conv.py create mode 100644 tensorlayer/metric/__init__.py create mode 100644 tensorlayer/metric/mindspore_metric.py create mode 100644 tensorlayer/metric/paddle_metric.py create mode 100644 tensorlayer/metric/tensorflow_metric.py create mode 100644 tests/dataflow/__init__.py create mode 100644 tests/dataflow/test_dataflow_image.py diff --git a/docs/conf.py b/docs/conf.py index 89f1601..d9dd03b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -19,7 +19,7 @@ # import os, sys, 
datetime sys.path.insert(0, os.path.abspath("../")) # Important -sys.path.insert(0, os.path.abspath(os.path.join("..", "tensorlayer"))) # Important +sys.path.insert(0, os.path.abspath(os.path.join("..", "tensorlayer"))) # Important from package_info import __shortversion__ from package_info import __version__ @@ -159,7 +159,6 @@ pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False - # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for @@ -284,29 +283,28 @@ htmlhelp_basename = 'TensorLayerdoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'TensorLayer.tex', 'TensorLayer Documentation', - 'TensorLayer contributors', 'manual'), + (master_doc, 'TensorLayer.tex', 'TensorLayer Documentation', 'TensorLayer contributors', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of @@ -335,30 +333,26 @@ latex_documents = [ # # latex_domain_indices = True - # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'tensorlayer', 'TensorLayer Documentation', - [author], 1) -] +man_pages = [(master_doc, 'tensorlayer', 'TensorLayer Documentation', [author], 1)] # If true, show URL addresses after external links. # # man_show_urls = False - # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'TensorLayer', 'TensorLayer Documentation', - author, 'TensorLayer', 'Deep learning and Reinforcement learning library for Researchers and Engineers.', - 'Miscellaneous'), + ( + master_doc, 'TensorLayer', 'TensorLayer Documentation', author, 'TensorLayer', + 'Deep learning and Reinforcement learning library for Researchers and Engineers.', 'Miscellaneous' + ), ] # Documents to append as an appendix to all manuals. @@ -377,7 +371,6 @@ texinfo_documents = [ # # texinfo_no_detailmenu = False - # -- Options for Epub output ---------------------------------------------- # Bibliographic Dublin Core info. diff --git a/docs/index.rst b/docs/index.rst index 977f337..b4b1fd2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,7 +9,7 @@ Welcome to TensorLayer **Documentation Version:** |release| -**Jun 2019** `Deep Reinforcement Learning Model ZOO Release !! `__. +**Jun 2020** `Deep Reinforcement Learning Book Is Released `__. 
 **Good News:** We won the **Best Open Source Software Award** `@ACM Multimedia (MM) 2017 `_.
diff --git a/docs/modules/activation.rst b/docs/modules/activation.rst
index 3965bd0..79bad96 100644
--- a/docs/modules/activation.rst
+++ b/docs/modules/activation.rst
@@ -35,6 +35,7 @@ For more complex activation, TensorFlow API will be required.
    sign
    hard_tanh
    pixel_wise_softmax
+   mish
 
 Ramp
 ------
@@ -68,6 +69,10 @@ Pixel-wise softmax
 --------------------
 .. autofunction:: pixel_wise_softmax
 
+mish
+---------
+.. autofunction:: mish
+
 Parametric activation
 ------------------------------
 See ``tensorlayer.layers``.
diff --git a/docs/modules/app.rst b/docs/modules/app.rst
new file mode 100644
index 0000000..d636292
--- /dev/null
+++ b/docs/modules/app.rst
@@ -0,0 +1,10 @@
+API - Application Library
+=========================
+
+The application library is an open-source collection of deep learning applications built on TensorLayer.
+
+Supported Applications:
+-------------------------
+
+
+
diff --git a/docs/modules/layers.rst b/docs/modules/layers.rst
index f6c86a5..78e0eee 100644
--- a/docs/modules/layers.rst
+++ b/docs/modules/layers.rst
@@ -13,6 +13,9 @@ Layer list
 .. autosummary::
 
    Layer
+
+   ModelLayer
+   LayerList
 
    Input
@@ -131,8 +134,18 @@ Layer list
 
 Base Layer
 -----------
+Base Layer
+^^^^^^^^^^^^^^^^
 .. autoclass:: Layer
 
+Model Layer
+^^^^^^^^^^^^^^^^
+.. autoclass:: ModelLayer
+
+Layer List
+^^^^^^^^^^^^^^^^
+.. autoclass:: LayerList
+
 .. -----------------------------------------------------------
 ..                        Input Layer
 .. -----------------------------------------------------------
diff --git a/docs/modules/rein.rst b/docs/modules/rein.rst
index 9ee16a6..79f4e76 100644
--- a/docs/modules/rein.rst
+++ b/docs/modules/rein.rst
@@ -1,7 +1,10 @@
 API - Reinforcement Learning
 ==============================
 
-Reinforcement Learning.
+We provide two reinforcement learning libraries:
+
+- `RL-tutorial `__ for professional users with low-level APIs.
+- `RLzoo `__ for simple usage with high-level APIs.
 
 .. automodule:: tensorlayer.rein
diff --git a/docs/modules/visualize.rst b/docs/modules/visualize.rst
index 0bbe028..0ef8f3b 100644
--- a/docs/modules/visualize.rst
+++ b/docs/modules/visualize.rst
@@ -19,6 +19,7 @@ to visualize the model, activations etc. Here we provide more functions for data
    frame
    images2d
    tsne_embedding
+   draw_boxes_and_labels_to_image_with_json
 
 
 Save and read images
@@ -44,6 +45,9 @@ Save image for object detection
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. autofunction:: draw_boxes_and_labels_to_image
 
+Save image for object detection with json
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: draw_boxes_and_labels_to_image_with_json
 
 Save image for pose estimation (MPII)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/docs/user/contributing.rst b/docs/user/contributing.rst
index a83767a..9b1d98f 100644
--- a/docs/user/contributing.rst
+++ b/docs/user/contributing.rst
@@ -40,8 +40,10 @@ For TensorLayer 1.x, it was actively developed and maintained by the following p
 - **Hao Dong** (`@zsdonghao `_) - ``_
 - **Jonathan Dekhtiar** (`@DEKHTIARJonathan `_) - ``_
 - **Luo Mai** (`@luomai `_) - ``_
+- **Pan Wang** (`@FerociousPanda `_) - ``_ (UI)
 - **Simiao Yu** (`@nebulaV `_) - ``_
 
+
 Numerous other contributors can be found in the `Github Contribution Graph `_.
diff --git a/docs/user/get_involved.rst b/docs/user/get_involved.rst
index 3fe4131..90d699a 100644
--- a/docs/user/get_involved.rst
+++ b/docs/user/get_involved.rst
@@ -9,9 +9,10 @@ Ph.D.
Position @ PKU
 
 Hi, I am `Hao Dong `__, the founder of this project and a new faculty member in EECS, Peking University. I now have a few Ph.D. positions per year open for international students who would like to study AI. If you or your friends are interested in it, feel free to contact me.
 
-PKU is a top 30 university in the global ranking. The application is competitive, apply early is recommended. For the application of next year, please note that the DDL of Chinese Government Scholarship is in the end of each year, please check this `link `__ for more details.
+PKU is a top 30 university in the global ranking. The application is competitive, so applying early is recommended. Please check the following links for more details.
 
-My homepage: `https://zsdonghao.github.io `__
+- `About the International Elite Ph.D. Program in Computer Science `__
+- `My homepage `__
 
 Contact: hao.dong [AT] pku.edu.cn
diff --git a/docs/user/installation.rst b/docs/user/installation.rst
index bb86054..3ba467f 100644
--- a/docs/user/installation.rst
+++ b/docs/user/installation.rst
@@ -35,6 +35,8 @@ For stable version:
 .. code-block:: bash
 
     pip3 install tensorlayer
+
+    pip3 install tensorlayer -i https://pypi.tuna.tsinghua.edu.cn/simple (faster in China)
 
 For latest version, please install from Github.
 
@@ -60,7 +62,7 @@ Alternatively, you can build from the source.
     cd tensorlayer
 
     # Install virtualenv if necessary
-    pip install virtualenv
+    sudo pip3 install virtualenv
 
     # Then create a virtualenv called `venv`
     virtualenv venv
@@ -73,21 +75,21 @@ Alternatively, you can build from the source.
     venv\Scripts\activate.bat
 
     # basic installation
-    pip install .
+    pip3 install .
 
     # ============= IF TENSORFLOW IS NOT ALREADY INSTALLED =============
     #
     # for a machine **without** an NVIDIA GPU
-    pip install -e ".[all_cpu_dev]"
+    pip3 install -e ".[all_cpu_dev]"
 
     # for a machine **with** an NVIDIA GPU
-    pip install -e ".[all_gpu_dev]"
+    pip3 install -e ".[all_gpu_dev]"
 
 If you want install TensorLayer 1.X, the simplest way to install TensorLayer 1.X is as follow. It will also install the numpy and matplotlib automatically.
 
 .. code-block:: bash
 
-    [stable version] pip install tensorlayer==1.x.x
+    [stable version] pip3 install tensorlayer==1.x.x
 
 However, if you want to modify or extend TensorLayer 1.X, you can download the repository from `Github`_ and install it as follow.
 
 .. code-block:: bash
 
     cd to the root of the git tree
-    pip install -e .
+    pip3 install -e .
 
 This command will run the ``setup.py`` to install TensorLayer. The ``-e`` reflects editable, then you can edit the source code in ``tensorlayer`` folder, and ``import`` the edited
@@ -194,9 +196,9 @@ For TensorLayer, please refer to the steps mentioned above.
 
 .. code-block:: bash
 
-    pip install tensorflow #CPU version
-    pip install tensorflow-gpu   #GPU version (GPU version and CPU version just choose one)
-    pip install tensorlayer       #Install tensorlayer
+    pip3 install tensorflow #CPU version
+    pip3 install tensorflow-gpu   #GPU version (GPU version and CPU version just choose one)
+    pip3 install tensorlayer       #Install tensorlayer
 
 
diff --git a/examples/basic_tutorials/tutorial_paddle_tensorlayer_mlp.py b/examples/basic_tutorials/tutorial_paddle_tensorlayer_mlp.py
index 96d57b5..ce02d34 100644
--- a/examples/basic_tutorials/tutorial_paddle_tensorlayer_mlp.py
+++ b/examples/basic_tutorials/tutorial_paddle_tensorlayer_mlp.py
@@ -1,25 +1,23 @@
 #!
/usr/bin/python # -*- coding: utf-8 -*- + import os os.environ['TL_BACKEND'] = 'paddle' - -import paddle.nn.functional as F -from paddle.vision.transforms import Compose, Normalize -import paddle +# os.environ['TL_BACKEND'] = 'tensorflow' import tensorlayer as tl from tensorlayer.layers import Module from tensorlayer.layers import Dense, Flatten -transform = Compose([Normalize(mean=[127.5], - std=[127.5], - data_format='CHW')]) print('download training data and load training data') -train_dataset = paddle.vision.datasets.MNIST(mode='train', transform=transform) -test_dataset = paddle.vision.datasets.MNIST(mode='test', transform=transform) + +X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 784)) + print('load finished') + class MLP(Module): + def __init__(self): super(MLP, self).__init__() self.linear1 = Dense(n_units=120, in_channels=784, act=tl.ReLU) @@ -34,24 +32,12 @@ class MLP(Module): x = self.linear3(x) return x -train_loader = paddle.io.DataLoader(train_dataset, batch_size=64, shuffle=True) - -def train(model): - model.train() - epochs = 2 - optim = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.trainable_weights) - for epoch in range(epochs): - for batch_id, data in enumerate(train_loader()): - x_data = data[0] - y_data = data[1] - predicts = model(x_data) - loss = tl.cost.mean_squared_error(predicts, y_data) - acc = paddle.metric.accuracy(predicts, y_data) - loss.backward() - if batch_id % 5 == 0: - print("epoch: {}, batch_id: {}, loss is: {}, acc is: {}".format(epoch, batch_id, loss.numpy(), acc.numpy())) - optim.step() - optim.clear_grad() -model = MLP() -train(model) +traindataset = tl.dataflow.FromSlices((X_train, y_train)) +train_loader = tl.dataflow.Dataloader(traindataset, batch_size=64, shuffle=True) +net = MLP() + +optimizer = tl.optimizers.Adam(learning_rate=0.001) +metric = tl.metric.Accuracy() +model = tl.models.Model(network=net, loss_fn=tl.cost.cross_entropy, optimizer=optimizer, metrics=metric) +model.train(n_epoch=20, train_dataset=train_loader, print_freq=5, print_train_batch=True) diff --git a/tensorlayer/__init__.py b/tensorlayer/__init__.py index b111a3e..442dce1 100644 --- a/tensorlayer/__init__.py +++ b/tensorlayer/__init__.py @@ -50,6 +50,7 @@ if 'TENSORLAYER_PACKAGE_BUILDING' not in os.environ: from tensorlayer import rein from tensorlayer import utils from tensorlayer import dataflow + from tensorlayer import metric from tensorlayer.lazy_imports import LazyImport diff --git a/tensorlayer/backend/ops/__init__.py b/tensorlayer/backend/ops/__init__.py index 5b8a61b..96277ae 100644 --- a/tensorlayer/backend/ops/__init__.py +++ b/tensorlayer/backend/ops/__init__.py @@ -30,6 +30,9 @@ from .load_backend import depthwise_conv2d from .load_backend import Conv1d_transpose from .load_backend import Conv2d_transpose from .load_backend import Conv3d_transpose +from .load_backend import GroupConv2D +from .load_backend import BinaryConv2D +from .load_backend import DorefaConv2D from .load_backend import ReLU from .load_backend import ReLU6 @@ -47,6 +50,14 @@ from .load_backend import AvgPool from .load_backend import Dropout from .load_backend import BatchNorm from .load_backend import DepthwiseConv2d +from .load_backend import SeparableConv1D +from .load_backend import SeparableConv2D +from .load_backend import AdaptiveMeanPool1D +from .load_backend import AdaptiveMeanPool2D +from .load_backend import AdaptiveMeanPool3D +from .load_backend import AdaptiveMaxPool1D +from .load_backend import AdaptiveMaxPool2D +from 
.load_backend import AdaptiveMaxPool3D # load ops from .load_backend import Variable @@ -123,4 +134,3 @@ from .load_backend import Maximum from .load_backend import Meshgrid from .load_backend import BatchToSpace from .load_backend import DepthToSpace - diff --git a/tensorlayer/backend/ops/dragon_backend.py b/tensorlayer/backend/ops/dragon_backend.py index 37e6e5a..e62f27e 100644 --- a/tensorlayer/backend/ops/dragon_backend.py +++ b/tensorlayer/backend/ops/dragon_backend.py @@ -548,7 +548,9 @@ def reduce_min(input_tensor, axis=None): """ return D.min(input_tensor, axis) + class Pad(object): + def __init__(self, paddings, mode="REFLECT"): if mode not in ['CONSTANT', 'REFLECT', 'SYMMETRIC']: raise Exception("Unsupported mode: {}".format(mode)) @@ -561,6 +563,7 @@ class Pad(object): outputs = D.pad(x, pads=self.paddings, mode=self.mode, value=0) return outputs + def pad(tensor, paddings, mode='CONSTANT', constant_values=0): """ Pads a tensor. @@ -627,6 +630,7 @@ def stack(values, axis=0): class Meshgrid(object): + def __init__(self, indexing='xy'): super(Meshgrid, self).__init__() self.index = indexing @@ -947,7 +951,6 @@ class Count_nonzero(object): pass - class Resize: def __init__(self, scale, method, antialias=False, data_format='channels_last', ksize=None): @@ -1010,19 +1013,25 @@ class Sign(object): def __call__(self, x): return D.math.sign(x) + def ceil(x): raise NotImplementedError + def multiply(x, y): raise NotImplementedError + def divide(x, y): raise NotImplementedError + def identity(x): raise NotImplementedError + class BatchToSpace(object): + def __init__(self, block_size, crops): super(BatchToSpace, self).__init__() pass @@ -1032,8 +1041,9 @@ class BatchToSpace(object): class DepthToSpace(object): + def __init__(self, block_size, data_format='NHWC'): pass def __call__(self, input): - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/tensorlayer/backend/ops/mindspore_backend.py b/tensorlayer/backend/ops/mindspore_backend.py index d067be2..b602a4b 100644 --- a/tensorlayer/backend/ops/mindspore_backend.py +++ b/tensorlayer/backend/ops/mindspore_backend.py @@ -221,6 +221,7 @@ class Normal(Initializer): class RandomNormal(Cell): + def __init__(self, mean=0.0, stddev=0.01, seed=None): super(RandomNormal, self).__init__() self.normal = Normal(mean=mean, stddev=stddev, seed=seed) @@ -711,7 +712,9 @@ def reduce_min(input_tensor, axis=None): outputs = Rmin_obj(input_tensor, axis) return outputs + class Pad(Cell): + def __init__(self, paddings, mode="REFLECT"): super(Pad, self).__init__() if mode not in ["REFLECT", "SYMMETRIC"]: @@ -722,6 +725,7 @@ class Pad(Cell): def construct(self, x): return self.pad(x, self.paddings) + def pad(tensor, paddings, mode='CONSTANT', constant_values=0): """ Pads a tensor. @@ -745,6 +749,7 @@ def pad(tensor, paddings, mode='CONSTANT', constant_values=0): class Unstack(Cell): + def __init__(self, axis, num=None): super(Unstack, self).__init__() if num is not None: @@ -756,6 +761,7 @@ class Unstack(Cell): class Stack(Cell): + def __init__(self, axis=0): super(Stack, self).__init__() self.stack = P.Pack(axis=axis) @@ -785,6 +791,7 @@ def stack(values, axis=0): class Meshgrid(Cell): + def __init__(self, indexing='xy'): super(Meshgrid, self).__init__() self._meshgrid = P.Meshgrid(indexing=indexing) @@ -794,7 +801,6 @@ class Meshgrid(Cell): return self._meshgrid(inputs) - def meshgrid(*args, **kwargs): """ Broadcasts parameters for evaluation on an N-D grid. 
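For reference, the MindSpore Pad wrapper added above is exercised as follows. This is a minimal usage sketch, not part of the patch: it assumes a working MindSpore install, uses the module path from this patch's tree, and notes that this backend accepts only REFLECT/SYMMETRIC modes.

# Minimal sketch (assumption: MindSpore is installed and configured).
import os
os.environ['TL_BACKEND'] = 'mindspore'

import numpy as np
import mindspore as ms
from tensorlayer.backend.ops.mindspore_backend import Pad  # path from this patch

x = ms.Tensor(np.ones((2, 2), dtype=np.float32))
pad = Pad(paddings=((1, 1), (1, 1)), mode='REFLECT')  # 'CONSTANT' is rejected by this backend
y = pad(x)  # mirror-padded to shape (4, 4)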
@@ -815,7 +821,6 @@ def meshgrid(*args, **kwargs): return _meshgrid(*args) - def range(start, limit=None, delta=1, dtype=None): """ Creates a sequence of numbers. @@ -885,7 +890,6 @@ class Tile(Cell): return self.tile(input, tuple(multiples)) - def tile(input, multiples): """ Constructs a tensor by tiling a given tensor. @@ -1156,6 +1160,7 @@ def resize(inputs, output_size, method, antialias): class ZeroPadding1D(Cell): + def __init__(self, padding): super(ZeroPadding1D, self).__init__() if np.size(padding) == 2: @@ -1168,6 +1173,7 @@ class ZeroPadding1D(Cell): class ZeroPadding2D(Cell): + def __init__(self, padding): super(ZeroPadding2D, self).__init__() if np.size(padding) == 4: @@ -1180,6 +1186,7 @@ class ZeroPadding2D(Cell): class ZeroPadding3D(Cell): + def __init__(self, padding): super(ZeroPadding3D, self).__init__() if np.size(padding) == 6: @@ -1200,20 +1207,26 @@ class Sign(Cell): def construct(self, x): return self.sign(x) + def ceil(x): _ceil = P.Ceil() return _ceil(x) + def multiply(x, y): raise NotImplementedError + def divide(x, y): raise NotImplementedError + def identity(x): raise NotImplementedError + class BatchToSpace(Cell): + def __init__(self, block_size, crops): super(BatchToSpace, self).__init__() self.batch_to_space = P.BatchToSpace(block_size=block_size, crops=crops) @@ -1221,7 +1234,9 @@ class BatchToSpace(Cell): def __call__(self, input_x): return self.batch_to_space(input_x) + class DepthToSpace(Cell): + def __init__(self, block_size, data_format='NHWC'): super(DepthToSpace, self).__init__() self.data_format = data_format @@ -1236,4 +1251,4 @@ class DepthToSpace(Cell): if self.data_format == 'NHWC': output = nchw_to_nhwc(output) - return output \ No newline at end of file + return output diff --git a/tensorlayer/backend/ops/mindspore_nn.py b/tensorlayer/backend/ops/mindspore_nn.py index 3af4430..6e6619e 100644 --- a/tensorlayer/backend/ops/mindspore_nn.py +++ b/tensorlayer/backend/ops/mindspore_nn.py @@ -6,13 +6,13 @@ from __future__ import absolute_import, division, print_function from mindspore.nn.cell import Cell from mindspore import context import mindspore as ms -from mindspore.ops import operations as P +import mindspore.ops as P from mindspore.ops import functional as F from mindspore.communication.management import get_group_size, get_rank from mindspore.communication import management from mindspore._checkparam import check_int_positive from mindspore._extends import cell_attr_register - +from mindspore.ops._grad.grad_base import bprop_getters def padding_format(padding): @@ -876,7 +876,6 @@ def pool(input, window_shape, pooling_type, strides=None, padding='VALID', data_ pass - class DepthwiseConv2d(Cell): def __init__(self, strides, padding, data_format=None, dilations=None, ksize=None, channel_multiplier=1): @@ -1138,7 +1137,6 @@ def conv3d_transpose( pass - class BatchNorm(Cell): """Batch Normalization base class.""" @@ -1321,3 +1319,377 @@ class BatchNorm(Cell): if self.data_format == 'channels_last' and self.get_dim(x) == '2d': y = nchw_to_nhwc(y) return y + + +class GroupConv2D(Cell): + + def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, groups): + super(GroupConv2D, self).__init__() + self.data_format, self.padding = preprocess_2d_format(data_format, padding) + + if self.data_format is 'NHWC': + self.ms_stride = strides[1] + self.ms_dilation = dilations[1] + + elif self.data_format is 'NCHW': + self.ms_stride = strides[2] + self.ms_dilation = dilations[2] + + self.conv2d = P.Conv2D( + out_channel=out_channel, 
kernel_size=k_size, pad_mode=self.padding, stride=self.ms_stride, + dilation=self.ms_dilation, mode=1, group=groups + ) + + def construct(self, inputs, filters): + if self.data_format == 'NHWC': + inputs = nhwc_to_nchw(inputs) + + outputs = self.conv2d(inputs, filters) + + if self.data_format == 'NHWC': + outputs = nchw_to_nhwc(outputs) + return outputs + + +class SeparableConv1D(Cell): + + def __init__(self, stride, padding, data_format, dilations, out_channel, k_size, in_channel, depth_multiplier): + super(SeparableConv1D, self).__init__() + self.data_format, self.padding = preprocess_1d_format(data_format, padding) + self.stride = (1, stride) + self.dilations = (1, dilations) + self.k_size = (1, k_size) + self.out_channel = out_channel + self.in_channel = in_channel + self.depth_multiplier = depth_multiplier + self.depthwise_conv = P.Conv2D( + out_channel=self.in_channel * self.depth_multiplier, kernel_size=self.k_size, pad_mode=self.padding, + stride=self.stride, dilation=self.dilations, mode=1, group=self.in_channel + ) + + self.pointwise_conv = P.Conv2D( + out_channel=self.out_channel, kernel_size=(1, 1), pad_mode=self.padding, stride=(1, 1), dilation=(1, 1), + mode=1, group=1 + ) + + self.expand_dims = P.ExpandDims() + self.squeeze = P.Squeeze(2) + + def construct(self, x, depthwise_filters, pointwise_filters): + + if self.data_format == 'NWC': + x = nhwc_to_nchw(x) + + x = self.expand_dims(x, 2) + depthwise_filters = self.expand_dims(depthwise_filters, 2) + pointwise_filters = self.expand_dims(pointwise_filters, 2) + + outputs = self.depthwise_conv(x, depthwise_filters) + outputs = self.pointwise_conv(outputs, pointwise_filters) + + outputs = self.squeeze(outputs) + + if self.data_format == 'NWC': + outputs = nchw_to_nhwc(outputs) + return outputs + + +class SeparableConv2D(Cell): + + def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, in_channel, depth_multiplier): + super(SeparableConv2D, self).__init__() + self.data_format, self.padding = preprocess_2d_format(data_format, padding) + self.k_size = k_size + self.out_channel = out_channel + self.in_channel = in_channel + self.depth_multiplier = depth_multiplier + + if self.data_format is 'NHWC': + self.ms_stride = strides[1] + self.ms_dilation = dilations[1] + # self.transpose = P.Transpose() + elif self.data_format is 'NCHW': + self.ms_stride = strides[2] + self.ms_dilation = dilations[2] + + self.depthwise_conv = P.Conv2D( + out_channel=self.in_channel * self.depth_multiplier, kernel_size=self.k_size, pad_mode=self.padding, + stride=self.ms_stride, dilation=self.ms_dilation, mode=1, group=self.in_channel + ) + + self.pointwise_conv = P.Conv2D( + out_channel=self.out_channel, kernel_size=(1, 1), pad_mode=self.padding, stride=(1, 1), dilation=(1, 1), + mode=1, group=1 + ) + + def construct(self, x, depthwise_filters, pointwise_filters): + if self.data_format == 'NHWC': + x = nhwc_to_nchw(x) + + outputs = self.depthwise_conv(x, depthwise_filters) + outputs = self.pointwise_conv(outputs, pointwise_filters) + + if self.data_format == 'NHWC': + outputs = nchw_to_nhwc(outputs) + return outputs + + +class AdaptiveMeanPool1D(Cell): + + def __init__(self, output_size, data_format): + super(AdaptiveMeanPool1D, self).__init__() + self.data_format, _ = preprocess_1d_format(data_format, None) + self.output_size = output_size + self.expand_dims = P.ExpandDims() + self.squeeze = P.Squeeze(2) + + def construct(self, inputs): + + if self.data_format == 'NWC': + n, w, c = inputs.shape + inputs = nhwc_to_nchw(inputs) + 
else: + n, c, w = inputs.shape + inputs = self.expand_dims(inputs, 2) + + stride = (1, w // self.output_size) + kernel = (1, w - (self.output_size - 1) * stride[1]) + outputs = P.AvgPool(kernel_size=kernel, strides=stride, pad_mode='VALID')(inputs) + outputs = self.squeeze(outputs) + + if self.data_format == 'NWC': + outputs = nchw_to_nhwc(outputs) + + return outputs + + +class AdaptiveMeanPool2D(Cell): + + def __init__(self, output_size, data_format): + super(AdaptiveMeanPool2D, self).__init__() + self.data_format, _ = preprocess_2d_format(data_format, None) + self.output_size = output_size + + def construct(self, inputs): + + if self.data_format == 'NHWC': + n, h, w, c = inputs.shape + inputs = nhwc_to_nchw(inputs) + else: + n, c, h, w = inputs.shape + + out_h, out_w = self.output_size + stride_h = h // out_h + kernel_h = h - (out_h - 1) * stride_h + stride_w = w // out_w + kernel_w = w - (out_w - 1) * stride_w + outputs = P.AvgPool(kernel_size=(kernel_h, kernel_w), strides=(stride_h, stride_w), pad_mode='VALID')(inputs) + + if self.data_format == 'NHWC': + outputs = nchw_to_nhwc(outputs) + + return outputs + + +class AdaptiveMeanPool3D(Cell): + + pass + + +class AdaptiveMaxPool1D(Cell): + + def __init__(self, output_size, data_format): + super(AdaptiveMaxPool1D, self).__init__() + self.data_format, _ = preprocess_1d_format(data_format, None) + self.output_size = output_size + self.expand_dims = P.ExpandDims() + self.squeeze = P.Squeeze(2) + + def construct(self, inputs): + + if self.data_format == 'NWC': + n, w, c = inputs.shape + inputs = nhwc_to_nchw(inputs) + else: + n, c, w = inputs.shape + inputs = self.expand_dims(inputs, 2) + + stride = (1, w // self.output_size) + kernel = (1, w - (self.output_size - 1) * stride[1]) + outputs = P.MaxPool(kernel_size=kernel, strides=stride, pad_mode='VALID')(inputs) + outputs = self.squeeze(outputs) + + if self.data_format == 'NWC': + outputs = nchw_to_nhwc(outputs) + + return outputs + + +class AdaptiveMaxPool2D(Cell): + + def __init__(self, output_size, data_format): + super(AdaptiveMaxPool2D, self).__init__() + self.data_format, _ = preprocess_2d_format(data_format, None) + self.output_size = output_size + + def construct(self, inputs): + + if self.data_format == 'NHWC': + n, h, w, c = inputs.shape + inputs = nhwc_to_nchw(inputs) + else: + n, c, h, w = inputs.shape + + out_h, out_w = self.output_size + stride_h = h // out_h + kernel_h = h - (out_h - 1) * stride_h + stride_w = w // out_w + kernel_w = w - (out_w - 1) * stride_w + outputs = P.MaxPool(kernel_size=(kernel_h, kernel_w), strides=(stride_h, stride_w), pad_mode='VALID')(inputs) + + if self.data_format == 'NHWC': + outputs = nchw_to_nhwc(outputs) + + return outputs + + +class AdaptiveMaxPool3D(Cell): + + pass + + +class BinaryConv2D(Cell): + + def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, in_channel): + super(BinaryConv2D, self).__init__() + self.data_format, self.padding = preprocess_2d_format(data_format, padding) + if self.data_format is 'NHWC': + self.ms_stride = strides[1] + self.ms_dilation = dilations[1] + # self.transpose = P.Transpose() + elif self.data_format is 'NCHW': + self.ms_stride = strides[2] + self.ms_dilation = dilations[2] + + self.conv2d = P.Conv2D( + out_channel=out_channel, kernel_size=k_size, pad_mode=self.padding, stride=self.ms_stride, + dilation=self.ms_dilation, mode=1, group=1 + ) + + @bprop_getters.register(P.Sign) + def get_bprop_Sign(self): + + def bprop(x, out, dout): + + grad = P.clip_by_value(dout, -1, 1) + return 
(grad, ) + + return bprop + + self.sign = P.Sign() + + def construct(self, inputs, filters): + + if self.data_format == 'NHWC': + inputs = nhwc_to_nchw(inputs) + + filters = self.sign(filters) + + outputs = self.conv2d(inputs, filters) + + if self.data_format == 'NHWC': + outputs = nchw_to_nhwc(outputs) + + return outputs + + +class DorefaConv2D(Cell): + + def __init__(self, bitW, bitA, strides, padding, data_format, dilations, out_channel, k_size, in_channel): + super(DorefaConv2D, self).__init__() + self.data_format, self.padding = preprocess_2d_format(data_format, padding) + self.bitW = ms.Tensor(bitW) + self.bitA = ms.Tensor(bitA) + if self.data_format is 'NHWC': + self.ms_stride = strides[1] + self.ms_dilation = dilations[1] + # self.transpose = P.Transpose() + elif self.data_format is 'NCHW': + self.ms_stride = strides[2] + self.ms_dilation = dilations[2] + + self.conv2d = P.Conv2D( + out_channel=out_channel, kernel_size=k_size, pad_mode=self.padding, stride=self.ms_stride, + dilation=self.ms_dilation, mode=1, group=1 + ) + + @bprop_getters.register(P.Round) + def get_bprop_Round(self): + + def bprop(x, out, dout): + + return (dout, ) + + return bprop + + @bprop_getters.register(P.Sign) + def get_bprop_Sign(self): + + def bprop(x, out, dout): + + return (dout, ) + + return bprop + + self.mimimum = P.Minimum() + self.abs = P.Abs() + self.round = P.Round() + self.reducemean = P.ReduceMean() + self.sign = P.Sign() + self.pow = P.Pow() + self.sub = P.Sub() + self.oneslike = P.OnesLike() + + def cabs(self, inputs): + + a = P.stop_gradient(self.oneslike(inputs)) + return self.mimimum(self.abs(inputs), a) + + def _quantize_dorefa(self, x, k): + + n = self.sub(self.pow(2.0, k), 1) + return self.round(x * n) / n + + def quantize_active(self, x, bitA): + if bitA == 32: + return x + return self._quantize_dorefa(x, bitA) + + def quantize_weight(self, x, bitW, force_quantization=False): + + if bitW == 32 and not force_quantization: + return x + + if bitW == 1: + E = P.stop_gradient(self.reducemean(self.abs(x))) + return self.sign(x / E) * E + + x = P.clip_by_value(x * 0.5 + 0.5, 0.0, 1.0) + + return 2 * self._quantize_dorefa(x, bitW) - 1 + + def construct(self, inputs, filters): + + if self.data_format == 'NHWC': + inputs = nhwc_to_nchw(inputs) + + inputs = self.quantize_active(self.cabs(inputs), self.bitA) + + filters = self.quantize_weight(filters, self.bitW) + + outputs = self.conv2d(inputs, filters) + + if self.data_format == 'NHWC': + outputs = nchw_to_nhwc(outputs) + + return outputs diff --git a/tensorlayer/backend/ops/paddle_backend.py b/tensorlayer/backend/ops/paddle_backend.py index e9b37c5..f7334c0 100644 --- a/tensorlayer/backend/ops/paddle_backend.py +++ b/tensorlayer/backend/ops/paddle_backend.py @@ -20,6 +20,7 @@ uint16 = "uint16" uint32 = "uint32" uint64 = "uint64" + def _getter(init_fn, **kwargs): """Return an named eager tensor.""" raise NotImplementedError @@ -272,6 +273,7 @@ def dtypes(dt): class Maximum(object): + def __init__(self): pass @@ -280,6 +282,7 @@ class Maximum(object): class Minimum(object): + def __init__(self): pass @@ -313,7 +316,7 @@ class FlattenReshape(object): pass def __call__(self, inputs): - return pd.flatten(x=inputs, start_axis=1,stop_axis=-1) + return pd.flatten(x=inputs, start_axis=1, stop_axis=-1) class Reshape(object): @@ -504,7 +507,9 @@ def reduce_min(input_tensor, axis=None): """ raise NotImplementedError + class Pad(object): + def __init__(self, paddings, mode="REFLECT"): if mode not in ['CONSTANT', 'REFLECT', 'SYMMETRIC']: raise 
Exception("Unsupported mode: {}".format(mode)) @@ -516,6 +521,7 @@ class Pad(object): def __call__(self, x): raise NotImplementedError + def pad(tensor, paddings, mode='CONSTANT', constant_values=0): """ Pads a tensor. @@ -577,6 +583,7 @@ def stack(values, axis=0): class Meshgrid(object): + def __init__(self, indexing='xy'): super(Meshgrid, self).__init__() self.index = indexing @@ -886,7 +893,6 @@ class Count_nonzero(object): pass - class Resize: def __init__(self, scale, method, antialias=False, data_format='channels_last', ksize=None): @@ -943,19 +949,25 @@ class Sign(object): def __call__(self, x): raise NotImplementedError + def ceil(x): raise NotImplementedError + def multiply(x, y): raise NotImplementedError + def divide(x, y): raise NotImplementedError + def identity(x): raise NotImplementedError + class BatchToSpace(object): + def __init__(self, block_size, crops): super(BatchToSpace, self).__init__() pass @@ -965,8 +977,9 @@ class BatchToSpace(object): class DepthToSpace(object): + def __init__(self, block_size, data_format='NHWC'): pass def __call__(self, input): - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/tensorlayer/backend/ops/paddle_nn.py b/tensorlayer/backend/ops/paddle_nn.py index 47d9dd0..535b9fa 100644 --- a/tensorlayer/backend/ops/paddle_nn.py +++ b/tensorlayer/backend/ops/paddle_nn.py @@ -4,6 +4,7 @@ import paddle as pd import paddle.nn.functional as F + def padding_format(padding): """ Checks that the padding format correspond format. @@ -764,6 +765,7 @@ def depthwise_conv2d(input, filter, strides, padding, data_format=None, dilation pass + class Conv1d_transpose(object): def __init__( @@ -923,4 +925,106 @@ class BatchNorm(object): pass def __call__(self, *args, **kwargs): + raise NotImplementedError + + +class GroupConv2D(object): + + def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, groups): + pass + + def __call__(self, input, filters): + raise NotImplementedError + + +class SeparableConv1D(object): + + def __init__(self, stride, padding, data_format, dilations, out_channel, k_size, in_channel, depth_multiplier): + pass + + def __call__(self, inputs, depthwise_filters, pointwise_filters): + raise NotImplementedError + + +class SeparableConv2D(object): + + def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, in_channel, depth_multiplier): + pass + + def __call__(self, inputs, depthwise_filters, pointwise_filters): + raise NotImplementedError + + +class AdaptiveMeanPool1D(object): + + def __init__(self, output_size, data_format): + pass + + def __call__(self, input): + + raise NotImplementedError + + +class AdaptiveMeanPool2D(object): + + def __init__(self, output_size, data_format): + pass + + def __call__(self, inputs): + + raise NotImplementedError + + +class AdaptiveMeanPool3D(object): + + def __init__(self, output_size, data_format): + pass + + def __call__(self, inputs): + raise NotImplementedError + + +class AdaptiveMaxPool1D(object): + + def __init__(self, output_size, data_format): + pass + + def __call__(self, input): + + raise NotImplementedError + + +class AdaptiveMaxPool2D(object): + + def __init__(self, output_size, data_format): pass + + def __call__(self, inputs): + raise NotImplementedError + + +class AdaptiveMaxPool3D(object): + + def __init__(self, output_size, data_format): + pass + + def __call__(self, inputs): + raise NotImplementedError + + +class BinaryConv2D(object): + + def __init__(self, strides, padding, data_format, dilations, 
out_channel, k_size, in_channel): + pass + + def __call__(self, inputs, filters): + raise NotImplementedError + + +class DorefaConv2D(object): + + def __init__(self, bitW, bitA, strides, padding, data_format, dilations, out_channel, k_size, in_channel): + pass + + def __call__(self, inputs, filters): + raise NotImplementedError diff --git a/tensorlayer/backend/ops/tensorflow_backend.py b/tensorlayer/backend/ops/tensorflow_backend.py index 74df53d..9d9a00f 100644 --- a/tensorlayer/backend/ops/tensorflow_backend.py +++ b/tensorlayer/backend/ops/tensorflow_backend.py @@ -291,6 +291,7 @@ def dtypes(dt): class Maximum(object): + def __init__(self): pass @@ -299,6 +300,7 @@ class Maximum(object): class Minimum(object): + def __init__(self): pass @@ -524,6 +526,7 @@ def reduce_min(input_tensor, axis=None): class Pad(object): + def __init__(self, paddings, mode="REFLECT"): if mode not in ['CONSTANT', 'REFLECT', 'SYMMETRIC']: raise Exception("Unsupported mode: {}".format(mode)) @@ -534,6 +537,7 @@ class Pad(object): outputs = tf.pad(x, self.paddings, mode=self.mode, constant_values=0) return outputs + def pad(tensor, paddings, mode='CONSTANT', constant_values=0): """ Pads a tensor. @@ -600,6 +604,7 @@ def stack(values, axis=0): class Meshgrid(object): + def __init__(self, indexing='xy'): super(Meshgrid, self).__init__() self.index = indexing @@ -931,7 +936,6 @@ class Count_nonzero(object): return tf.math.count_nonzero(input, axis=axis, keepdims=self.keepdims, dtype=self.dtype) - class Resize: def __init__(self, scale, method, antialias=False, data_format='channels_last', ksize=None): @@ -992,19 +996,25 @@ class Sign(object): def __call__(self, x): return tf.sign(x) + def ceil(x): return tf.math.ceil(x) + def multiply(x, y): return tf.multiply(x, y) + def divide(x, y): return tf.divide(x, y) + def identity(x): return tf.identity(x) + class BatchToSpace(object): + def __init__(self, block_size, crops): self.bolock_size = block_size self.crops = crops @@ -1012,7 +1022,9 @@ class BatchToSpace(object): def __call__(self, input_x): return tf.batch_to_space(input=input_x, block_shape=self.bolock_size, crops=self.crops) + class DepthToSpace(object): + def __init__(self, block_size, data_format='NHWC'): self.block_size = block_size self.data_format = data_format diff --git a/tensorlayer/backend/ops/tensorflow_nn.py b/tensorlayer/backend/ops/tensorflow_nn.py index 71d978f..5cefda3 100644 --- a/tensorlayer/backend/ops/tensorflow_nn.py +++ b/tensorlayer/backend/ops/tensorflow_nn.py @@ -5,7 +5,7 @@ import tensorflow as tf from tensorflow.python.framework import ops from tensorflow.python.ops import math_ops from tensorflow.python.training import moving_averages - +from math import floor, ceil # loss function sparse_softmax_cross_entropy_with_logits = tf.nn.sparse_softmax_cross_entropy_with_logits sigmoid_cross_entropy_with_logits = tf.nn.sigmoid_cross_entropy_with_logits @@ -1517,3 +1517,330 @@ class BatchNorm(object): ) return outputs + + +class GroupConv2D(object): + + def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, groups): + self.data_format, self.padding = preprocess_2d_format(data_format, padding) + self.strides = strides + self.dilations = dilations + self.groups = groups + if self.data_format == 'NHWC': + self.channels_axis = 3 + else: + self.channels_axis = 1 + + def __call__(self, input, filters): + + if self.groups == 1: + outputs = tf.nn.conv2d( + input=input, + filters=filters, + strides=self.strides, + padding=self.padding, + data_format=self.data_format, + 
dilations=self.dilations, + ) + else: + inputgroups = tf.split(input, num_or_size_splits=self.groups, axis=self.channels_axis) + weightsgroups = tf.split(filters, num_or_size_splits=self.groups, axis=self.channels_axis) + convgroups = [] + for i, k in zip(inputgroups, weightsgroups): + convgroups.append( + tf.nn.conv2d( + input=i, + filters=k, + strides=self.strides, + padding=self.padding, + data_format=self.data_format, + dilations=self.dilations, + ) + ) + outputs = tf.concat(axis=self.channels_axis, values=convgroups) + + return outputs + + +class SeparableConv1D(object): + + def __init__(self, stride, padding, data_format, dilations, out_channel, k_size, in_channel, depth_multiplier): + self.data_format, self.padding = preprocess_1d_format(data_format, padding) + + if self.data_format == 'NWC': + self.spatial_start_dim = 1 + self.strides = (1, stride, stride, 1) + self.data_format = 'NHWC' + else: + self.spatial_start_dim = 2 + self.strides = (1, 1, stride, stride) + self.data_format = 'NCHW' + self.dilation_rate = (1, dilations) + + def __call__(self, inputs, depthwise_filters, pointwise_filters): + inputs = tf.expand_dims(inputs, axis=self.spatial_start_dim) + depthwise_filters = tf.expand_dims(depthwise_filters, 0) + pointwise_filters = tf.expand_dims(pointwise_filters, 0) + + outputs = tf.nn.separable_conv2d( + inputs, depthwise_filters, pointwise_filters, strides=self.strides, padding=self.padding, + dilations=self.dilation_rate, data_format=self.data_format + ) + + outputs = tf.squeeze(outputs, axis=self.spatial_start_dim) + + return outputs + + +class SeparableConv2D(object): + + def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, in_channel, depth_multiplier): + self.data_format, self.padding = preprocess_2d_format(data_format, padding) + self.strides = strides + self.dilations = (dilations[2], dilations[2]) + + def __call__(self, inputs, depthwise_filters, pointwise_filters): + + outputs = tf.nn.separable_conv2d( + inputs, depthwise_filters, pointwise_filters, strides=self.strides, padding=self.padding, + dilations=self.dilations, data_format=self.data_format + ) + + return outputs + + +class AdaptiveMeanPool1D(object): + + def __init__(self, output_size, data_format): + self.data_format, _ = preprocess_1d_format(data_format, None) + self.output_size = output_size + + def __call__(self, input): + + if self.data_format == 'NWC': + n, w, c = input.shape + else: + n, c, w = input.shape + + stride = floor(w / self.output_size) + kernel = w - (self.output_size - 1) * stride + output = tf.nn.avg_pool1d(input, ksize=kernel, strides=stride, data_format=self.data_format, padding='VALID') + + return output + + +class AdaptiveMeanPool2D(object): + + def __init__(self, output_size, data_format): + self.data_format, _ = preprocess_2d_format(data_format, None) + self.output_size = output_size + + def __call__(self, inputs): + + if self.data_format == 'NHWC': + n, h, w, c = inputs.shape + else: + n, c, h, w = inputs.shape + + out_h, out_w = self.output_size + stride_h = floor(h / out_h) + kernel_h = h - (out_h - 1) * stride_h + stride_w = floor(w / out_w) + kernel_w = w - (out_w - 1) * stride_w + + outputs = tf.nn.avg_pool2d( + inputs, ksize=(kernel_h, kernel_w), strides=(stride_h, stride_w), data_format=self.data_format, + padding='VALID' + ) + + return outputs + + +class AdaptiveMeanPool3D(object): + + def __init__(self, output_size, data_format): + self.data_format, _ = preprocess_3d_format(data_format, None) + self.output_size = output_size + + def 
__call__(self, inputs): + + if self.data_format == 'NDHWC': + n, d, h, w, c = inputs.shape + else: + n, c, d, h, w = inputs.shape + + out_d, out_h, out_w = self.output_size + stride_d = floor(d / out_d) + kernel_d = d - (out_d - 1) * stride_d + stride_h = floor(h / out_h) + kernel_h = h - (out_h - 1) * stride_h + stride_w = floor(w / out_w) + kernel_w = w - (out_w - 1) * stride_w + + outputs = tf.nn.avg_pool3d( + inputs, ksize=(kernel_d, kernel_h, kernel_w), strides=(stride_d, stride_h, stride_w), + data_format=self.data_format, padding='VALID' + ) + + return outputs + + +class AdaptiveMaxPool1D(object): + + def __init__(self, output_size, data_format): + self.data_format, _ = preprocess_1d_format(data_format, None) + self.output_size = output_size + + def __call__(self, input): + + if self.data_format == 'NWC': + n, w, c = input.shape + else: + n, c, w = input.shape + + stride = floor(w / self.output_size) + kernel = w - (self.output_size - 1) * stride + output = tf.nn.max_pool1d(input, ksize=kernel, strides=stride, data_format=self.data_format, padding='VALID') + + return output + + +class AdaptiveMaxPool2D(object): + + def __init__(self, output_size, data_format): + self.data_format, _ = preprocess_2d_format(data_format, None) + self.output_size = output_size + + def __call__(self, inputs): + + if self.data_format == 'NHWC': + n, h, w, c = inputs.shape + else: + n, c, h, w = inputs.shape + + out_h, out_w = self.output_size + stride_h = floor(h / out_h) + kernel_h = h - (out_h - 1) * stride_h + stride_w = floor(w / out_w) + kernel_w = w - (out_w - 1) * stride_w + + outputs = tf.nn.max_pool2d( + inputs, ksize=(kernel_h, kernel_w), strides=(stride_h, stride_w), data_format=self.data_format, + padding='VALID' + ) + + return outputs + + +class AdaptiveMaxPool3D(object): + + def __init__(self, output_size, data_format): + self.data_format, _ = preprocess_3d_format(data_format, None) + self.output_size = output_size + + def __call__(self, inputs): + + if self.data_format == 'NDHWC': + n, d, h, w, c = inputs.shape + else: + n, c, d, h, w = inputs.shape + + out_d, out_h, out_w = self.output_size + stride_d = floor(d / out_d) + kernel_d = d - (out_d - 1) * stride_d + stride_h = floor(h / out_h) + kernel_h = h - (out_h - 1) * stride_h + stride_w = floor(w / out_w) + kernel_w = w - (out_w - 1) * stride_w + + outputs = tf.nn.max_pool3d( + inputs, ksize=(kernel_d, kernel_h, kernel_w), strides=(stride_d, stride_h, stride_w), + data_format=self.data_format, padding='VALID' + ) + + return outputs + + +class BinaryConv2D(object): + + def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, in_channel): + self.data_format, self.padding = preprocess_2d_format(data_format, padding) + self.strides = strides + self.dilations = dilations + + # @tf.RegisterGradient("TL_Sign_QuantizeGrad") + # def _quantize_grad(op, grad): + # """Clip and binarize tensor using the straight through estimator (STE) for the gradient.""" + # return tf.clip_by_value(grad, -1, 1) + + def quantize(self, x): + # ref: https://github.com/AngusG/tensorflow-xnor-bnn/blob/master/models/binary_net.py#L70 + # https://github.com/itayhubara/BinaryNet.tf/blob/master/nnUtils.py + with tf.compat.v1.get_default_graph().gradient_override_map({"Sign": "TL_Sign_QuantizeGrad"}): + return tf.sign(x) + + def __call__(self, inputs, filters): + + filters = self.quantize(filters) + + outputs = tf.nn.conv2d( + input=inputs, filters=filters, strides=self.strides, padding=self.padding, data_format=self.data_format, + 
dilations=self.dilations
+        )
+
+        return outputs
+
+
+class DorefaConv2D(object):
+
+    def __init__(self, bitW, bitA, strides, padding, data_format, dilations, out_channel, k_size, in_channel):
+        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
+        self.strides = strides
+        self.dilations = dilations
+        self.bitW = bitW
+        self.bitA = bitA
+
+    def _quantize_dorefa(self, x, k):
+        G = tf.compat.v1.get_default_graph()
+        n = float(2**k - 1)
+        with G.gradient_override_map({"Round": "Identity"}):
+            return tf.round(x * n) / n
+
+    def cabs(self, x):
+        return tf.minimum(1.0, tf.abs(x), name='cabs')
+
+    def quantize_active(self, x, bitA):
+        if bitA == 32:
+            return x
+        return self._quantize_dorefa(x, bitA)
+
+    def quantize_weight(self, x, bitW, force_quantization=False):
+
+        G = tf.compat.v1.get_default_graph()
+        if bitW == 32 and not force_quantization:
+            return x
+        if bitW == 1:  # BWN
+            with G.gradient_override_map({"Sign": "Identity"}):
+                E = tf.stop_gradient(tf.reduce_mean(input_tensor=tf.abs(x)))
+                return tf.sign(x / E) * E
+        x = tf.clip_by_value(
+            x * 0.5 + 0.5, 0.0, 1.0
+        )  # it seems as though most weights are within -1 to 1 region anyways
+        return 2 * self._quantize_dorefa(x, bitW) - 1
+
+    def __call__(self, inputs, filters):
+
+        inputs = self.quantize_active(self.cabs(inputs), self.bitA)
+
+        filters = self.quantize_weight(filters, self.bitW)
+
+        outputs = tf.nn.conv2d(
+            input=inputs,
+            filters=filters,
+            strides=self.strides,
+            padding=self.padding,
+            data_format=self.data_format,
+            dilations=self.dilations,
+        )
+
+        return outputs
diff --git a/tensorlayer/cost/paddle_cost.py b/tensorlayer/cost/paddle_cost.py
index d8a79f0..cd66fa7 100644
--- a/tensorlayer/cost/paddle_cost.py
+++ b/tensorlayer/cost/paddle_cost.py
@@ -65,9 +65,12 @@ def sigmoid_cross_entropy(output, target):
         Name of this loss.
 
     """
-    # tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output), name=name)
-    depth = output.shape[-1]
-    label = pd.fluid.layers.one_hot(target, depth=depth)
+
+    if output.shape[-1] == target.shape[-1]:
+        label = target
+    else:
+        depth = output.shape[-1]
+        label = pd.fluid.layers.one_hot(target, depth=depth)
     out = pd.fluid.layers.sigmoid_cross_entropy_with_logits(x=output, label=label)
     out = pd.fluid.layers.reduce_mean(out)
     return out
@@ -92,8 +95,12 @@ def binary_cross_entropy(output, target, epsilon=1e-8):
     - `ericjang-DRAW `__
 
     """
-    depth = output.shape[-1]
-    target = pd.fluid.layers.one_hot(target, depth=depth)
+
+    if output.shape[-1] == target.shape[-1]:
+        pass
+    else:
+        depth = output.shape[-1]
+        target = pd.fluid.layers.one_hot(target, depth=depth)
     out = pd.fluid.layers.reduce_sum(
         -(target * pd.log(output + epsilon) + (1. - target) * pd.log(1. 
- output + epsilon)) ) @@ -123,8 +130,12 @@ def mean_squared_error(output, target, is_mean=False, axis=-1, name="mean_square - `Wiki Mean Squared Error `__ """ - depth = output.shape[-1] - target = pd.fluid.layers.one_hot(target, depth=depth) + + if output.shape[-1] == target.shape[-1]: + pass + else: + depth = output.shape[-1] + target = pd.fluid.layers.one_hot(target, depth=depth) if is_mean: mse = F.mse_loss(input=output, label=target, reduction='mean') @@ -149,7 +160,16 @@ def normalized_mean_square_error(output, target, axis=-1, name="normalized_mean_ """ - raise NotImplementedError("Not Implemented.") + if output.shape[-1] == target.shape[-1]: + pass + else: + depth = output.shape[-1] + target = pd.fluid.layers.one_hot(target, depth=depth) + + nmse_a = pd.sqrt(pd.fluid.layers.reduce_sum(pd.fluid.layers.square_error_cost(output, target), dim=axis)) + nmse_b = pd.sqrt(pd.fluid.layers.reduce_sum(pd.square(target), dim=axis)) + nmse = pd.fluid.layers.reduce_mean(nmse_a / nmse_b) + return nmse def absolute_difference_error(output, target, is_mean=False, axis=-1, name="absolute_difference_error_loss"): @@ -172,7 +192,12 @@ def absolute_difference_error(output, target, is_mean=False, axis=-1, name="abso """ - raise NotImplementedError("Not Implemented.") + + if is_mean: + loss = pd.fluid.layers.reduce_mean(pd.fluid.layers.reduce_mean(pd.abs(output - target), axis)) + else: + loss = pd.fluid.layers.reduce_mean(pd.fluid.layers.reduce_sum(pd.abs(output - target), axis)) + return loss def dice_coe(output, target, loss_type='jaccard', axis=(1, 2, 3), smooth=1e-5): @@ -207,7 +232,20 @@ def dice_coe(output, target, loss_type='jaccard', axis=(1, 2, 3), smooth=1e-5): """ - raise NotImplementedError("Not Implemented.") + axis = list(axis) + inse = pd.fluid.layers.reduce_sum(output * target, dim=axis) + if loss_type == 'jaccard': + l = pd.fluid.layers.reduce_sum(output * output, dim=axis) + r = pd.fluid.layers.reduce_sum(target * target, dim=axis) + elif loss_type == 'sorensen': + l = pd.fluid.layers.reduce_sum(output, dim=axis) + r = pd.fluid.layers.reduce_sum(target, dim=axis) + else: + raise Exception("Unknow loss_type") + + dice = (2. * inse + smooth) / (l + r + smooth) + dice = pd.fluid.layers.reduce_mean(dice) + return dice def dice_hard_coe(output, target, threshold=0.5, axis=(1, 2, 3), smooth=1e-5): @@ -234,7 +272,16 @@ def dice_hard_coe(output, target, threshold=0.5, axis=(1, 2, 3), smooth=1e-5): """ - raise NotImplementedError("Not Implemented.") + output = pd.cast(output > threshold, dtype='float32') + target = pd.cast(target > threshold, dtype='float32') + inse = pd.fluid.layers.reduce_sum(pd.multiply(output, target), dim=list(axis)) + l = pd.fluid.layers.reduce_sum(output, dim=list(axis)) + r = pd.fluid.layers.reduce_sum(target, dim=list(axis)) + + hard_dice = (2. 
* inse + smooth) / (l + r + smooth) + ## + hard_dice = pd.fluid.layers.reduce_mean(hard_dice) + return hard_dice def iou_coe(output, target, threshold=0.5, axis=(1, 2, 3), smooth=1e-5): @@ -261,7 +308,13 @@ def iou_coe(output, target, threshold=0.5, axis=(1, 2, 3), smooth=1e-5): """ - raise NotImplementedError("Not Implemented.") + pre = pd.cast(output > threshold, dtype='float32') + truth = pd.cast(target > threshold, dtype='float32') + inse = pd.fluid.layers.reduce_sum(pd.multiply(pre, truth), dim=axis) # AND + union = pd.fluid.layers.reduce_sum(pd.cast(pd.add(pre, truth) >= 1, dtype='float32'), dim=axis) # OR + batch_iou = (inse + smooth) / (union + smooth) + iou = pd.fluid.layers.reduce_mean(batch_iou, name='iou_coe') + return iou def sequence_loss_by_example( @@ -389,7 +442,9 @@ def cosine_similarity(v1, v2): """ - raise NotImplementedError("Not Implemented.") + return pd.fluid.layers.reduce_sum(pd.multiply(v1, v2), 1) / \ + (pd.sqrt(pd.fluid.layers.reduce_sum(pd.multiply(v1, v1), 1)) * + pd.sqrt(pd.fluid.layers.reduce_sum(pd.multiply(v2, v2), 1))) # Regularization Functions @@ -545,4 +600,4 @@ def huber_loss( """ - raise NotImplementedError("Not Implemented.") + raise NotImplementedError("Not Implemented.") \ No newline at end of file diff --git a/tensorlayer/dataflow/__init__.py b/tensorlayer/dataflow/__init__.py index d26b322..912a238 100644 --- a/tensorlayer/dataflow/__init__.py +++ b/tensorlayer/dataflow/__init__.py @@ -3,21 +3,19 @@ from __future__ import absolute_import, division, print_function from tensorlayer.backend.ops.load_backend import BACKEND +from tensorlayer.dataflow import image if BACKEND == 'tensorflow': from .tensorflow_data import * - from .tensorflow_image import * elif BACKEND == 'mindspore': from .mindspore_data import * - from .mindspore_image import * - -elif BACKEND == 'dragon': - pass elif BACKEND == 'paddle': + from .paddle_data import * + +elif BACKEND == 'dragon': pass else: raise NotImplementedError("This backend is not supported") - diff --git a/tensorlayer/dataflow/image/__init__.py b/tensorlayer/dataflow/image/__init__.py index df05229..c0568ed 100644 --- a/tensorlayer/dataflow/image/__init__.py +++ b/tensorlayer/dataflow/image/__init__.py @@ -1,2 +1,17 @@ #! 
/usr/bin/python # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function + +from tensorlayer.backend.ops.load_backend import BACKEND + +if BACKEND == 'tensorflow': + from .tensorflow_image import * +elif BACKEND == 'mindspore': + from .mindspore_image import * +elif BACKEND == 'paddle': + from .paddle_image import * +elif BACKEND == 'pytorch': + pass + +else: + raise NotImplementedError("This backend is not supported") diff --git a/tensorlayer/dataflow/image/mindspore_image.py b/tensorlayer/dataflow/image/mindspore_image.py new file mode 100644 index 0000000..9f10c7d --- /dev/null +++ b/tensorlayer/dataflow/image/mindspore_image.py @@ -0,0 +1,1539 @@ +import numpy as np +from PIL import Image, ImageOps, ImageEnhance, __version__ +import random +import colorsys +import numbers +import math +import io +__all__ = [ + 'CentralCrop', + 'HsvToRgb', + 'AdjustBrightness', + 'AdjustContrast', + 'AdjustHue', + 'AdjustSaturation', + 'Crop', + 'FlipHorizontal', + 'FlipVertical', + 'GrayToRgb', + 'Standardization', + 'RgbToGray', + 'PadToBoundingbox', + 'Pad', + 'RandomBrightness', + 'RandomContrast', + 'RandomHue', + 'RandomSaturation', + 'RandomCrop', + 'Resize', + 'CropAndResize', + 'CropOrPad', + 'ResizeAndPad', + 'RgbToHsv', + 'Transpose', + 'RandomRotation', + 'RandomShift', + 'RandomShear', + 'RandomZoom', + 'Rescale', + 'RandomFlipVertical', + 'RandomFlipHorizontal', + 'HWC2CHW', + 'CHW2HWC', +] + +augment_error_message = 'img should be PIL image. Got {}.' + + +def ToTensor(image): + + image = np.asarray(image).astype(np.float32) + return image + + +def ToPIL(image): + """ + Convert the input image to PIL format. + + Args: + img: Image to be converted. + + Returns: + img (PIL image), Converted image. + """ + return Image.fromarray(np.array(image).astype(np.uint8)) + + +def Decode(image): + """ + Decode the input image to PIL image format in RGB mode. + + Args: + img: Image to be decoded. + + Returns: + img (PIL image), Decoded image in RGB mode. + """ + + try: + data = io.BytesIO(image) + img = Image.open(data) + return img.convert('RGB') + except IOError as e: + raise ValueError("{0}\nWARNING: Failed to decode given image.".format(e)) + except AttributeError as e: + raise ValueError("{0}\nWARNING: Failed to decode, Image might already be decoded.".format(e)) + + +def Crop(image, offset_height, offset_width, target_height, target_width, is_hwc=True): + ''' + + Parameters + ---------- + image: + A image or a batch of images + offset_height: + Vertical coordinate of the top-left corner of the result in the input. + offset_width: + Horizontal coordinate of the top-left corner of the result in the input. + target_height: + Height of the result. + target_width: + Width of the result. + is_hwc: + If is_hwc is True, the order of image channels is [B,H,W,C] or [H,W,C]. If is_hwc is False, the order of image channels is [B,C,H,W] or [C,H,W,] + Returns: + Output [batch, target_height, target_width, channels] or [target_height, target_width, channels] + ------- + + ''' + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, Image.Image): + image = ToTensor(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. Got {}'.format(type(image))) + shape_size = len(image.shape) + + if not shape_size in (3, 4): + raise TypeError( + 'img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). 
\ + Got {}'.format(image.shape) + ) + if shape_size == 3: + if is_hwc: + height, width, channels = image.shape + else: + channels, height, width = image.shape + else: + if is_hwc: + batch, height, width, channels = image.shape + else: + batch, channels, height, width = image.shape + + if offset_width < 0: + raise ValueError('offset_width must be >0.') + if offset_height < 0: + raise ValueError('offset_height must be >0.') + if target_height < 0: + raise ValueError('target_height must be >0.') + if target_width < 0: + raise ValueError('target_width must be >0.') + if offset_width + target_width > width: + raise ValueError('offset_width + target_width must be <= image width.') + if offset_height + target_height > height: + raise ValueError('offset_height + target_height must be <= image height.') + + if shape_size == 3: + if is_hwc: + return ToTensor( + image[offset_height:offset_height + target_height, offset_width:offset_width + target_width, :] + ) + else: + return ToTensor( + image[:, offset_height:offset_height + target_height, offset_width:offset_width + target_width] + ) + else: + if is_hwc: + return ToTensor( + image[:, offset_height:offset_height + target_height, offset_width:offset_width + target_width, :] + ) + else: + return ToTensor( + image[:, :, offset_height:offset_height + target_height, offset_width:offset_width + target_width] + ) + + +def CentralCrop(image, central_fraction=None, size=None, is_hwc=True): + ''' + + Parameters + ---------- + image : + input Either a 3-D float Tensor of shape [height, width, depth] or a 4-D Tensor of shape [batch, height, width, depth], + central_fraction : + float (0, 1], fraction of size to crop + size: + size (Union[int, sequence]) – The output size of the cropped image. If size is an integer, a square crop of size (size, size) is returned. + If size is a sequence of length 2, it should be (height, width). + Returns : + 3-D float Tensor or 4-D float Tensor, as per the input. + ------- + If backend is tensorflow, central_fraction will be used preferentially. if size is used, the height-width ratio will be equivalent to original ratio.. + If backend is mindspore, size will be used preferentially. + ''' + if size is None and central_fraction is None: + raise ValueError('central_fraction and size can not be both None') + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, Image.Image): + image = ToTensor(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. Got {}'.format(type(image))) + shape_size = len(image.shape) + if not shape_size in (3, 4): + raise TypeError( + 'img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). 
\ + Got {}'.format(image.shape) + ) + + if shape_size == 3: + if is_hwc: + height, width, channels = image.shape + else: + channels, height, width = image.shape + else: + if is_hwc: + batch, height, width, channels = image.shape + else: + batch, channels, height, width = image.shape + if size is None: + if central_fraction > 1 or central_fraction <= 0: + raise ValueError('central_fraction must be in (0,1].') + target_height = int(round(height * central_fraction)) + target_width = int(round(width * central_fraction)) + size = (target_height, target_width) + if isinstance(size, int): + size = (size, size) + crop_height, crop_width = size + crop_top = int(round((height - crop_height) / 2.)) + crop_left = int(round((width - crop_width) / 2.)) + + return Crop(image, crop_top, crop_left, crop_height, crop_width, is_hwc) + + +def hsv_to_rgb(np_hsv_img, is_hwc): + """ + Convert HSV img to RGB img. + + Args: + np_hsv_img (numpy.ndarray): NumPy HSV image array of shape (H, W, C) or (C, H, W) to be converted. + is_hwc (Bool): If True, the shape of np_hsv_img is (H, W, C), otherwise must be (C, H, W). + + Returns: + np_rgb_img (numpy.ndarray), NumPy HSV image with same shape of np_hsv_img. + """ + if is_hwc: + h, s, v = np_hsv_img[:, :, 0], np_hsv_img[:, :, 1], np_hsv_img[:, :, 2] + else: + h, s, v = np_hsv_img[0, :, :], np_hsv_img[1, :, :], np_hsv_img[2, :, :] + to_rgb = np.vectorize(colorsys.hsv_to_rgb) + r, g, b = to_rgb(h, s, v) + + if is_hwc: + axis = 2 + else: + axis = 0 + np_rgb_img = np.stack((r, g, b), axis=axis) + return np_rgb_img + + +def HsvToRgb(image, is_hwc=True): + + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, Image.Image): + image = ToTensor(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. Got {}'.format(type(image))) + shape_size = len(image.shape) + + if not shape_size in (3, 4): + raise TypeError( + 'img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). \ + Got {}'.format(image.shape) + ) + if shape_size == 3: + batch_size = 0 + if is_hwc: + num_channels = image.shape[2] + else: + num_channels = image.shape[0] + else: + batch_size = image.shape[0] + if is_hwc: + num_channels = image.shape[3] + else: + num_channels = image.shape[1] + + if num_channels != 3: + raise TypeError('img should be 3 channels RGB img. Got {} channels'.format(num_channels)) + if batch_size == 0: + return hsv_to_rgb(image, is_hwc) + return ToTensor([hsv_to_rgb(img, is_hwc) for img in image]) + + +def AdjustBrightness(image, factor): + ''' + + Parameters + ---------- + image: + input NumPy image array or PIL image + factor: + factor should be in the range (-1,1) + Returns: + ------- + np darray image + ''' + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, Image.Image): + image = ToTensor(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. 
Got {}'.format(type(image))) + if factor >= 1 or factor <= -1: + raise ValueError('factor must be in (-1,1).') + image = image + factor * 255 + image = np.clip(image, 0, 255) + + return ToTensor(image) + + +def AdjustContrast(image, factor): + + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, np.ndarray): + image = ToPIL(image) + if not isinstance(image, Image.Image): + raise TypeError(augment_error_message.format(type(image))) + + image = ImageEnhance.Contrast(image).enhance(factor) + + return ToTensor(image) + + +def AdjustHue(image, factor): + + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, np.ndarray): + image = ToPIL(image) + if not isinstance(image, Image.Image): + raise TypeError(augment_error_message.format(type(image))) + + image_hue_factor = factor + if not -1 <= image_hue_factor <= 1: + raise ValueError('image_hue_factor {} is not in [-1, 1].'.format(image_hue_factor)) + + mode = image.mode + if mode in {'L', '1', 'I', 'F'}: + return image + + hue, saturation, value = image.convert('HSV').split() + + np_hue = np.array(hue, dtype=np.uint8) + + with np.errstate(over='ignore'): + np_hue += np.uint8(image_hue_factor * 255) + hue = Image.fromarray(np_hue, 'L') + + image = Image.merge('HSV', (hue, saturation, value)).convert(mode) + + return ToTensor(image) + + +def AdjustSaturation(image, factor): + + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, np.ndarray): + image = ToPIL(image) + if not isinstance(image, Image.Image): + raise TypeError(augment_error_message.format(type(image))) + + enhancer = ImageEnhance.Color(image) + image = enhancer.enhance(factor) + + return ToTensor(image) + + +def FlipHorizontal(image): + + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, Image.Image): + image = ToTensor(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. Got {}'.format(type(image))) + + image = np.fliplr(image) + + return image + + +def FlipVertical(image): + + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, Image.Image): + image = ToTensor(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. Got {}'.format(type(image))) + image = np.flipud(image) + + return image + + +def GrayToRgb(image): + + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, Image.Image): + image = ToTensor(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. 
Got {}'.format(type(image)))
+    shape = image.shape
+    output_image = np.zeros((shape[0], shape[1], 3), dtype=np.uint8)
+    if len(shape) == 3:
+        # (H, W, 1) grayscale input: replicate channel 0 into all three output channels
+        for i in range(3):
+            output_image[:, :, i] = image[:, :, 0]
+    elif len(shape) == 2:
+        for i in range(3):
+            output_image[:, :, i] = image
+
+    return ToTensor(output_image)
+
+
+def RgbToGray(image):
+
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, np.ndarray):
+        image = ToPIL(image)
+    if not isinstance(image, Image.Image):
+        raise TypeError(augment_error_message.format(type(image)))
+    # When converting a color image to grayscale (mode 'L'), PIL uses the ITU-R 601-2
+    # luma transform: L = R * 299/1000 + G * 587/1000 + B * 114/1000
+    image = image.convert('L')
+    return ToTensor(image)
+
+
+def PadToBoundingbox(image, offset_height, offset_width, target_height, target_width, padding_value=0, is_hwc=True):
+    '''
+
+    Parameters
+    ----------
+    image:
+        A 3-D or a 4-D numpy ndarray image.
+    offset_height:
+        Number of rows of padding to add on top.
+    offset_width:
+        Number of columns of padding to add on the left.
+    target_height:
+        Height of the output image.
+    target_width:
+        Width of the output image.
+    Returns:
+        A numpy ndarray image.
+    -------
+    '''
+
+    if offset_height < 0:
+        raise ValueError("offset_height must be >= 0")
+    if offset_width < 0:
+        raise ValueError("offset_width must be >= 0")
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, Image.Image):
+        image = ToTensor(image)
+    if not isinstance(image, np.ndarray):
+        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))
+    shape_size = len(image.shape)
+    if not shape_size in (3, 4):
+        raise TypeError(
+            'img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). \
+            Got {}'.format(image.shape)
+        )
+    if shape_size == 3:
+        if is_hwc:
+            height, width, channels = image.shape
+        else:
+            channels, height, width = image.shape
+    else:
+        if is_hwc:
+            batch, height, width, channels = image.shape
+        else:
+            batch, channels, height, width = image.shape
+    top = offset_height
+    bottom = target_height - height - top
+    left = offset_width
+    right = target_width - width - left
+
+    if bottom < 0:
+        raise ValueError("target_height must be >= offset_height + height")
+
+    if right < 0:
+        raise ValueError("target_width must be >= offset_width + width")
+
+    if shape_size == 3:
+        if is_hwc:
+            return ToTensor(
+                np.pad(
+                    image, ((top, bottom), (left, right), (0, 0)), mode='constant',
+                    constant_values=(padding_value, padding_value)
+                )
+            )
+        else:
+            return ToTensor(
+                np.pad(
+                    image, ((0, 0), (top, bottom), (left, right)), mode='constant',
+                    constant_values=(padding_value, padding_value)
+                )
+            )
+    else:
+        if is_hwc:
+            return ToTensor(
+                np.pad(
+                    image, ((0, 0), (top, bottom), (left, right), (0, 0)), mode='constant',
+                    constant_values=(padding_value, padding_value)
+                )
+            )
+        else:
+            return ToTensor(
+                np.pad(
+                    image, ((0, 0), (0, 0), (top, bottom), (left, right)), mode='constant',
+                    constant_values=(padding_value, padding_value)
+                )
+            )
+
+
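+# A short sketch of PadToBoundingbox on a hypothetical HWC array:
+#   img = np.ones((4, 4, 3), dtype=np.float32)
+#   out = PadToBoundingbox(img, 2, 2, 8, 8)
+#   # out.shape == (8, 8, 3); the input occupies rows/cols 2..5, the rest is padding_value
+
+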
+def Pad(image, padding, padding_value=0, mode='constant', is_hwc=True):
+    '''
+
+    Parameters
+    ----------
+    image:
+        A 3-D or 4-D Tensor.
+    padding:
+        An integer or a list/tuple. If a single number is provided, pad all borders with this value.
+        If a tuple or list of 2 values is provided, pad the top and bottom with the first value and the left and right with the second value.
+        If 4 values are provided as a list or tuple, they pad the (top, bottom, left, right) borders respectively.
+    padding_value:
+        In 'constant' mode, the scalar pad value to use. Must be the same type as the tensor.
+    mode:
+        One of 'constant', 'edge', 'reflect' or 'symmetric' (the numpy pad modes, lowercase).
+    Returns:
+        A padded Tensor. Has the same type as the input tensor.
+    -------
+
+    '''
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, Image.Image):
+        image = ToTensor(image)
+    if not isinstance(image, np.ndarray):
+        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))
+    shape_size = image.shape
+    if len(shape_size) == 3:
+        batch_size = 0
+    elif len(shape_size) == 4:
+        batch_size = shape_size[0]
+    else:
+        raise TypeError(
+            'img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). \
+            Got {}'.format(image.shape)
+        )
+    if mode not in ('constant', 'edge', 'reflect', 'symmetric'):
+        raise TypeError('mode should be one of (constant, edge, reflect, symmetric).')
+
+    if isinstance(padding, int):
+        padding = ((padding, padding), (padding, padding))
+    elif isinstance(padding, list) or isinstance(padding, tuple):
+        if len(padding) == 2:
+            padding = ((padding[0], padding[0]), (padding[1], padding[1]))
+        elif len(padding) == 4:
+            padding = ((padding[0], padding[1]), (padding[2], padding[3]))
+        else:
+            raise ValueError('The length of padding should be 2 or 4, but got {}.'.format(len(padding)))
+    else:
+        raise TypeError('Padding should be an integer or a list/tuple, but got {}.'.format(type(padding)))
+
+    if batch_size == 0:
+        if is_hwc:
+            padding = (padding[0], padding[1], (0, 0))
+        else:
+            padding = ((0, 0), padding[0], padding[1])
+    else:
+        if is_hwc:
+            padding = ((0, 0), padding[0], padding[1], (0, 0))
+        else:
+            padding = ((0, 0), (0, 0), padding[0], padding[1])
+    if mode == 'constant':
+        return ToTensor(np.pad(image, padding, mode=mode, constant_values=(padding_value, padding_value)))
+    else:
+        return ToTensor(np.pad(image, padding, mode=mode))
+
+
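+# Sketch: pad a hypothetical HWC image by 1 row top/bottom and 2 columns left/right:
+#   img = np.zeros((4, 4, 3))
+#   out = Pad(img, (1, 2), padding_value=255, mode='constant')
+#   # out.shape == (6, 8, 3)
+
+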
+def Standardization(image, mean=None, std=None, channel_mode=False, is_hwc=True):
+    '''
+
+    Parameters
+    ----------
+    image:
+        An n-D Tensor with at least 3 dimensions, the last 3 of which are the dimensions of each image.
+    mean:
+        List or tuple of mean values for each channel, with respect to channel order.
+    std:
+        List or tuple of standard deviations for each channel.
+    channel_mode:
+        Decide to implement standardization on the whole image or on each channel of the image.
+    Returns:
+        A Tensor with the same shape and dtype as image.
+    -------
+    '''
+
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, Image.Image) or isinstance(image, np.ndarray):
+        image = ToTensor(image)
+    if not isinstance(image, np.ndarray):
+        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))
+    num_shape = image.shape
+    if is_hwc:
+        height, width, channels = 0, 1, 2
+    else:
+        channels, height, width = 0, 1, 2
+    if mean is not None and std is not None:
+        if len(mean) != len(std):
+            raise ValueError("Length of mean and std must be equal")
+        if len(mean) == 1:
+            mean = [mean[0]] * num_shape[channels]
+            std = [std[0]] * num_shape[channels]
+        mean = np.array(mean, dtype=image.dtype)
+        std = np.array(std, dtype=image.dtype)
+        if is_hwc:
+            # channels-last: a length-C vector broadcasts against (..., C) directly
+            return ToTensor((image - mean) / std)
+        return ToTensor((image - mean[:, None, None]) / std[:, None, None])
+    elif mean is None and std is None:
+        if channel_mode:
+            num_pixels = num_shape[height] * num_shape[width]
+            image_mean = np.mean(image, axis=(height, width))
+            stddev = np.std(image, axis=(height, width))
+            # protect against uniform channels: never divide by less than 1/sqrt(#pixels)
+            min_stddev = 1 / np.sqrt(num_pixels)
+            adjusted_stddev = np.maximum(stddev, min_stddev)
+            if not is_hwc:
+                # reshape per-channel statistics so they broadcast against (C, H, W)
+                image_mean = image_mean[:, None, None]
+                adjusted_stddev = adjusted_stddev[:, None, None]
+            image = (image - image_mean) / adjusted_stddev
+            return ToTensor(image)
+        else:
+            num_pixels = num_shape[height] * num_shape[width] * num_shape[channels]
+            image_mean = np.mean(image)
+            stddev = np.std(image)
+            min_stddev = 1 / np.sqrt(num_pixels)
+            adjusted_stddev = np.maximum(stddev, min_stddev)
+            image = (image - image_mean) / adjusted_stddev
+            return ToTensor(image)
+    else:
+        raise ValueError('std and mean must both be None or not None')
+
+
+def RandomBrightness(image, factor):
+    '''
+
+    Parameters
+    ----------
+    image:
+        An image or images to adjust.
+    factor:
+        Float, must be in [0, 1]. The random delta is drawn from [-factor, factor].
+    Returns:
+        The brightness-adjusted image(s).
+    -------
+
+    '''
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, Image.Image) or isinstance(image, np.ndarray):
+        image = ToTensor(image)
+    if not isinstance(image, np.ndarray):
+        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))
+    if factor < 0 or factor > 1:
+        raise ValueError('factor should be in [0,1].')
+    delta = random.uniform(-factor, factor)
+    image = image + delta * 255
+    image = np.clip(image, 0, 255)
+
+    return image
+
+
+def RandomContrast(image, lower, upper, seed=None):
+    '''
+
+    Parameters
+    ----------
+    image:
+        An image tensor with 3 or more dimensions.
+    lower:
+        float. Lower bound for the random contrast factor.
+    upper:
+        float. Upper bound for the random contrast factor.
+    seed:
+        A Python integer. Used to create a random seed.
+
+    Returns:
+        The contrast-adjusted image(s).
+    -------
+    '''
+    if upper <= lower:
+        raise ValueError('upper must be > lower')
+    if lower < 0:
+        raise ValueError('lower must be non-negative')
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, np.ndarray):
+        image = ToPIL(image)
+    if not isinstance(image, Image.Image):
+        raise TypeError(augment_error_message.format(type(image)))
+
+    factor = random.uniform(lower, upper)
+    image = ImageEnhance.Contrast(image).enhance(factor)
+
+    return ToTensor(image)
+
+
+def RandomHue(image, factor, seed=None):
+    '''
+
+    Parameters
+    ----------
+    image:
+        RGB image or images. The size of the last dimension must be 3.
+    factor:
+        float. The maximum value for the random factor.
+    seed:
+        An operation-specific seed.
+
+    Returns:
+        Adjusted numpy ndarray image(s).
+ ------- + + ''' + + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, np.ndarray): + image = ToPIL(image) + if not isinstance(image, Image.Image): + raise TypeError(augment_error_message.format(type(image))) + + if factor > 0.5 or factor < 0: + raise ValueError('factor should be in [0,0.5].') + + image_hue_factor = random.uniform(-factor, factor) + mode = image.mode + if mode in {'L', '1', 'I', 'F'}: + return image + + hue, saturation, value = image.convert('HSV').split() + + np_hue = np.array(hue, dtype=np.uint8) + + with np.errstate(over='ignore'): + np_hue += np.uint8(image_hue_factor * 255) + hue = Image.fromarray(np_hue, 'L') + + image = Image.merge('HSV', (hue, saturation, value)).convert(mode) + + return ToTensor(image) + + +def RandomSaturation(image, lower, upper, seed=None): + ''' + Parameters + ---------- + image: + RGB image or images. The size of the last dimension must be 3. + lower: + float. Lower bound for the random saturation factor. + upper: + float. Upper bound for the random saturation factor. + seed: + An operation-specific seed. + + Returns; + Adjusted numpy ndarray image(s). + ------- + ''' + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, np.ndarray): + image = ToPIL(image) + if not isinstance(image, Image.Image): + raise TypeError(augment_error_message.format(type(image))) + if upper <= lower: + raise ValueError('upper must be > lower.') + + if lower < 0: + raise ValueError('lower must be non-negative.') + factor = random.uniform(lower, upper) + enhancer = ImageEnhance.Color(image) + image = enhancer.enhance(factor) + + return ToTensor(image) + + +def RandomCrop(image, size, is_hwc=True): + ''' + + Parameters + ---------- + image: + Input an image to crop. + size: + if size is an integer, shape of cropped image will be [size, size, 3]. if length of size is 2. + shape of cropped image will be [height, width, 3]. + Returns: + A cropped image of the same rank as image and shape size. + ------- + ''' + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, Image.Image) or isinstance(image, np.ndarray): + image = ToTensor(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. 
Got {}'.format(type(image)))
+    if isinstance(size, int):
+        size = (size, size)
+    elif not (isinstance(size, (tuple, list)) and len(size) == 2):
+        raise ValueError("Size should be a single integer or a list/tuple (h, w) of length 2.")
+
+    def _input_to_factor_(image, size, is_hwc):
+        if len(image.shape) == 3:
+            if is_hwc:
+                height, width, channels = image.shape
+            else:
+                channels, height, width = image.shape
+        else:
+            if is_hwc:
+                batch, height, width, channels = image.shape
+            else:
+                batch, channels, height, width = image.shape
+
+        target_height, target_width = size
+        if target_height > height or target_width > width:
+            raise ValueError("Crop size {} is larger than input image size {}".format(size, (height, width)))
+        if target_height == height and target_width == width:
+            return 0, 0, height, width
+
+        top = random.randint(0, height - target_height)
+        left = random.randint(0, width - target_width)
+        return top, left, target_height, target_width
+
+    top, left, height, width = _input_to_factor_(image, size, is_hwc)
+
+    return Crop(image, top, left, height, width, is_hwc)
+
+
+def Resize(image, size, method='bilinear', preserve_aspect_ratio=False, antialias=False):
+    '''
+
+    Parameters
+    ----------
+    image:
+        Input an image to resize.
+    size:
+        If size is an integer, the shape of the resized image will be [size, size, 3]. If size has length 2,
+        the shape of the resized image will be [height, width, 3].
+    method:
+        An image.ResizeMethod, or string equivalent; one of ('nearest', 'bilinear', 'cubic', 'lanczos', 'bicubic'). Defaults to 'bilinear'.
+    preserve_aspect_ratio:
+        Whether to preserve the aspect ratio.
+    antialias:
+        Whether to use an anti-aliasing filter when downsampling an image.
+    Returns:
+        A resized image.
+    -------
+    '''
+    DE_PY_INTER_MODE = {
+        'nearest': Image.NEAREST,
+        'bilinear': Image.BILINEAR,
+        'cubic': Image.CUBIC,
+        'lanczos': Image.LANCZOS,
+        'bicubic': Image.BICUBIC
+    }
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, np.ndarray):
+        image = ToPIL(image)
+    if not isinstance(image, Image.Image):
+        raise TypeError(augment_error_message.format(type(image)))
+    if isinstance(size, int):
+        size = (size, size)
+    elif isinstance(size, (tuple, list)) and len(size) == 2:
+        target_height, target_width = size
+        size = (target_width, target_height)  # PIL expects (width, height)
+    else:
+        raise ValueError("Size should be a single integer or a list/tuple (h, w) of length 2.")
+    if method not in ('nearest', 'bilinear', 'cubic', 'lanczos', 'bicubic'):
+        raise TypeError('Unknown resize method! Resize method must be in (nearest, bilinear, cubic, lanczos, bicubic)')
+
+    if preserve_aspect_ratio:
+        width, height = image.size
+        target_width, target_height = size
+        scale_factor_height = float(target_height / height)
+        scale_factor_width = float(target_width / width)
+        scale_factor = np.minimum(scale_factor_height, scale_factor_width)
+        new_target_height = int(scale_factor * height)
+        new_target_width = int(scale_factor * width)
+        size = (new_target_width, new_target_height)
+    interpolation = DE_PY_INTER_MODE[method]
+    if antialias:
+        # use PIL's high-quality ANTIALIAS filter for a single resize instead of resizing twice
+        interpolation = Image.ANTIALIAS
+    image = image.resize(size, interpolation)
+
+    return ToTensor(image)
+
+
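+# Sketch: downscale a hypothetical HWC array with the PIL-backed Resize:
+#   img = np.random.uniform(0, 255, (32, 48, 3))
+#   out = Resize(img, (16, 24), method='bilinear')
+#   # out.shape == (16, 24, 3)
+
+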
+def CropAndResize(image, boxes, box_indices, crop_size, method='bilinear', extrapolation_value=0, is_hwc=True):
+    '''
+
+    Parameters
+    ----------
+    image:
+        A 4-D tensor of shape [batch, image_height, image_width, depth]. Both image_height and image_width need to be positive.
+    boxes:
+        A 2-D tensor of shape [num_boxes, 4].
+    box_indices:
+        A 1-D tensor of shape [num_boxes] with int32 values in [0, batch).
+        The value of box_indices[i] specifies the image that the i-th box refers to.
+    crop_size:
+        A 1-D tensor of 2 elements, size = [crop_height, crop_width]. All cropped image patches are resized to this size.
+        The aspect ratio of the image content is not preserved. Both crop_height and crop_width need to be positive.
+    method:
+        An optional string specifying the sampling method for resizing.
+        It can be either "bilinear" or "nearest" and defaults to "bilinear".
+    extrapolation_value:
+        An optional float. Defaults to 0. Value used for extrapolation, when applicable.
+    Returns:
+        A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth].
+    -------
+
+    '''
+    if method not in ["bilinear", "nearest"]:
+        raise ValueError('method must be bilinear or nearest.')
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, Image.Image) or isinstance(image, np.ndarray):
+        image = ToTensor(image)
+    if not isinstance(image, np.ndarray):
+        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))
+    boxes = np.asarray(boxes)
+    box_indices = np.asarray(box_indices)
+    image_shape = image.shape
+    if len(image_shape) == 4:
+        batch_size = image_shape[0]
+    elif len(image_shape) == 3:
+        image = np.expand_dims(image, axis=0)
+    else:
+        raise ValueError('Input must be a 3-D or 4-D image Tensor.')
+
+    box_num = boxes.shape[0]  # boxes.shape is [n, 4], where n is the number of boxes
+    if not is_hwc:
+        # convert to BHWC up front so the computations below only have to handle one layout
+        image = np.transpose(image, (0, 2, 3, 1))
+    batch_size, height, width, channels = image.shape
+    return_image = np.zeros((box_num, crop_size[0], crop_size[1], channels))
+    for i in range(box_num):
+        # flip first if the box is given upside down: y1 > y2 needs a vertical flip,
+        # x1 > x2 a horizontal flip
+        y1, x1, y2, x2 = boxes[i]
+        cur_image = image[box_indices[i]]
+        if y1 > y2:
+            cur_image = FlipVertical(cur_image)
+            y1, y2 = y2, y1
+        if x1 > x2:
+            cur_image = FlipHorizontal(cur_image)
+            x1, x2 = x2, x1
+        top_padding = 0 if y1 > 0 else int(round(height * (-y1)))
+        left_padding = 0 if x1 > 0 else int(round(width * (-x1)))
+        bottom_padding = 0 if y2 < 1 else int(round(height * (y2 - 1)))
+        right_padding = 0 if x2 < 1 else int(round(width * (x2 - 1)))
+        # pad when part of the box falls outside the image
+        target_height = top_padding + height + bottom_padding
+        target_width = left_padding + width + right_padding
+        if target_height != height or target_width != width:
+            cur_image = PadToBoundingbox(
+                cur_image, offset_height=top_padding, offset_width=left_padding, target_height=target_height,
+                target_width=target_width, padding_value=extrapolation_value, is_hwc=True  # image is HWC here
+            )
+        offset_height = 0 if y1 < 0 else int(round(height * y1))
+        offset_width = 0 if x1 < 0 else int(round(width * x1))
+        target_height = int(round(height * (y2 - y1)))
+        target_width = int(round(width * (x2 - x1)))
+        crop_image = Crop(cur_image, offset_height, offset_width, target_height, target_width, is_hwc=True)
+        resized_image = Resize(crop_image, crop_size, method=method)
+        return_image[i] = resized_image
+    if not is_hwc:
+        return_image = np.transpose(return_image, (0, 3, 1, 2))
+    return ToTensor(return_image)
+
+
+def CropOrPad(image, target_height, target_width, is_hwc=True):
+    '''
+    Resizes an image to a target width and height by either centrally cropping the image or padding it evenly with zeros.
+
+    Parameters
+    ----------
+    image:
+        3-D Tensor of shape [height, width, channels].
+    target_height:
+        Target height.
+    target_width:
+        Target width.
+    Returns:
+        Cropped and/or padded image.
+    -------
+    '''
+
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, Image.Image) or isinstance(image, np.ndarray):
+        image = ToTensor(image)
+    if not isinstance(image, np.ndarray):
+        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))
+    shape_size = len(image.shape)
+    if not shape_size in (3, 4):
+        raise TypeError(
+            'img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). \
+            Got {}'.format(image.shape)
+        )
+    if target_height <= 0:
+        raise ValueError('target_height must be > 0.')
+    if target_width <= 0:
+        raise ValueError('target_width must be > 0.')
+    if shape_size == 3:
+        if is_hwc:
+            height, width, channels = image.shape
+        else:
+            channels, height, width = image.shape
+    else:
+        if is_hwc:
+            batch, height, width, channels = image.shape
+        else:
+            batch, channels, height, width = image.shape
+    offset_height = height - target_height
+    offset_width = width - target_width
+    offset_crop_height = max(offset_height // 2, 0)
+    offset_crop_width = max(offset_width // 2, 0)
+    offset_pad_height = max(-offset_height // 2, 0)
+    offset_pad_width = max(-offset_width // 2, 0)
+    cropped = Crop(
+        image, offset_crop_height, offset_crop_width, min(height, target_height), min(width, target_width), is_hwc
+    )
+
+    padded = PadToBoundingbox(cropped, offset_pad_height, offset_pad_width, target_height, target_width, is_hwc=is_hwc)
+
+    return ToTensor(padded)
+
+
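+# Sketch: CropOrPad center-crops the oversized axis and center-pads the undersized one:
+#   img = np.ones((10, 4, 3))
+#   out = CropOrPad(img, 6, 6)
+#   # out.shape == (6, 6, 3): rows cropped 10 -> 6, columns padded 4 -> 6
+
+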
+def ResizeAndPad(image, target_height, target_width, method='bilinear', antialias=False, is_hwc=True):
+    '''
+
+    Parameters
+    ----------
+    image:
+        4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels].
+    target_height:
+        Target height.
+    target_width:
+        Target width.
+    is_hwc:
+        The flag of image shape, (H, W, C) or (N, H, W, C) if True and (C, H, W) or (N, C, H, W) if False (default=True).
+    Returns:
+        Resized and padded image. If the input was 4-D, a 4-D float Tensor of shape [batch, new_height, new_width, channels].
+        If the input was 3-D, a 3-D float Tensor of shape [new_height, new_width, channels].
+    -------
+
+    '''
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, Image.Image) or isinstance(image, np.ndarray):
+        image = ToTensor(image)
+    if not isinstance(image, np.ndarray):
+        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))
+    shape_size = len(image.shape)
+    if not shape_size in (3, 4):
+        raise TypeError(
+            'img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). \
+            Got {}'.format(image.shape)
+        )
+    if target_height <= 0:
+        raise ValueError('target_height must be > 0.')
+    if target_width <= 0:
+        raise ValueError('target_width must be > 0.')
+    if shape_size == 3:
+        if is_hwc:
+            height, width, channels = image.shape
+        else:
+            channels, height, width = image.shape
+    else:
+        if is_hwc:
+            batch, height, width, channels = image.shape
+        else:
+            batch, channels, height, width = image.shape
+    height = float(height)
+    width = float(width)
+    ratio = max(height / target_height, width / target_width)
+    resized_height = int(round(height / ratio))
+    resized_width = int(round(width / ratio))
+    padding_height = max(0, int(round((target_height - resized_height) / 2)))
+    padding_width = max(0, int(round((target_width - resized_width) / 2)))
+    resized = Resize(
+        image, size=(resized_height, resized_width), method=method, antialias=antialias
+    )  # TODO: Resize goes through PIL, which handles one image at a time; resizing a batch still needs work
+    padded = PadToBoundingbox(resized, padding_height, padding_width, target_height, target_width, is_hwc=is_hwc)
+    return ToTensor(padded)
+
+
+def rgb_to_hsv(np_rgb_img, is_hwc):
+    """
+    Convert an RGB image to an HSV image.
+
+    Args:
+        np_rgb_img (numpy.ndarray): NumPy RGB image array of shape (H, W, C) or (C, H, W) to be converted.
+        is_hwc (Bool): If True, the shape of np_rgb_img is (H, W, C), otherwise it must be (C, H, W).
+
+    Returns:
+        np_hsv_img (numpy.ndarray), NumPy HSV image with the same shape as np_rgb_img.
+    """
+    if is_hwc:
+        r, g, b = np_rgb_img[:, :, 0], np_rgb_img[:, :, 1], np_rgb_img[:, :, 2]
+    else:
+        r, g, b = np_rgb_img[0, :, :], np_rgb_img[1, :, :], np_rgb_img[2, :, :]
+    to_hsv = np.vectorize(colorsys.rgb_to_hsv)
+    h, s, v = to_hsv(r, g, b)
+    if is_hwc:
+        axis = 2
+    else:
+        axis = 0
+    np_hsv_img = np.stack((h, s, v), axis=axis)
+    return np_hsv_img
+
+
+def RgbToHsv(image, is_hwc=True):
+
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, Image.Image) or isinstance(image, np.ndarray):
+        image = ToTensor(image)
+    if not isinstance(image, np.ndarray):
+        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))
+
+    shape_size = len(image.shape)
+
+    if not shape_size in (3, 4):
+        raise TypeError(
+            'img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). \
+            Got {}'.format(image.shape)
+        )
+
+    if shape_size == 3:
+        batch_size = 0
+        if is_hwc:
+            num_channels = image.shape[2]
+        else:
+            num_channels = image.shape[0]
+    else:
+        batch_size = image.shape[0]
+        if is_hwc:
+            num_channels = image.shape[3]
+        else:
+            num_channels = image.shape[1]
+
+    if num_channels != 3:
+        raise TypeError('img should be a 3-channel RGB img. Got {} channels'.format(num_channels))
+    if batch_size == 0:
+        return ToTensor(rgb_to_hsv(image, is_hwc))
+    return ToTensor([rgb_to_hsv(img, is_hwc) for img in image])
+
+
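+# Sketch: the colorsys-backed conversion expects float values in [0, 1]:
+#   rgb = np.random.uniform(size=(8, 8, 3))
+#   hsv = RgbToHsv(rgb)          # (8, 8, 3), channels now H, S, V
+#   back = HsvToRgb(hsv)         # recovers rgb up to float rounding
+
+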
+def Transpose(image, order):
+    """
+    Transpose the input image with the given order.
+    """
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, Image.Image) or isinstance(image, np.ndarray):
+        image = ToTensor(image)
+    if not isinstance(image, np.ndarray):
+        raise TypeError('img should be NumPy image. Got {}'.format(type(image)))
+    if len(image.shape) == 3:
+        if len(order) != 3:
+            raise ValueError('if image is a 3-D tensor, order should be a list/tuple with length of 3')
+        return ToTensor(np.transpose(image, order))
+    elif len(image.shape) == 4:
+        if len(order) != 4:
+            raise ValueError('if image is a 4-D tensor, order should be a list/tuple with length of 4')
+        return ToTensor(np.transpose(image, order))
+    else:
+        raise ValueError('\'image\' must have either 3 or 4 dimensions.')
+
+
+def RandomRotation(
+    image, degrees, fill_mode='nearest', fill_value=0, center=None, expand=False, is_hwc=True, interpolation_order=1
+):
+    if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image):
+        image = Decode(image)
+    if isinstance(image, np.ndarray):
+        image = ToPIL(image)
+    if not isinstance(image, Image.Image):
+        raise TypeError(augment_error_message.format(type(image)))
+
+    if isinstance(degrees, numbers.Number):
+        if degrees < 0:
+            raise ValueError("If degrees is a single number, it cannot be negative.")
+        degrees = (-degrees, degrees)
+    elif isinstance(degrees, (list, tuple)):
+        if len(degrees) != 2:
+            raise ValueError("If degrees is a sequence, the length must be 2.")
+    else:
+        raise TypeError("Degrees must be a single non-negative number or a sequence")
+
+    DE_PY_INTER_MODE = {
+        'nearest': Image.NEAREST,
+        'bilinear': Image.BILINEAR,
+        'antialias': Image.ANTIALIAS,
+        'bicubic': Image.BICUBIC
+    }
+    if fill_mode not in ('nearest', 'bilinear', 'antialias', 'bicubic'):
+        raise TypeError('fill_mode must be in (nearest, bilinear, antialias, bicubic)')
+
+    if isinstance(fill_value, int):
+        fill_value = tuple([fill_value] * 3)
+
+    angle = random.uniform(degrees[0], degrees[1])
+    fill_mode = DE_PY_INTER_MODE[fill_mode]
+    return ToTensor(image.rotate(angle, fill_mode, expand, center, fillcolor=fill_value))
+
+
+def RandomShift(image, shift, fill_mode='nearest', fill_value=0, is_hwc=True, interpolation_order=1):
+    '''
+
+    Parameters
+    ----------
+    image:
+        Input tensor. Must be 3D.
+    shift:
+        int or list/tuple. If shift is an int, the width shift range will equal the height shift range.
+        If shift is a list/tuple, the shift range will be [width fraction, height fraction].
+    is_hwc:
+        The flag of image shape, (H, W, C) or (N, H, W, C) if True and (C, H, W) or (N, C, H, W) if False (default=True).
+    fill_mode:
+        Points outside the boundaries of the input are filled according to the given mode (one of {'nearest', 'bilinear', 'bicubic'}).
+    fill_value:
+        Value used for points outside the boundaries of the input.
+    interpolation_order:
+        int, order of spline interpolation, see ndimage.interpolation.affine_transform.
+    Returns:
+        Shifted Numpy image tensor.
+ ------- + + ''' + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, np.ndarray): + image = ToPIL(image) + if not isinstance(image, Image.Image): + raise TypeError(augment_error_message.format(type(image))) + + if isinstance(shift, numbers.Number): + width_fraction = shift + height_fraction = shift + elif isinstance(shift, list) or isinstance(shift, tuple): + if len(shift) == 2: + width_fraction = shift[0] + height_fraction = shift[1] + else: + raise ValueError('shift must be int or list/tuple of length 2') + + DE_PY_INTER_MODE = {'nearest': Image.NEAREST, 'bilinear': Image.BILINEAR, 'bicubic': Image.BICUBIC} + if fill_mode not in ('nearest', 'bilinear', 'bicubic'): + raise TypeError('Fill_mode must be in (nearest,bilinear,bicubic)') + fill_mode = DE_PY_INTER_MODE[fill_mode] + width, height = image.size + max_dx = width_fraction * width + max_dy = height_fraction * height + translations = (np.round(random.uniform(-max_dx, max_dx)), np.round(random.uniform(-max_dy, max_dy))) + + scale = 1.0 + shear = 0.0 + output_size = image.size + center = (width * 0.5 + 0.5, height * 0.5 + 0.5) + + angle = math.radians(0) + shear = math.radians(shear) + shear = [shear, 0] + scale = 1.0 / scale + d = math.cos(angle + shear[0]) * math.cos(angle + shear[1]) + \ + math.sin(angle + shear[0]) * math.sin(angle + shear[1]) + matrix = [ + math.cos(angle + shear[0]), + math.sin(angle + shear[0]), 0, -math.sin(angle + shear[1]), + math.cos(angle + shear[1]), 0 + ] + matrix = [scale / d * m for m in matrix] + matrix[2] += matrix[0] * (-center[0] - translations[0]) + matrix[1] * (-center[1] - translations[1]) + matrix[5] += matrix[3] * (-center[0] - translations[0]) + matrix[4] * (-center[1] - translations[1]) + + # Apply center translation: C * RSS^-1 * C^-1 * T^-1 + matrix[2] += center[0] + matrix[5] += center[1] + + if __version__ >= '5': + kwargs = {"fillcolor": fill_value} + else: + kwargs = {} + return ToTensor(image.transform(output_size, Image.AFFINE, matrix, fill_mode, **kwargs)) + + +def RandomShear(image, degree, fill_mode='nearest', fill_value=0, is_hwc=True, interpolation_order=1): + ''' + + Parameters + ---------- + image + Input tensor. Must be 3D. + shift: + int or list/tuple, if shift is int, Width shift range will equal to height shift range. + if shift is list/tuple, shift range will be [width fraction, height fraction] + is_hwc: + The flag of image shape, (H, W, C) or (N, H, W, C) if True and (C, H, W) or (N, C, H, W) if False (default=True). + fill_mode: + Points outside the boundaries of the input are filled according to the given mode (one of {'constant', 'nearest', 'reflect', 'wrap'}). + fill_value: + Value used for points outside the boundaries of the input if mode='constant'. + interpolation_order + int, order of spline interpolation. see ndimage.interpolation.affine_transform + Returns + Shifted Numpy image tensor. 
+ ------- + + ''' + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, np.ndarray): + image = ToPIL(image) + if not isinstance(image, Image.Image): + raise TypeError(augment_error_message.format(type(image))) + DE_PY_INTER_MODE = {'nearest': Image.NEAREST, 'bilinear': Image.BILINEAR, 'bicubic': Image.BICUBIC} + if fill_mode not in ('nearest', 'bilinear', 'bicubic'): + raise TypeError('Fill_mode must be in (nearest,bilinear,bicubic)') + fill_mode = DE_PY_INTER_MODE[fill_mode] + width, height = image.size + translations = (0, 0) + scale = 1.0 + shear = degree + output_size = image.size + center = (width * 0.5 + 0.5, height * 0.5 + 0.5) + angle = math.radians(0) + + if shear is not None: + if isinstance(shear, numbers.Number): + shear = (-1 * shear, shear) + shear = [random.uniform(shear[0], shear[1]), random.uniform(shear[0], shear[1])] + elif len(shear) == 2 or len(shear) == 4: + if len(shear) == 2: + shear = [shear[0], shear[1], shear[0], shear[1]] + elif len(shear) == 4: + shear = [s for s in shear] + shear = [random.uniform(shear[0], shear[1]), random.uniform(shear[2], shear[3])] + else: + raise ValueError( + "Shear should be a single value or a tuple/list containing " + "two values. Got {}".format(shear) + ) + shear = [math.radians(s) for s in shear] + else: + shear = [0, 0] + + + d = math.cos(angle + shear[0]) * math.cos(angle + shear[1]) + \ + math.sin(angle + shear[0]) * math.sin(angle + shear[1]) + matrix = [ + math.cos(angle + shear[0]), + math.sin(angle + shear[0]), 0, -math.sin(angle + shear[1]), + math.cos(angle + shear[1]), 0 + ] + matrix = [scale / d * m for m in matrix] + matrix[2] += matrix[0] * (-center[0] - translations[0]) + matrix[1] * (-center[1] - translations[1]) + matrix[5] += matrix[3] * (-center[0] - translations[0]) + matrix[4] * (-center[1] - translations[1]) + + # Apply center translation: C * RSS^-1 * C^-1 * T^-1 + matrix[2] += center[0] + matrix[5] += center[1] + + if __version__ >= '5': + kwargs = {"fillcolor": fill_value} + else: + kwargs = {} + return ToTensor(image.transform(output_size, Image.AFFINE, matrix, fill_mode, **kwargs)) + + +def RandomZoom(image, zoom_range, fill_mode='nearest', fill_value=0, is_hwc=True, interpolation_order=1): + ''' + + Parameters + ---------- + image: + Input tensor. Must be 3D. + zoom_range: + Tuple of floats; zoom range for width and height. + is_hwc: + The flag of image shape, (H, W, C) or (N, H, W, C) if True and (C, H, W) or (N, C, H, W) if False (default=True). + fill_mode: + Points outside the boundaries of the input are filled according to the given mode (one of {'constant', 'nearest', 'reflect', 'wrap'}). + fill_value: + Value used for points outside the boundaries of the input if mode='constant'. + interpolation_order: + int, order of spline interpolation. see ndimage.interpolation.affine_transform + + Returns + Zoomed Numpy image tensor. 
+ ------- + + ''' + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, np.ndarray): + image = ToPIL(image) + if not isinstance(image, Image.Image): + raise TypeError(augment_error_message.format(type(image))) + DE_PY_INTER_MODE = {'nearest': Image.NEAREST, 'bilinear': Image.BILINEAR, 'bicubic': Image.BICUBIC} + if isinstance(zoom_range, list) or isinstance(zoom_range, tuple): + if len(zoom_range) == 2: + scale = random.uniform(zoom_range[0], zoom_range[1]) + else: + raise ValueError('The length of zoom_range must be 2') + else: + raise ValueError( + "Zoom_range should be a single value or a tuple/list containing " + "two values. Got {}".format(zoom_range) + ) + if fill_mode not in ('nearest', 'bilinear', 'bicubic'): + raise TypeError('Fill_mode must be in (nearest,bilinear,bicubic)') + fill_mode = DE_PY_INTER_MODE[fill_mode] + width, height = image.size + translations = (0, 0) + shear = (0, 0) + output_size = image.size + center = (width * 0.5 + 0.5, height * 0.5 + 0.5) + angle = math.radians(0) + + d = math.cos(angle + shear[0]) * math.cos(angle + shear[1]) + \ + math.sin(angle + shear[0]) * math.sin(angle + shear[1]) + matrix = [ + math.cos(angle + shear[0]), + math.sin(angle + shear[0]), 0, -math.sin(angle + shear[1]), + math.cos(angle + shear[1]), 0 + ] + matrix = [scale / d * m for m in matrix] + matrix[2] += matrix[0] * (-center[0] - translations[0]) + matrix[1] * (-center[1] - translations[1]) + matrix[5] += matrix[3] * (-center[0] - translations[0]) + matrix[4] * (-center[1] - translations[1]) + + # Apply center translation: C * RSS^-1 * C^-1 * T^-1 + matrix[2] += center[0] + matrix[5] += center[1] + + if __version__ >= '5': + kwargs = {"fillcolor": fill_value} + else: + kwargs = {} + return ToTensor(image.transform(output_size, Image.AFFINE, matrix, fill_mode, **kwargs)) + + +def Rescale(image, scale, offset=0): + ''' + + Parameters + ---------- + image: + 3-D image or 4-D images + scale: + Float, the scale to apply to the inputs. + offset: + Float, the offset to apply to the inputs. + Returns: + rescaled images + ------- + ''' + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, Image.Image) or isinstance(image, np.ndarray): + image = ToTensor(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. Got {}'.format(type(image))) + + return ToTensor(image * scale + offset) + + +def RandomFlipVertical(image, prob=0.5): + + if prob > random.random(): + image = FlipVertical(image) + return image + + +def RandomFlipHorizontal(image, prob=0.5): + + if prob > random.random(): + image = FlipHorizontal(image) + return image + + +def HWC2CHW(image): + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, Image.Image) or isinstance(image, np.ndarray): + image = ToTensor(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. 
Got {}'.format(type(image))) + + image_shape = image.shape + if (len(image_shape) == 3): + return Transpose(image, (2, 0, 1)) + elif (len(image_shape) == 4): + return Transpose(image, (0, 3, 1, 2)) + else: + raise ValueError('\'image\' must have either 3 or 4 dimensions.') + + +def CHW2HWC(image): + if not isinstance(image, np.ndarray) and not isinstance(image, Image.Image): + image = Decode(image) + if isinstance(image, Image.Image) or isinstance(image, np.ndarray): + image = ToTensor(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. Got {}'.format(type(image))) + + image_shape = image.shape + if (len(image_shape) == 3): + return Transpose(image, (1, 2, 0)) + elif (len(image_shape) == 4): + return Transpose(image, (0, 2, 3, 1)) + else: + raise ValueError('\'image\' must have either 3 or 4 dimensions.') diff --git a/tensorlayer/dataflow/image/paddle_image.py b/tensorlayer/dataflow/image/paddle_image.py new file mode 100644 index 0000000..b33ef15 --- /dev/null +++ b/tensorlayer/dataflow/image/paddle_image.py @@ -0,0 +1,19 @@ +import paddle +import numpy as np +from PIL import Image +from paddle.vision.transforms import functional as F + +__all_ = [ + 'Standardization', +] + + +def Standardization(img, mean, std, data_format='HWC'): + + if data_format == 'CHW': + mean = paddle.to_tensor(mean).reshape([-1, 1, 1]) + std = paddle.to_tensor(std).reshape([-1, 1, 1]) + else: + mean = paddle.to_tensor(mean) + std = paddle.to_tensor(std) + return (img - mean) / std diff --git a/tensorlayer/dataflow/image/tensorflow_image.py b/tensorlayer/dataflow/image/tensorflow_image.py new file mode 100644 index 0000000..ca0ce41 --- /dev/null +++ b/tensorlayer/dataflow/image/tensorflow_image.py @@ -0,0 +1,760 @@ +import tensorflow as tf +import numpy as np +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import array_ops +from tensorflow.python.framework import ops +from tensorflow.python.ops.image_ops_impl import _AssertAtLeast3DImage +from tensorflow.python.framework import dtypes +from tensorflow.python.ops.image_ops_impl import convert_image_dtype +import numbers + +__all__ = [ + 'CentralCrop', + 'HsvToRgb', + 'AdjustBrightness', + 'AdjustContrast', + 'AdjustHue', + 'AdjustSaturation', + 'Crop', + 'FlipHorizontal', + 'FlipVertical', + 'GrayToRgb', + 'Standardization', + 'RgbToGray', + 'PadToBoundingbox', + 'Pad', + 'RandomBrightness', + 'RandomContrast', + 'RandomHue', + 'RandomSaturation', + 'RandomCrop', + 'Resize', + 'CropAndResize', + 'CropOrPad', + 'ResizeAndPad', + 'RgbToHsv', + 'Transpose', + 'RandomRotation', + 'RandomShift', + 'RandomShear', + 'RandomZoom', + 'Rescale', + 'RandomFlipVertical', + 'RandomFlipHorizontal', + 'HWC2CHW', + 'CHW2HWC', +] + + +def CentralCrop(image, central_fraction=None, size=None): + ''' + + Parameters + ---------- + image : + input Either a 3-D float Tensor of shape [height, width, depth], + or a 4-D Tensor of shape [batch_size, height, width, depth]. + central_fraction : + float (0, 1], fraction of size to crop + size: + size (Union[int, sequence]) – The output size of the cropped image. If size is an integer, a square crop of size (size, size) is returned. + If size is a sequence of length 2, it should be (height, width). + Returns : + 3-D / 4-D float Tensor, as per the input. + ------- + If backend is tensorflow, central_fraction will be used preferentially. if size is used,the height-width ratio will be equivalent to original ratio.. + If backend is mindspore, size will be used preferentially. 
+ ''' + if size is None and central_fraction is None: + raise ValueError('central_fraction and size can not be both None') + + if central_fraction is None: + outshape = np.shape(image) + if len(outshape) == 3: + h_axis = 0 + w_axis = 1 + elif len(outshape) == 4: + h_axis = 1 + w_axis = 2 + + if isinstance(size, numbers.Number): + target_height = size + target_width = size + elif isinstance(size, tuple) or isinstance(size, list): + if len(size) == 2: + target_height = size[0] + target_width = size[1] + else: + raise ValueError('The length of size must be 2') + else: + raise ValueError("Size should be a single integer or a list/tuple (h, w) of length 2.") + if target_height > outshape[h_axis] or target_width > outshape[w_axis]: + raise ValueError("Centralcrop image size must < original image size.") + central_fraction = max(target_height / outshape[h_axis], target_width / outshape[w_axis]) + else: + if central_fraction > 1 or central_fraction <= 0: + raise ValueError('central_fraction must be in (0,1].') + + return tf.image.central_crop(image, central_fraction) + + +def HsvToRgb(image): + + return tf.image.hsv_to_rgb(image) + + +def AdjustBrightness(image, factor): + + return tf.image.adjust_brightness(image, delta=factor) + + +def AdjustContrast(image, factor): + + return tf.image.adjust_contrast(image, contrast_factor=factor) + + +def AdjustHue(image, factor): + + return tf.image.adjust_hue(image, delta=factor) + + +def AdjustSaturation(image, factor): + + return tf.image.adjust_saturation(image, saturation_factor=factor) + + +def Crop(image, offset_height, offset_width, target_height, target_width, is_hwc=True): + ''' + + Parameters + ---------- + image: + A image or a batch of images + offset_height: + Vertical coordinate of the top-left corner of the result in the input. + offset_width: + Horizontal coordinate of the top-left corner of the result in the input. + target_height: + Height of the result. + target_width: + Width of the result. + + Returns: + Output [batch, target_height, target_width, channels] or [target_height, target_width, channels] + ------- + ''' + + return tf.image.crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width) + + +def FlipHorizontal(image): + + return tf.image.flip_left_right(image) + + +def FlipVertical(image): + + return tf.image.flip_up_down(image) + + +def GrayToRgb(image): + + return tf.image.grayscale_to_rgb(image) + + +def RgbToGray(image): + + return tf.image.rgb_to_grayscale(image) + + +def PadToBoundingbox(image, offset_height, offset_width, target_height, target_width, padding_value=0, is_hwc=True): + + return tf.image.pad_to_bounding_box( + image, + offset_height, + offset_width, + target_height, + target_width, + ) + + +def Pad(image, padding, padding_value=0, mode='constant'): + ''' + + Parameters + ---------- + image: + A 3-D or 4-D Tensor. + padding: + An integer or a list/tuple. If a single number is provided, pad all borders with this value. + If a tuple or list of 2 values is provided, pad the left and top with the first value and the right and bottom with the second value. + If 4 values are provided as a list or tuple, pad the (top, bottom, left, right) respectively. + padding_value: + In "CONSTANT" mode, the scalar pad value to use. Must be same type as tensor. + mode: + One of "CONSTANT", "REFLECT", or "SYMMETRIC" (case-insensitive) + Returns: + A padded Tensor. Has the same type as tensor. 
+ ------- + + ''' + image_shape = image.shape + if len(image_shape) == 3: + batch_size = 0 + elif len(image_shape) == 4: + batch_size = image_shape[0] + else: + raise TypeError('Image must be a 3-D tensor or 4-D tensor.') + + if isinstance(padding, int): + padding = ((padding, padding), (padding, padding)) + elif isinstance(padding, list) or isinstance(padding, tuple): + if len(padding) == 2: + padding = ((padding[0], padding[0]), (padding[1], padding[1])) + elif len(padding) == 4: + padding = ((padding[0], padding[1]), (padding[2], padding[3])) + else: + raise ValueError('The length of padding should be 2 or 4, but got {}.'.format(len(padding))) + else: + raise TypeError('Padding should be an integer or a list/tuple, but got {}.'.format(type(padding))) + if batch_size == 0: + padding = (padding[0], padding[1], (0, 0)) + else: + padding = ((0, 0), padding[0], padding[1], (0, 0)) + + return tf.pad(image, padding, mode=mode, constant_values=padding_value) + + +def Standardization(image, mean=None, std=None, channel_mode=False): + ''' + + Parameters + ---------- + image: + An n-D Tensor with at least 3 dimensions, the last 3 of which are the dimensions of each image. + mean: + List or tuple of mean values for each channel, with respect to channel order. + std: + List or tuple of standard deviations for each channel. + channel_mode: + Decide to implement standardization on whole image or each channel of image. + Returns: + A Tensor with the same shape and dtype as image. + ------- + ''' + image = tf.cast(image, tf.float32) + with ops.name_scope(None, 'Standardization', [image]) as scope: + image = ops.convert_to_tensor(image, name='image') + image = _AssertAtLeast3DImage(image) + + orig_dtype = image.dtype + if orig_dtype not in [dtypes.float16, dtypes.float32]: + image = convert_image_dtype(image, dtypes.float32) + + if mean is not None and std is not None: + mean = np.array(mean, dtype=np.float32) + std = np.array(std, dtype=np.float32) + image -= mean + image = math_ops.divide(image, std, name=scope) + return convert_image_dtype(image, orig_dtype, saturate=True) + + elif mean is None and std is None: + if channel_mode: + num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:-1]) + #`num_pixels` is the number of elements in each channels of 'image' + image_mean = math_ops.reduce_mean(image, axis=[-2, -3], keepdims=True) + # `image_mean` is the mean of elements in each channels of 'image' + + stddev = math_ops.reduce_std(image, axis=[-2, -3], keepdims=True) + min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, image.dtype)) + adjusted_sttdev = math_ops.maximum(stddev, min_stddev) + + image -= image_mean + image = math_ops.divide(image, adjusted_sttdev, name=scope) + return convert_image_dtype(image, orig_dtype, saturate=True) + + else: + num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:]) + #`num_pixels` is the number of elements in `image` + image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True) + + # Apply a minimum normalization that protects us against uniform images. 
+                stddev = math_ops.reduce_std(image, axis=[-1, -2, -3], keepdims=True)
+                min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, image.dtype))
+                adjusted_stddev = math_ops.maximum(stddev, min_stddev)
+
+                image -= image_mean
+                image = math_ops.divide(image, adjusted_stddev, name=scope)
+                return convert_image_dtype(image, orig_dtype, saturate=True)
+        else:
+            raise ValueError('std and mean must both be None or not None')
+
+
+def RandomBrightness(image, factor):
+    '''
+
+    Parameters
+    ----------
+    image:
+        An image or images to adjust.
+    factor:
+        Float, must be non-negative. The brightness delta is drawn uniformly from [-factor, factor).
+    Returns:
+        The brightness-adjusted image(s).
+    -------
+
+    '''
+
+    return tf.image.random_brightness(image, factor)
+
+
+def RandomContrast(image, lower, upper, seed=None):
+    '''
+
+    Parameters
+    ----------
+    image:
+        An image tensor with 3 or more dimensions.
+    lower:
+        float. Lower bound for the random contrast factor.
+    upper:
+        float. Upper bound for the random contrast factor.
+    seed:
+        A Python integer. Used to create a random seed.
+
+    Returns:
+        The contrast-adjusted image(s).
+    -------
+    '''
+
+    return tf.image.random_contrast(image, lower, upper, seed)
+
+
+def RandomHue(image, factor, seed=None):
+    '''
+
+    Parameters
+    ----------
+    image:
+        RGB image or images. The size of the last dimension must be 3.
+    factor:
+        float. The maximum value for the random factor.
+    seed:
+        An operation-specific seed.
+
+    Returns:
+        Adjusted image(s), same shape and DType as `image`.
+    -------
+
+    '''
+
+    return tf.image.random_hue(image, factor, seed)
+
+
+def RandomSaturation(image, lower, upper, seed=None):
+    '''
+    Parameters
+    ----------
+    image:
+        RGB image or images. The size of the last dimension must be 3.
+    lower:
+        float. Lower bound for the random saturation factor.
+    upper:
+        float. Upper bound for the random saturation factor.
+    seed:
+        An operation-specific seed.
+
+    Returns:
+        Adjusted image(s), same shape and DType as `image`.
+    -------
+    '''
+
+    return tf.image.random_saturation(image, lower, upper, seed)
+
+
+def RandomCrop(image, size):
+    '''
+
+    Parameters
+    ----------
+    image:
+        The image to crop.
+    size:
+        An integer or a list/tuple of length 2. If size is an integer, the cropped image has shape [size, size, 3];
+        if size is (height, width), the cropped image has shape [height, width, 3].
+    Returns:
+        A cropped image of the same rank as image and shape size.
+    -------
+    '''
+
+    if isinstance(size, int):
+        crop_size = (size, size)
+    elif isinstance(size, (list, tuple)) and len(size) == 2:
+        crop_size = (size[0], size[1])
+    else:
+        raise ValueError("Size should be a single integer or a list/tuple (h, w) of length 2.")
+
+    if len(image.shape) == 3:
+        h, w, c = image.shape
+        crop_size = crop_size + (c, )
+    elif len(image.shape) == 4:
+        b, h, w, c = image.shape
+        crop_size = (b, ) + crop_size + (c, )
+
+    return tf.image.random_crop(image, size=crop_size)
+
+
+def Resize(image, size, method='bilinear', preserve_aspect_ratio=False, antialias=False):
+    '''
+
+    Parameters
+    ----------
+    image:
+        The image(s) to resize.
+    size:
+        An integer or a list/tuple of length 2. If size is an integer, the resized image has shape [size, size, 3];
+        if size is (height, width), the resized image has shape [height, width, 3].
+    method:
+        An image.ResizeMethod, or a string equivalent; should be one of
+        (bilinear, lanczos3, lanczos5, bicubic, gaussian, nearest, area, mitchellcubic).
+        Defaults to bilinear.
+    preserve_aspect_ratio:
+        Whether to preserve the aspect ratio.
+    antialias:
+        Whether to use an anti-aliasing filter when downsampling an image.
+    Returns:
+        The resized image(s).
+    -------
+
+    '''
+    if isinstance(size, int):
+        size = [size, size]
+    elif len(size) != 2:
+        raise ValueError('Size should be a single integer or a list/tuple (h, w) of length 2.')
+
+    return tf.image.resize(image, size, method, preserve_aspect_ratio, antialias)
+
+
+def CropAndResize(image, boxes, box_indices, crop_size, method='bilinear', extrapolation_value=0, is_hwc=True):
+    '''
+
+    Parameters
+    ----------
+    image:
+        A 4-D tensor of shape [batch, image_height, image_width, depth]. Both image_height and image_width need to be positive.
+    boxes:
+        A 2-D tensor of shape [num_boxes, 4].
+    box_indices:
+        A 1-D tensor of shape [num_boxes] with int32 values in [0, batch).
+        The value of box_indices[i] specifies the image that the i-th box refers to.
+    crop_size:
+        A 1-D tensor of 2 elements, size = [crop_height, crop_width]. All cropped image patches are resized to this size.
+        The aspect ratio of the image content is not preserved. Both crop_height and crop_width need to be positive.
+    method:
+        An optional string specifying the sampling method for resizing.
+        It can be either "bilinear" or "nearest" and defaults to "bilinear".
+    extrapolation_value:
+        An optional float. Defaults to 0. Value used for extrapolation, when applicable.
+    Returns:
+        A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth].
+    -------
+
+    '''
+    image_shape = image.shape
+    boxes_num = 0
+    if isinstance(boxes, tf.Tensor):
+        boxes_num = boxes.shape[0]
+    elif isinstance(boxes, (np.ndarray, list, tuple)):
+        boxes = tf.constant(boxes)
+        boxes_num = boxes.shape[0]
+
+    if isinstance(crop_size, int):
+        crop_size = (crop_size, crop_size)
+        crop_size = tf.constant(crop_size)
+    elif isinstance(crop_size, (np.ndarray, list, tuple)):
+        crop_size = tf.constant(crop_size)
+
+    if isinstance(box_indices, (np.ndarray, list, tuple)):
+        box_indices = tf.constant(box_indices)
+    # If the input is a single image, i.e. a 3-D Tensor of shape [image_height, image_width, depth],
+    # expand it with 'tf.expand_dims(image, axis=0)' to a 4-D Tensor of shape
+    # [batch_size, image_height, image_width, depth].
+    if len(image_shape) == 3:
+        image = tf.expand_dims(image, axis=0)
+        box_indices = np.zeros((boxes_num, ), dtype=np.int32)
+        box_indices = tf.constant(box_indices)
+
+    return tf.image.crop_and_resize(
+        image, boxes=boxes, box_indices=box_indices, crop_size=crop_size, method=method,
+        extrapolation_value=extrapolation_value
+    )
+
+
+def CropOrPad(image, target_height, target_width, is_hwc=True):
+    '''
+    Resizes an image to a target width and height by either centrally cropping the image or padding it evenly with zeros.
+    Parameters
+    ----------
+    image:
+        4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels].
+    target_height:
+        Target height.
+    target_width:
+        Target width.
+    Returns:
+        Cropped and/or padded image.
+    -------
+    '''
+
+    return tf.image.resize_with_crop_or_pad(image, target_height, target_width)
+
+
+def ResizeAndPad(image, target_height, target_width, method='bilinear', antialias=False, is_hwc=True):
+    '''
+
+    Parameters
+    ----------
+    image:
+        4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels].
+    target_height:
+        Target height.
+    target_width:
+        Target width.
+ is_hwc: + The flag of image shape, (H, W, C) or (N, H, W, C) if True and (C, H, W) or (N, C, H, W) if False (default=True). + Returns: + Resized and padded image. If images was 4-D, a 4-D float Tensor of shape [batch, new_height, new_width, channels]. + If images was 3-D, a 3-D float Tensor of shape [new_height, new_width, channels]. + ------- + + ''' + + return tf.image.resize_with_pad(image, target_height, target_width, method=method, antialias=antialias) + + +def RgbToHsv(image): + + return tf.image.rgb_to_hsv(image) + + +def Transpose(image, order): + image = ops.convert_to_tensor(image) + image = _AssertAtLeast3DImage(image) + shape = image.get_shape() + if shape.ndims == 3 or shape.ndims is None: + if len(order) != 3: + raise ValueError('if image is 3-D tensor, order should be a list/tuple with length of 3') + return array_ops.transpose(image, order) + elif shape.ndims == 4: + if len(order) != 4: + raise ValueError('if image is 4-D tensor, order should be a list/tuple with length of 4') + return array_ops.transpose(image, order) + else: + raise ValueError('\'image\' must have either 3 or 4 dimensions.') + + +def RandomRotation( + image, degrees, fill_mode='nearest', fill_value=0, center=None, expand=False, is_hwc=True, interpolation_order=1 +): + if isinstance(image, tf.Tensor): + image = np.asarray(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. Got {}'.format(type(image))) + if is_hwc: + h, w, c = 0, 1, 2 + else: + h, w, c = 1, 2, 0 + if fill_mode not in ('constant', 'nearest', 'reflect', 'wrap'): + raise TypeError('fill_mode must be in (constant, nearest, reflect, wrap)') + + image = tf.keras.preprocessing.image.random_rotation( + image, degrees, h, w, c, fill_mode, fill_value, interpolation_order + ) + return tf.convert_to_tensor(image) + + +def RandomShift(image, shift, fill_mode='nearest', fill_value=0, is_hwc=True, interpolation_order=1): + ''' + + Parameters + ---------- + image + Input tensor. Must be 3D. + shift: + int or list/tuple, if shift is int, Width shift range will equal to height shift range. + if shift is list/tuple, shift range will be [width fraction, height fraction] + is_hwc: + The flag of image shape, (H, W, C) or (N, H, W, C) if True and (C, H, W) or (N, C, H, W) if False (default=True). + fill_mode: + Points outside the boundaries of the input are filled according to the given mode (one of {'constant', 'nearest', 'reflect', 'wrap'}). + fill_value: + Value used for points outside the boundaries of the input if mode='constant'. + interpolation_order + int, order of spline interpolation. see ndimage.interpolation.affine_transform + Returns + Shifted Numpy image tensor. + ------- + + ''' + if isinstance(image, tf.Tensor): + image = np.asarray(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. 
Got {}'.format(type(image))) + if isinstance(shift, numbers.Number): + width_fraction = shift + height_fraction = shift + elif isinstance(shift, list) or isinstance(shift, tuple): + if len(shift) == 2: + width_fraction = shift[0] + height_fraction = shift[1] + else: + raise ValueError('shift must be number or list/tuple of length 2') + + if is_hwc: + h, w, c = 0, 1, 2 + else: + h, w, c = 1, 2, 0 + if fill_mode not in ('constant', 'nearest', 'reflect', 'wrap'): + raise TypeError('fill_mode must be in (constant, nearest, reflect, wrap)') + + image = tf.keras.preprocessing.image.random_shift( + image, wrg=width_fraction, hrg=height_fraction, row_axis=h, col_axis=w, channel_axis=c, fill_mode=fill_mode, + cval=fill_value, interpolation_order=interpolation_order + ) + + return tf.convert_to_tensor(image) + + +def RandomShear(image, degree, fill_mode='nearest', fill_value=0, is_hwc=True, interpolation_order=1): + ''' + + Parameters + ---------- + image + Input tensor. Must be 3D. + degree: + Transformation intensity in degrees. + is_hwc: + The flag of image shape, (H, W, C) or (N, H, W, C) if True and (C, H, W) or (N, C, H, W) if False (default=True). + fill_mode: + Points outside the boundaries of the input are filled according to the given mode (one of {'constant', 'nearest', 'reflect', 'wrap'}). + fill_value: + Value used for points outside the boundaries of the input if mode='constant'. + interpolation_order + int, order of spline interpolation. see ndimage.interpolation.affine_transform + Returns + Shifted Numpy image tensor. + ------- + + ''' + if isinstance(image, tf.Tensor): + image = np.asarray(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. Got {}'.format(type(image))) + if is_hwc: + h, w, c = 0, 1, 2 + else: + h, w, c = 1, 2, 0 + + image = tf.keras.preprocessing.image.random_shear( + image, intensity=degree, row_axis=h, col_axis=w, channel_axis=c, fill_mode=fill_mode, cval=fill_value, + interpolation_order=interpolation_order + ) + return tf.convert_to_tensor(image) + + +def RandomZoom(image, zoom_range, fill_mode='nearest', fill_value=0, is_hwc=True, interpolation_order=1): + ''' + + Parameters + ---------- + image: + Input tensor. Must be 3D. + zoom_range: + Tuple of floats; zoom range for width and height. + is_hwc: + The flag of image shape, (H, W, C) or (N, H, W, C) if True and (C, H, W) or (N, C, H, W) if False (default=True). + fill_mode: + Points outside the boundaries of the input are filled according to the given mode (one of {'constant', 'nearest', 'reflect', 'wrap'}). + fill_value: + Value used for points outside the boundaries of the input if mode='constant'. + interpolation_order: + int, order of spline interpolation. see ndimage.interpolation.affine_transform + + Returns + Zoomed Numpy image tensor. + ------- + + ''' + if isinstance(image, tf.Tensor): + image = np.asarray(image) + if not isinstance(image, np.ndarray): + raise TypeError('img should be NumPy image. 
Got {}'.format(type(image))) + if isinstance(zoom_range, numbers.Number): + zoom_range = (zoom_range, zoom_range) + elif isinstance(zoom_range, list) or isinstance(zoom_range, tuple): + if len(zoom_range) == 2: + zoom_range = (zoom_range[0], zoom_range[1]) + else: + raise ValueError('shift must be number or list/tuple of length 2') + if is_hwc: + h, w, c = 0, 1, 2 + else: + h, w, c = 1, 2, 0 + + image = tf.keras.preprocessing.image.random_zoom( + image, zoom_range=zoom_range, row_axis=h, col_axis=w, channel_axis=c, fill_mode=fill_mode, cval=fill_value, + interpolation_order=interpolation_order + ) + return tf.convert_to_tensor(image) + + +def Rescale(image, scale, offset=0): + ''' + + Parameters + ---------- + image: + 3-D image or 4-D images + scale: + Float, the scale to apply to the inputs. + offset: + Float, the offset to apply to the inputs. + Returns: + rescaled images + ------- + ''' + image = tf.cast(image, dtype=tf.float32) + scale = tf.cast(scale, dtype=tf.float32) + offset = tf.cast(offset, dtype=tf.float32) + return image * scale + offset + + +def RandomFlipVertical(image): + + return tf.image.random_flip_up_down(image) + + +def RandomFlipHorizontal(image): + + return tf.image.random_flip_left_right(image) + + +def HWC2CHW(image): + + if (len(image.shape) == 3): + return Transpose(image, (2, 0, 1)) + elif (len(image.shape) == 4): + return Transpose(image, (0, 3, 1, 2)) + else: + raise ValueError('\'image\' must have either 3 or 4 dimensions.') + + +def CHW2HWC(image): + + if (len(image.shape) == 3): + return Transpose(image, (1, 2, 0)) + elif (len(image.shape) == 4): + return Transpose(image, (0, 2, 3, 1)) + else: + raise ValueError('\'image\' must have either 3 or 4 dimensions.') diff --git a/tensorlayer/dataflow/mindspore_data.py b/tensorlayer/dataflow/mindspore_data.py index fab1261..54e275f 100644 --- a/tensorlayer/dataflow/mindspore_data.py +++ b/tensorlayer/dataflow/mindspore_data.py @@ -21,6 +21,7 @@ __all__ = [ 'Take', 'TextFlieDataset', 'TFRecordDataset', + 'Dataloader', ] @@ -158,10 +159,8 @@ def Prefetch(dataset, buffer_size): return dataset.config.set_prefetch_size(prefetch_size) - def Repeat(dataset, count=None): - return dataset.repeat(count) @@ -275,3 +274,14 @@ def Zip(datasets): ''' return ds.zip(datasets) + + +def Dataloader(dataset, batch_size, shuffle=False, drop_last=False, prefetch=0, shuffle_buffer_size=0): + + if shuffle: + dataset = Shuffle(dataset, buffer_size=shuffle_buffer_size) + + dataset = Batch(dataset, batch_size=batch_size, drop_remainder=drop_last) + dataset = Prefetch(dataset, buffer_size=prefetch) + + return dataset diff --git a/tensorlayer/dataflow/mindspore_image.py b/tensorlayer/dataflow/mindspore_image.py deleted file mode 100644 index e4c1fd9..0000000 --- a/tensorlayer/dataflow/mindspore_image.py +++ /dev/null @@ -1,305 +0,0 @@ -import mindspore.dataset as ms -import mindspore.dataset.vision.c_transforms as c_vision -import mindspore.dataset.vision.py_transforms as py_vision -import mindspore.dataset.vision.py_transforms_util as py_util -import numpy as np -from PIL import Image, ImageOps, ImageEnhance, __version__ - -__all__ = [ - 'CentralCrop', 'HsvToRgb', 'AdjustBrightness', 'AdjustContrast', 'AdjustHue', 'Crop', 'FlipHorizontal', - 'FlipVertical', 'GrayToRgb', 'RgbToGray', 'PadToBoundingBox' -] - -augment_error_message = 'img should be PIL image. Got {}. Use Decode() for encoded data or ToPIL() for decoded data.' 
- - -def CentralCrop(image, central_fraction=None, size=None): - ''' - - Parameters - ---------- - image : - input Either a 3-D float Tensor of shape [height, width, depth], - or a 4-D Tensor of shape [batch_size, height, width, depth]. - central_fraction : - float (0, 1], fraction of size to crop - size: - size (Union[int, sequence]) – The output size of the cropped image. If size is an integer, a square crop of size (size, size) is returned. - If size is a sequence of length 2, it should be (height, width). - Returns : - 3-D / 4-D float Tensor, as per the input. - ------- - ''' - if size is None and central_fraction is None: - raise ValueError('central_fraction and size can not be both None') - - if size is None: - outshape = np.shape(image) - if len(outshape) == 3: - h_axis = 0 - w_axis = 1 - elif len(outshape) == 4: - h_axis = 1 - w_axis = 2 - - height = outshape[h_axis] - width = outshape[w_axis] - - target_height = height * central_fraction - target_width = width * central_fraction - - size = (target_height, target_width) - - return py_util.center_crop(image, size) - - -def HsvToRgb(image, is_hwc=True): - - image = np.asarray(image) - - return py_util.hsv_to_rgbs(image, is_hwc=is_hwc) - - -def AdjustBrightness(image, factor): - ''' - - Parameters - ---------- - image: - input NumPy image array or PIL image - factor: - factor should be in the range (-1,1) - Returns: - ------- - np darray image - ''' - - image = np.asarray(image) - image = image / 255 - image = image + factor - index = np.where(image > 1) - image[index] = 1 - index = np.where(image < 0) - image[index] = 0 - image = image * 255 - - return image - - -def AdjustContrast(image, factor): - - if isinstance(image, np.ndarray): - image = Image.fromarray(image) - if not isinstance(image, Image.Image): - raise TypeError(augment_error_message.format(type(image))) - - image = ImageEnhance.Contrast(image).enhance(factor) - - image = np.array(image) - - return image - - -def AdjustHue(image, factor): - - if isinstance(image, np.ndarray): - image = Image.fromarray(image) - if not isinstance(image, Image.Image): - raise TypeError(augment_error_message.format(type(image))) - - image_hue_factor = factor - if not -1 <= image_hue_factor <= 1: - raise ValueError('image_hue_factor {} is not in [-1, 1].'.format(image_hue_factor)) - - if not isinstance(image, Image.Image): - raise TypeError(augment_error_message.format(type(image))) - - mode = image.mode - if mode in {'L', '1', 'I', 'F'}: - return image - - hue, saturation, value = image.convert('HSV').split() - - np_hue = np.array(hue, dtype=np.uint8) - - with np.errstate(over='ignore'): - np_hue += np.uint8(image_hue_factor * 255) - hue = Image.fromarray(np_hue, 'L') - - image = Image.merge('HSV', (hue, saturation, value)).convert(mode) - return image - - -def AdjustSaturation(image, factor): - - if isinstance(image, np.ndarray): - image = Image.fromarray(image) - if not isinstance(image, Image.Image): - raise TypeError(augment_error_message.format(type(image))) - - enhancer = ImageEnhance.Color(image) - image = enhancer.enhance(factor) - return image - - -def Crop(image, offset_height, offset_width, target_height, target_width): - - if isinstance(image, np.ndarray): - image = Image.fromarray(image) - if not isinstance(image, Image.Image): - raise TypeError(augment_error_message.format(type(image))) - image = np.array( - image.crop((offset_width, offset_height, offset_width + target_width, offset_width + target_height)) - ) - return image - - -def FlipHorizontal(image): - - if 
isinstance(image, np.ndarray): - image = Image.fromarray(image) - if not isinstance(image, Image.Image): - raise TypeError(augment_error_message.format(type(image))) - - image = np.array(image.transpose(Image.FLIP_LEFT_RIGHT)) - - return image - - -def FlipVertical(image): - - if isinstance(image, np.ndarray): - image = Image.fromarray(image) - if not isinstance(image, Image.Image): - raise TypeError(augment_error_message.format(type(image))) - - image = np.array(image.transpose(Image.FLIP_TOP_BOTTOM)) - - return image - - -def GrayToRgb(image): - - image = np.asarray(image) - shape = image.shape - output_image = np.zeros((shape[0], shape[1], 3), dtype=np.uint8) - if len(shape) == 3: - for i in range(3): - output_image[:, :, i] = image[:, :, 1] - elif len(shape) == 2: - for i in range(3): - output_image[:, :, i] = image - - return output_image - - -def RgbToGray(image): - - if isinstance(image, np.ndarray): - image = Image.fromarray(image) - if not isinstance(image, Image.Image): - raise TypeError(augment_error_message.format(type(image))) - ''' - 将彩色图像转换为灰度(模式“L”)时,库使用ITU-R 601-2 Luma转换: - L = R * 299/1000 + G * 587/1000 + B * 114/1000 - ''' - image = image.convert('L') - image = np.asarray(image) - - return image - - -def PadToBoundingBox(image, offset_height, offset_width, target_height, target_width): - ''' - - Parameters - ---------- - image: - A PIL image - offset_height: - Number of rows of zeros to add on top. - offset_width: - Number of columns of zeros to add on the left. - target_height: - Height of output image. - target_width - Width of output image. - Returns - A numpy ndarray image - ------- - ''' - - if offset_height < 0: - raise ValueError("offset_height must be >= 0") - if offset_width < 0: - raise ValueError("offset_width must be >= 0") - image = np.array(image) - shape = image.shape - top = offset_height - bottom = target_height - shape[0] - top - left = offset_width - right = target_width - shape[1] - left - - if bottom < 0: - raise ValueError("target_height must be >= offset_height + height") - - if right < 0: - raise ValueError("target_width must be >= offset_width + width") - - return np.pad(image, ((top, bottom), (left, right), (0, 0)), mode='constant') - - -def Standardization(image, mean=None, std=None, channel_mode=False): - ''' - - Parameters - ---------- - image: - An n-D Tensor with at least 3 dimensions, the last 3 of which are the dimensions of each image. - mean: - List or tuple of mean values for each channel, with respect to channel order. - std: - List or tuple of standard deviations for each channel. - channel_mode: - Decide to implement standardization on whole image or each channel of image. - Returns: - A Tensor with the same shape and dtype as image. 
-    -------
-    '''
-    image = np.array(image, dtype=np.float32)
-    num_shape = image.shape
-    if mean is not None and std is not None:
-        if len(mean) != len(std):
-            raise ValueError("Length of mean and std must be equal")
-        if len(mean) == 1:
-            mean = [mean[0]] * num_shape[2]
-            std = [std[0]] * num_shape[2]
-        mean = np.array(mean, dtype=image.dtype)
-        std = np.array(std, dtype=image.dtype)
-        return (image - mean[:, None, None]) / std[:, None, None]
-    elif mean is None and std is None:
-        if channel_mode:
-            num_pixels = num_shape[0] * num_shape[1]
-            image_mean = np.mean(image, axis=(0, 1))
-            stddev = np.std(image, axis=(0, 1))
-            min_sttdev = 1 / np.sqrt(num_pixels)
-            min_sttdev = [min_sttdev] * num_shape[2]
-            adjusted_sttdev = np.maximum(stddev, min_sttdev)
-
-            image -= image_mean
-            image = np.divide(image, adjusted_sttdev)
-            return image
-        else:
-            num_pixels = num_shape[0] * num_shape[1] * num_shape[2]
-            image_mean = np.mean(image, axis=(0, 1, 2))
-            image_mean = [image_mean] * 3
-            stddev = np.std(image, axis=(0, 1, 2))
-            min_sttdev = 1 / np.sqrt(num_pixels)
-            adjusted_sttdev = np.maximum(stddev, min_sttdev)
-            adjusted_sttdev = [adjusted_sttdev] * 3
-
-            image -= image_mean
-            image = np.divide(image, adjusted_sttdev)
-            return image
-    else:
-        raise ValueError('std and mean must both be None or not None')
diff --git a/tensorlayer/dataflow/paddle_data.py b/tensorlayer/dataflow/paddle_data.py
new file mode 100644
index 0000000..d001d56
--- /dev/null
+++ b/tensorlayer/dataflow/paddle_data.py
@@ -0,0 +1,131 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import paddle
+from paddle.io import Dataset, BatchSampler, DataLoader, IterableDataset
+__all__ = [
+    'Concat',
+    'FromGenerator',
+    'FromSlices',
+    'Map',
+    # 'Shuffle',
+    # 'Batch',
+    'Dataloader',
+]
+
+
+def to_list(value):
+    if value is None:
+        return value
+    if isinstance(value, (list, tuple)):
+        return list(value)
+    return [value]
+
+
+class FromGenerator(Dataset):
+
+    def __init__(self, generator):
+
+        if not callable(generator):
+            raise TypeError("'generator' must be callable")
+        self.generator = generator()
+        self.datas = []
+        self.labels = []
+        for data, label in self.generator:
+            self.datas.append(data)
+            self.labels.append(label)
+
+    def __getitem__(self, idx):
+
+        x = self.datas[idx]
+        y = self.labels[idx]
+
+        return x, y
+
+    def __len__(self):
+
+        return len(self.datas)
+
+
+class FromSlices(Dataset):
+
+    def __init__(self, datas, transform=None):
+        self.datas = datas[0]
+        self.labels = datas[1]
+        self.transform = transform
+
+        if len(self.datas) != len(self.labels):
+            raise ValueError('Datas and labels do not have the same length in the 1st dimension.')
+
+    def __getitem__(self, idx):
+        data = paddle.to_tensor(self.datas[idx], dtype='float32')
+        label = paddle.to_tensor(self.labels[idx], dtype='int64')
+        if self.transform is not None:
+            data = self.transform(data)
+        return data, label
+
+    def __len__(self):
+
+        return len(self.datas)
+
+
+class Concat(IterableDataset):
+
+    def __init__(self, datasets):
+        self.datasets = list(datasets)
+        assert len(self.datasets) > 0, "input datasets should not be empty"
+        for i, dataset in enumerate(self.datasets):
+            assert isinstance(dataset, IterableDataset), \
+                "Concat only supports paddle.io.IterableDataset"
+
+    def __iter__(self):
+        for dataset in self.datasets:
+            for sample in dataset:
+                yield sample
+
+
+class Map(Dataset):
+
+    def __init__(self, dataset, transform):
+        self.isDataset = False
+        self.transform = transform
+        if isinstance(dataset, Dataset):
+            self.isDataset
= True + self.dataset = dataset + elif isinstance(dataset, list) or isinstance(dataset, tuple): + self.datas = dataset[0] + self.labels = dataset[1] + else: + raise TypeError( + " 'dataset' should be subclass instance of paddle.io.Dataset " + "or a [data, label] list/tulpe, not a {}".format(type(dataset)) + ) + + def __getitem__(self, idx): + if self.isDataset: + x = self.dataset[idx][0] + if not isinstance(x, np.ndarray): + x = np.asarray(x) + x = self.transform(x) + y = self.dataset[idx][1] + else: + x = self.datas[idx] + if not isinstance(x, np.ndarray): + x = np.asarray(x) + x = self.transform(x) + y = self.labels[idx] + + return x, y + + def __len__(self): + + if self.isDataset: + return len(self.dataset[0]) + else: + return len(self.datas) + + +def Dataloader(dataset, batch_size=None, shuffle=False, drop_last=False, prefetch=0, shuffle_buffer_size=0): + + return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last) diff --git a/tensorlayer/dataflow/tensorflow_data.py b/tensorlayer/dataflow/tensorflow_data.py index ce50c77..4da229a 100644 --- a/tensorlayer/dataflow/tensorflow_data.py +++ b/tensorlayer/dataflow/tensorflow_data.py @@ -21,6 +21,7 @@ __all__ = [ 'TextFlieDataset', 'TFRecordDataset', 'Zip', + 'Dataloader', ] @@ -252,3 +253,14 @@ def Zip(datasets): ''' return tf.data.Dataset.zip(datasets) + + +def Dataloader(dataset, batch_size, shuffle=False, drop_last=False, prefetch=0, shuffle_buffer_size=1024): + + if shuffle: + dataset = Shuffle(dataset, buffer_size=shuffle_buffer_size, reshuffle_each_iteration=True) + + dataset = Batch(dataset, batch_size=batch_size, drop_remainder=drop_last) + dataset = Prefetch(dataset, buffer_size=prefetch) + + return dataset diff --git a/tensorlayer/dataflow/tensorflow_image.py b/tensorlayer/dataflow/tensorflow_image.py deleted file mode 100644 index 39419b9..0000000 --- a/tensorlayer/dataflow/tensorflow_image.py +++ /dev/null @@ -1,200 +0,0 @@ -import tensorflow as tf -import numpy as np -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import array_ops -from tensorflow.python.framework import ops -from tensorflow.python.ops.image_ops_impl import _AssertAtLeast3DImage -from tensorflow.python.framework import dtypes -from tensorflow.python.ops.image_ops_impl import convert_image_dtype -__all__ = [ - 'CentralCrop', - 'HsvToRgb', - 'AdjustBrightness', - 'AdjustContrast', - 'AdjustHue', - 'AdjustSaturation', - 'Crop', - 'FlipHorizontal', - 'FlipVertical', - 'GrayToRgb', - 'Standardization', -] - - -def CentralCrop(image, central_fraction=None, size=None): - ''' - - Parameters - ---------- - image : - input Either a 3-D float Tensor of shape [height, width, depth], - or a 4-D Tensor of shape [batch_size, height, width, depth]. - central_fraction : - float (0, 1], fraction of size to crop - size: - size (Union[int, sequence]) – The output size of the cropped image. If size is an integer, a square crop of size (size, size) is returned. - If size is a sequence of length 2, it should be (height, width). - Returns : - 3-D / 4-D float Tensor, as per the input. 
- ------- - ''' - if size is None and central_fraction is None: - raise ValueError('central_fraction and size can not be both None') - - if central_fraction is None: - outshape = np.shape(image) - if len(outshape) == 3: - h_axis = 0 - w_axis = 1 - elif len(outshape) == 4: - h_axis = 1 - w_axis = 2 - - if isinstance(size, int): - target_height = size - target_width = size - elif isinstance(size, tuple): - target_height = size[0] - target_width = size[1] - - central_fraction = max(target_height // outshape[h_axis], target_width // outshape[w_axis]) - - return tf.image.central_crop(image, central_fraction) - - -def HsvToRgb(image): - - return tf.image.hsv_to_rgb(image) - - -def AdjustBrightness(image, factor): - - return tf.image.adjust_brightness(image, delta=factor) - - -def AdjustContrast(image, factor): - - return tf.image.adjust_contrast(image, contrast_factor=factor) - - -def AdjustHue(image, factor): - - return tf.image.adjust_hue(image, delta=factor) - - -def AdjustSaturation(image, factor): - - return tf.image.adjust_saturation(image, saturation_factor=factor) - - -def Crop(image, offset_height, offset_width, target_height, target_width): - ''' - - Parameters - ---------- - image: - A image or a batch of images - offset_height: - Vertical coordinate of the top-left corner of the result in the input. - offset_width: - Horizontal coordinate of the top-left corner of the result in the input. - target_height: - Height of the result. - target_width: - Width of the result. - - Returns: - Output [batch, target_height, target_width, channels] or [target_height, target_width, channels] - ------- - ''' - - return tf.image.crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width) - - -def FlipHorizontal(image): - - return tf.image.flip_left_right(image) - - -def FlipVertical(image): - - return tf.image.flip_up_down(image) - - -def GrayToRgb(image): - - return tf.image.grayscale_to_rgb(image) - - -def RgbToGray(image): - - return tf.image.rgb_to_grayscale(image) - - -def PadToBoundingBox(image, offset_height, offset_width, target_height, target_width): - - return tf.image.pad_to_bounding_box(image, offset_height, offset_width, target_height, target_width) - - -def Standardization(image, mean=None, std=None, channel_mode=False): - ''' - - Parameters - ---------- - image: - An n-D Tensor with at least 3 dimensions, the last 3 of which are the dimensions of each image. - mean: - List or tuple of mean values for each channel, with respect to channel order. - std: - List or tuple of standard deviations for each channel. - channel_mode: - Decide to implement standardization on whole image or each channel of image. - Returns: - A Tensor with the same shape and dtype as image. 
- ------- - ''' - with ops.name_scope(None, 'Standardization', [image]) as scope: - image = ops.convert_to_tensor(image, name='image') - image = _AssertAtLeast3DImage(image) - - orig_dtype = image.dtype - if orig_dtype not in [dtypes.float16, dtypes.float32]: - image = convert_image_dtype(image, dtypes.float32) - - if mean is not None and std is not None: - mean = np.array(mean, dtype=np.float32) - std = np.array(std, dtype=np.float32) - image -= mean - image = math_ops.divide(image, std, name=scope) - return convert_image_dtype(image, orig_dtype, saturate=True) - - elif mean is None and std is None: - if channel_mode: - num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:-1]) - #`num_pixels` is the number of elements in each channels of 'image' - image_mean = math_ops.reduce_mean(image, axis=[-2, -3], keepdims=True) - # `image_mean` is the mean of elements in each channels of 'image' - - stddev = math_ops.reduce_std(image, axis=[-2, -3], keepdims=True) - min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, image.dtype)) - adjusted_sttdev = math_ops.maximum(stddev, min_stddev) - - image -= image_mean - image = math_ops.divide(image, adjusted_sttdev, name=scope) - return convert_image_dtype(image, orig_dtype, saturate=True) - - else: - num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:]) - #`num_pixels` is the number of elements in `image` - image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True) - - # Apply a minimum normalization that protects us against uniform images. - stddev = math_ops.reduce_std(image, axis=[-1, -2, -3], keepdims=True) - min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, image.dtype)) - adjusted_stddev = math_ops.maximum(stddev, min_stddev) - - image -= image_mean - image = math_ops.divide(image, adjusted_stddev, name=scope) - return convert_image_dtype(image, orig_dtype, saturate=True) - else: - raise ValueError('std and mean must both be None or not None') diff --git a/tensorlayer/layers/convolution/__init__.py b/tensorlayer/layers/convolution/__init__.py index 668736e..12aaa14 100644 --- a/tensorlayer/layers/convolution/__init__.py +++ b/tensorlayer/layers/convolution/__init__.py @@ -9,16 +9,16 @@ layers that allow user to apply ``tf.ops.lrn`` on ``network.outputs``. More functions can be found in `TensorFlow API `__. """ -# from .binary_conv import * +from .binary_conv import * from .deformable_conv import * from .depthwise_conv import * -# from .dorefa_conv import * +from .dorefa_conv import * # from .expert_conv import * # from .expert_deconv import * -# from .group_conv import * +from .group_conv import * from .quan_conv import * from .quan_conv_bn import * -# from .separable_conv import * +from .separable_conv import * from .simplified_conv import * # from .simplified_deconv import * from .super_resolution import * @@ -52,7 +52,7 @@ __all__ = [ # 'AtrousDeConv2d', # binary - # 'BinaryConv2d', + 'BinaryConv2d', # deformable 'DeformableConv2d', @@ -61,14 +61,14 @@ __all__ = [ 'DepthwiseConv2d', # dorefa - # 'DorefaConv2d', + 'DorefaConv2d', # group - # 'GroupConv2d', + 'GroupConv2d', # separable - # 'SeparableConv1d', - # 'SeparableConv2d', + 'SeparableConv1d', + 'SeparableConv2d', # subpixel 'SubpixelConv1d', diff --git a/tensorlayer/layers/convolution/binary_conv.py b/tensorlayer/layers/convolution/binary_conv.py new file mode 100644 index 0000000..e5ab6c5 --- /dev/null +++ b/tensorlayer/layers/convolution/binary_conv.py @@ -0,0 +1,155 @@ +#! 
/usr/bin/python +# -*- coding: utf-8 -*- + +import tensorlayer as tl +from tensorlayer import logging +from tensorlayer.layers.core import Module +from tensorlayer.backend import BACKEND + +__all__ = [ + 'BinaryConv2d', +] + + +class BinaryConv2d(Module): + """ + The :class:`BinaryConv2d` class is a 2D binary CNN layer, which weights are either -1 or 1 while inference. + + Note that, the bias vector would not be binarized. + + Parameters + ---------- + n_filter : int + The number of filters. + filter_size : tuple of int + The filter size (height, width). + strides : tuple of int + The sliding window strides of corresponding input dimensions. + It must be in the same order as the ``shape`` parameter. + act : activation function + The activation function of this layer. + padding : str + The padding algorithm type: "SAME" or "VALID". + data_format : str + "channels_last" (NHWC, default) or "channels_first" (NCHW). + dilation_rate : tuple of int + Specifying the dilation rate to use for dilated convolution. + W_init : initializer + The initializer for the the weight matrix. + b_init : initializer or None + The initializer for the the bias vector. If None, skip biases. + in_channels : int + The number of in channels. + name : None or str + A unique layer name. + + Examples + --------- + With TensorLayer + + >>> net = tl.layers.Input([8, 100, 100, 32], name='input') + >>> binaryconv2d = tl.layers.BinaryConv2d( + ... n_filter=64, filter_size=(3, 3), strides=(2, 2), act=tl.relu, in_channels=32, name='binaryconv2d' + ... )(net) + >>> print(binaryconv2d) + >>> output shape : (8, 50, 50, 64) + + """ + + def __init__( + self, n_filter=32, filter_size=(3, 3), strides=(1, 1), act=None, padding='VALID', data_format="channels_last", + dilation_rate=(1, 1), W_init=tl.initializers.truncated_normal(stddev=0.02), + b_init=tl.initializers.constant(value=0.0), in_channels=None, name=None + ): + super(BinaryConv2d, self).__init__(name, act=act) + self.n_filter = n_filter + self.filter_size = filter_size + self._strides = self.strides = strides + self.padding = padding + self.data_format = data_format + self._dilation_rate = self.dilation_rate = dilation_rate + self.W_init = W_init + self.b_init = b_init + self.in_channels = in_channels + + if self.in_channels: + self.build(None) + self._built = True + + logging.info( + "BinaryConv2d %s: n_filter: %d filter_size: %s strides: %s pad: %s act: %s" % ( + self.name, n_filter, str(filter_size), str(strides), padding, + self.act.__class__.__name__ if self.act is not None else 'No Activation' + ) + ) + + def __repr__(self): + actstr = self.act.__class__.__name__ if self.act is not None else 'No Activation' + s = ( + '{classname}(in_channels={in_channels}, out_channels={n_filter}, kernel_size={filter_size}' + ', strides={strides}, padding={padding}' + ) + if self.dilation_rate != (1, ) * len(self.dilation_rate): + s += ', dilation={dilation_rate}' + if self.b_init is None: + s += ', bias=False' + s += (', ' + actstr) + if self.name is not None: + s += ', name=\'{name}\'' + s += ')' + return s.format(classname=self.__class__.__name__, **self.__dict__) + + def build(self, inputs_shape): + if self.data_format == 'channels_last': + self.data_format = 'NHWC' + if self.in_channels is None: + self.in_channels = inputs_shape[-1] + self._strides = [1, self._strides[0], self._strides[1], 1] + self._dilation_rate = [1, self._dilation_rate[0], self._dilation_rate[1], 1] + elif self.data_format == 'channels_first': + self.data_format = 'NCHW' + if self.in_channels is None: + 
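+                # channels_first (NCHW): the channel dimension is axis 1 of the input shape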
self.in_channels = inputs_shape[1] + self._strides = [1, 1, self._strides[0], self._strides[1]] + self._dilation_rate = [1, 1, self._dilation_rate[0], self._dilation_rate[1]] + else: + raise Exception("data_format should be either channels_last or channels_first") + + self.filter_shape = (self.filter_size[0], self.filter_size[1], self.in_channels, self.n_filter) + + self.W = self._get_weights("filters", shape=self.filter_shape, init=self.W_init) + + self.b_init_flag = False + if self.b_init: + self.b = self._get_weights("biases", shape=(self.n_filter, ), init=self.b_init) + self.bias_add = tl.ops.BiasAdd(self.data_format) + self.b_init_flag = True + + self.act_init_flag = False + if self.act: + self.act_init_flag = True + + self.binaryconv2d = tl.ops.BinaryConv2D( + strides=self._strides, + padding=self.padding, + data_format=self.data_format, + dilations=self._dilation_rate, + out_channel=self.n_filter, + k_size=self.filter_size, + in_channel=self.in_channels, + ) + + def forward(self, inputs): + if self._forward_state == False: + if self._built == False: + self.build(tl.get_tensor_shape(inputs)) + self._built = True + self._forward_state = True + + outputs = self.binaryconv2d(inputs, self.W) + + if self.b_init_flag: + outputs = self.bias_add(outputs, self.b) + if self.act_init_flag: + outputs = self.act(outputs) + return outputs diff --git a/tensorlayer/layers/convolution/dorefa_conv.py b/tensorlayer/layers/convolution/dorefa_conv.py new file mode 100644 index 0000000..50396cd --- /dev/null +++ b/tensorlayer/layers/convolution/dorefa_conv.py @@ -0,0 +1,168 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- + +import tensorlayer as tl +from tensorlayer import logging +from tensorlayer.layers.core import Module + +__all__ = [ + 'DorefaConv2d', +] + + +class DorefaConv2d(Module): + """The :class:`DorefaConv2d` class is a 2D quantized convolutional layer, which weights are 'bitW' bits and the output of the previous layer + are 'bitA' bits while inferencing. + + Note that, the bias vector would not be binarized. + + Parameters + ---------- + bitW : int + The bits of this layer's parameter + bitA : int + The bits of the output of previous layer + n_filter : int + The number of filters. + filter_size : tuple of int + The filter size (height, width). + strides : tuple of int + The sliding window strides of corresponding input dimensions. + It must be in the same order as the ``shape`` parameter. + act : activation function + The activation function of this layer. + padding : str + The padding algorithm type: "SAME" or "VALID". + data_format : str + "channels_last" (NHWC, default) or "channels_first" (NCHW). + dilation_rate : tuple of int + Specifying the dilation rate to use for dilated convolution. + W_init : initializer + The initializer for the the weight matrix. + b_init : initializer or None + The initializer for the the bias vector. If None, skip biases. + in_channels : int + The number of in channels. + name : None or str + A unique layer name. + + Examples + --------- + With TensorLayer + + >>> net = tl.layers.Input([8, 12, 12, 32], name='input') + >>> dorefaconv2d = tl.layers.DorefaConv2d( + ... n_filter=32, filter_size=(5, 5), strides=(1, 1), act=tl.relu, padding='SAME', name='dorefaconv2d' + ... 
)(net) + >>> print(dorefaconv2d) + >>> output shape : (8, 12, 12, 32) + + """ + + def __init__( + self, + bitW=1, + bitA=3, + n_filter=32, + filter_size=(3, 3), + strides=(1, 1), + act=None, + padding='SAME', + data_format="channels_last", + dilation_rate=(1, 1), + W_init=tl.initializers.truncated_normal(stddev=0.02), + b_init=tl.initializers.constant(value=0.0), + in_channels=None, + name=None # 'dorefa_cnn2d', + ): + super().__init__(name, act=act) + self.bitW = bitW + self.bitA = bitA + self.n_filter = n_filter + self.filter_size = filter_size + self.strides = self._strides = strides + self.padding = padding + self.data_format = data_format + self.dilation_rate = self._dilation_rate = dilation_rate + self.W_init = W_init + self.b_init = b_init + self.in_channels = in_channels + + if self.in_channels: + self.build(None) + self._built = True + + logging.info( + "DorefaConv2d %s: n_filter: %d filter_size: %s strides: %s pad: %s act: %s" % ( + self.name, n_filter, str(filter_size), str(strides), padding, + self.act.__class__.__name__ if self.act is not None else 'No Activation' + ) + ) + + def __repr__(self): + actstr = self.act.__class__.__name__ if self.act is not None else 'No Activation' + s = ( + '{classname}(in_channels={in_channels}, out_channels={n_filter}, kernel_size={filter_size}' + ', strides={strides}, padding={padding}' + ) + if self.dilation_rate != (1, ) * len(self.dilation_rate): + s += ', dilation={dilation_rate}' + if self.b_init is None: + s += ', bias=False' + s += (', ' + actstr) + if self.name is not None: + s += ', name=\'{name}\'' + s += ')' + return s.format(classname=self.__class__.__name__, **self.__dict__) + + def build(self, inputs_shape): + if self.data_format == 'channels_last': + self.data_format = 'NHWC' + if self.in_channels is None: + self.in_channels = inputs_shape[-1] + self._strides = [1, self._strides[0], self._strides[1], 1] + self._dilation_rate = [1, self._dilation_rate[0], self._dilation_rate[1], 1] + elif self.data_format == 'channels_first': + self.data_format = 'NCHW' + if self.in_channels is None: + self.in_channels = inputs_shape[1] + self._strides = [1, 1, self._strides[0], self._strides[1]] + self._dilation_rate = [1, 1, self._dilation_rate[0], self._dilation_rate[1]] + else: + raise Exception("data_format should be either channels_last or channels_first") + + self.filter_shape = (self.filter_size[0], self.filter_size[1], self.in_channels, self.n_filter) + + self.W = self._get_weights("filters", shape=self.filter_shape, init=self.W_init) + + self.b_init_flag = False + if self.b_init: + self.b = self._get_weights("biases", shape=(self.n_filter, ), init=self.b_init) + self.bias_add = tl.ops.BiasAdd(self.data_format) + self.b_init_flag = True + + self.act_init_flag = False + if self.act: + self.act_init_flag = True + + self.dorefaconv2d = tl.ops.DorefaConv2D( + bitW=self.bitW, bitA=self.bitA, strides=self._strides, padding=self.padding, data_format=self.data_format, + dilations=self._dilation_rate, out_channel=self.n_filter, k_size=self.filter_size, + in_channel=self.in_channels + ) + + def forward(self, inputs): + + if self._forward_state == False: + if self._built == False: + self.build(tl.get_tensor_shape(inputs)) + self._built = True + self._forward_state = True + + outputs = self.dorefaconv2d(inputs, self.W) + + if self.b_init_flag: + outputs = self.bias_add(outputs, self.b) + if self.act_init_flag: + outputs = self.act(outputs) + return outputs diff --git a/tensorlayer/layers/convolution/group_conv.py 
b/tensorlayer/layers/convolution/group_conv.py
new file mode 100644
index 0000000..cbbbd47
--- /dev/null
+++ b/tensorlayer/layers/convolution/group_conv.py
@@ -0,0 +1,164 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+import tensorlayer as tl
+from tensorlayer import logging
+from tensorlayer.layers.core import Module
+from tensorlayer.backend import BACKEND
+
+__all__ = [
+    'GroupConv2d',
+]
+
+
+class GroupConv2d(Module):
+    """The :class:`GroupConv2d` class is a 2D grouped convolution layer, see `here `__.
+    Parameters
+    --------------
+    n_filter : int
+        The number of filters.
+    filter_size : tuple of int
+        The filter size.
+    strides : tuple of int
+        The stride step.
+    n_group : int
+        The number of groups.
+    act : activation function
+        The activation function of this layer.
+    padding : str
+        The padding algorithm type: "SAME" or "VALID".
+    data_format : str
+        "channels_last" (NHWC, default) or "channels_first" (NCHW).
+    dilation_rate : tuple of int
+        Specifying the dilation rate to use for dilated convolution.
+    W_init : initializer
+        The initializer for the weight matrix.
+    b_init : initializer or None
+        The initializer for the bias vector. If None, skip biases.
+    in_channels : int
+        The number of in channels.
+    name : None or str
+        A unique layer name.
+    Examples
+    ---------
+    With TensorLayer
+    >>> net = tl.layers.Input([8, 24, 24, 32], name='input')
+    >>> groupconv2d = tl.layers.GroupConv2d(
+    ...     n_filter=64, filter_size=(3, 3), strides=(2, 2), n_group=2, name='group'
+    ... )(net)
+    >>> print(groupconv2d)
+    >>> output shape : (8, 12, 12, 64)
+    """
+
+    def __init__(
+        self, n_filter=32, filter_size=(1, 1), strides=(1, 1), n_group=1, act=None, padding='SAME',
+        data_format="channels_last", dilation_rate=(1, 1), W_init=tl.initializers.truncated_normal(stddev=0.02),
+        b_init=tl.initializers.constant(value=0.0), in_channels=None, name=None
+    ):
+        super().__init__(name, act=act)
+        self.n_filter = n_filter
+        self.filter_size = filter_size
+        self._strides = self.strides = strides
+        self.n_group = n_group
+        self.padding = padding
+        self.data_format = data_format
+        self._dilation_rate = self.dilation_rate = dilation_rate
+        self.W_init = W_init
+        self.b_init = b_init
+        self.in_channels = in_channels
+
+        if self.in_channels:
+            self.build(None)
+            self._built = True
+
+        logging.info(
+            "GroupConv2d %s: n_filter: %d filter_size: %s strides: %s n_group: %d pad: %s act: %s" % (
+                self.name, n_filter, str(filter_size), str(strides), n_group, padding,
+                self.act.__class__.__name__ if self.act is not None else 'No Activation'
+            )
+        )
+
+    def __repr__(self):
+        actstr = self.act.__class__.__name__ if self.act is not None else "No Activation"
+        s = (
+            '{classname}(in_channels={in_channels}, out_channels={n_filter}, kernel_size={filter_size}'
+            ', strides={strides}, n_group={n_group}, padding={padding}'
+        )
+        if self.dilation_rate != (1, ) * len(self.dilation_rate):
+            s += ', dilation={dilation_rate}'
+        if self.b_init is None:
+            s += ', bias=False'
+        s += (', ' + actstr)
+        if self.name is not None:
+            s += ', name=\'{name}\''
+        s += ')'
+        return s.format(classname=self.__class__.__name__, **self.__dict__)
+
+    def build(self, inputs_shape):
+        if self.data_format == 'channels_last':
+            self.data_format = 'NHWC'
+            if self.in_channels is None:
+                self.in_channels = inputs_shape[-1]
+            self._strides = [1, self._strides[0], self._strides[1], 1]
+            self._dilation_rate = [1, self._dilation_rate[0], self._dilation_rate[1], 1]
+        elif self.data_format == 'channels_first':
+            self.data_format = 'NCHW'
+            if self.in_channels is
None: + self.in_channels = inputs_shape[1] + self._strides = [1, 1, self._strides[0], self._strides[1]] + self._dilation_rate = [1, 1, self._dilation_rate[0], self._dilation_rate[1]] + else: + raise Exception("data_format should be either channels_last or channels_first") + + if self.n_group < 1: + raise ValueError( + "The n_group must be a integer greater than or equal to 1, but we got :{}".format(self.n_group) + ) + + if self.in_channels % self.n_group != 0: + raise ValueError( + "The channels of input must be divisible by n_group, but we got: the channels of input" + "is {}, the n_group is {}.".format(self.in_channels, self.n_group) + ) + + if self.n_filter % self.n_group != 0: + raise ValueError( + "The number of filters must be divisible by n_group, but we got: the number of filters " + "is {}, the n_group is {}. ".format(self.n_filter, self.n_group) + ) + + # TODO channels first filter shape [out_channel, in_channel/n_group, filter_h, filter_w] + self.filter_shape = ( + self.filter_size[0], self.filter_size[1], int(self.in_channels / self.n_group), self.n_filter + ) + + self.W = self._get_weights("filters", shape=self.filter_shape, init=self.W_init) + + self.b_init_flag = False + if self.b_init: + self.b = self._get_weights("biases", shape=(self.n_filter, ), init=self.b_init) + self.bias_add = tl.ops.BiasAdd(self.data_format) + self.b_init_flag = True + + self.group_conv2d = tl.ops.GroupConv2D( + strides=self._strides, padding=self.padding, data_format=self.data_format, dilations=self._dilation_rate, + out_channel=self.n_filter, k_size=(self.filter_size[0], self.filter_size[1]), groups=self.n_group + ) + + self.act_init_flag = False + if self.act: + self.act_init_flag = True + + def forward(self, inputs): + if self._forward_state == False: + if self._built == False: + self.build(tl.get_tensor_shape(inputs)) + self._built = True + self._forward_state = True + + outputs = self.group_conv2d(inputs, self.W) + if self.b_init_flag: + outputs = self.bias_add(outputs, self.b) + if self.act_init_flag: + outputs = self.act(outputs) + return outputs diff --git a/tensorlayer/layers/convolution/separable_conv.py b/tensorlayer/layers/convolution/separable_conv.py new file mode 100644 index 0000000..b837e4e --- /dev/null +++ b/tensorlayer/layers/convolution/separable_conv.py @@ -0,0 +1,319 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- + +import tensorlayer as tl +from tensorlayer import logging +from tensorlayer.layers.core import Module +from tensorlayer.backend import BACKEND + +__all__ = [ + 'SeparableConv1d', + 'SeparableConv2d', +] + + +class SeparableConv1d(Module): + """The :class:`SeparableConv1d` class is a 1D depthwise separable convolutional layer. + This layer performs a depthwise convolution that acts separately on channels, followed by a pointwise convolution that mixes channels. + Parameters + ------------ + n_filter : int + The dimensionality of the output space (i.e. the number of filters in the convolution). + filter_size : int + Specifying the spatial dimensions of the filters. Can be a single integer to specify the same value for all spatial dimensions. + strides : int + Specifying the stride of the convolution. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any dilation_rate value != 1. + act : activation function + The activation function of this layer. + padding : str + One of "valid" or "same" (case-insensitive). 
+    data_format : str
+        One of channels_last (default) or channels_first. The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, height, width).
+    dilation_rate : int
+        Specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all spatial dimensions. Currently, specifying any dilation_rate value != 1 is incompatible with specifying any stride value != 1.
+    depth_multiplier : int
+        The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to num_filters_in * depth_multiplier.
+    depthwise_init : initializer
+        For the depthwise convolution kernel.
+    pointwise_init : initializer
+        For the pointwise convolution kernel.
+    b_init : initializer
+        For the bias vector. If None, ignore bias in the pointwise part only.
+    in_channels : int
+        The number of in channels.
+    name : None or str
+        A unique layer name.
+    Examples
+    --------
+    With TensorLayer
+    >>> net = tl.layers.Input([8, 50, 64], name='input')
+    >>> separableconv1d = tl.layers.SeparableConv1d(n_filter=32, filter_size=3, stride=2, padding='SAME', act=tl.relu, name='separable_1d')(net)
+    >>> print(separableconv1d)
+    >>> output shape : (8, 25, 32)
+    """
+
+    def __init__(
+        self, n_filter=32, filter_size=1, stride=1, act=None, padding="SAME", data_format="channels_last",
+        dilation_rate=1, depth_multiplier=1, depthwise_init=tl.initializers.truncated_normal(stddev=0.02),
+        pointwise_init=tl.initializers.truncated_normal(stddev=0.02), b_init=tl.initializers.constant(value=0.0),
+        in_channels=None, name=None
+    ):
+        super(SeparableConv1d, self).__init__(name, act=act)
+        self.n_filter = n_filter
+        self.filter_size = filter_size
+        self.stride = stride
+        self.padding = padding
+        self.data_format = data_format
+        self.dilation_rate = dilation_rate
+        self.depth_multiplier = depth_multiplier
+        self.depthwise_init = depthwise_init
+        self.pointwise_init = pointwise_init
+        self.b_init = b_init
+        self.in_channels = in_channels
+
+        if self.in_channels:
+            self.build(None)
+            self._built = True
+
+        logging.info(
+            "SeparableConv1d %s: n_filter: %d filter_size: %s stride: %s depth_multiplier: %d act: %s" % (
+                self.name, n_filter, str(filter_size), str(stride), depth_multiplier,
+                self.act.__class__.__name__ if self.act is not None else 'No Activation'
+            )
+        )
+
+    def __repr__(self):
+        actstr = self.act.__class__.__name__ if self.act is not None else 'No Activation'
+        s = (
+            '{classname}(in_channels={in_channels}, out_channels={n_filter}, kernel_size={filter_size}'
+            ', stride={stride}, padding={padding}'
+        )
+        if self.dilation_rate != 1:
+            s += ', dilation={dilation_rate}'
+        if self.b_init is None:
+            s += ', bias=False'
+        s += (', ' + actstr)
+        if self.name is not None:
+            s += ', name=\'{name}\''
+        s += ')'
+        return s.format(classname=self.__class__.__name__, **self.__dict__)
+
+    def build(self, inputs_shape):
+        if self.data_format == 'channels_last':
+            self.data_format = 'NWC'
+            if self.in_channels is None:
+                self.in_channels = inputs_shape[-1]
+        elif self.data_format == 'channels_first':
+            self.data_format = 'NCW'
+            if self.in_channels is None:
+                self.in_channels = inputs_shape[1]
+        else:
+            raise Exception("data_format should be either channels_last or channels_first")
+
+        if BACKEND == 'tensorflow':
+            self.depthwise_filter_shape = (self.filter_size, self.in_channels,
self.depth_multiplier) + self.pointwise_filter_shape = (1, self.depth_multiplier * self.in_channels, self.n_filter) + elif BACKEND == 'mindspore': + self.depthwise_filter_shape = (self.filter_size, 1, self.depth_multiplier * self.in_channels) + self.pointwise_filter_shape = (1, self.depth_multiplier * self.in_channels, self.n_filter) + + self.depthwise_W = self._get_weights( + 'depthwise_filters', shape=self.depthwise_filter_shape, init=self.depthwise_init + ) + self.pointwise_W = self._get_weights( + 'pointwise_filters', shape=self.pointwise_filter_shape, init=self.pointwise_init + ) + + self.b_init_flag = False + if self.b_init: + self.b = self._get_weights("biases", shape=(self.n_filter, ), init=self.b_init) + self.bias_add = tl.ops.BiasAdd(self.data_format) + self.b_init_flag = True + + self.act_init_flag = False + if self.act: + self.activate = self.act + self.act_init_flag = True + + self.separable_conv1d = tl.ops.SeparableConv1D( + stride=self.stride, padding=self.padding, data_format=self.data_format, dilations=self.dilation_rate, + out_channel=self.n_filter, k_size=self.filter_size, in_channel=self.in_channels, + depth_multiplier=self.depth_multiplier + ) + + def forward(self, inputs): + if self._forward_state == False: + if self._built == False: + self.build(tl.get_tensor_shape(inputs)) + self._built = True + self._forward_state = True + + outputs = self.separable_conv1d(inputs, self.depthwise_W, self.pointwise_W) + if self.b_init_flag: + outputs = self.bias_add(outputs, self.b) + if self.act_init_flag: + outputs = self.act(outputs) + return outputs + + +class SeparableConv2d(Module): + """The :class:`SeparableConv2d` class is a 2D depthwise separable convolutional layer. + This layer performs a depthwise convolution that acts separately on channels, followed by a pointwise convolution that mixes channels. + Parameters + ------------ + n_filter : int + The dimensionality of the output space (i.e. the number of filters in the convolution). + filter_size : tuple of int + Specifying the spatial dimensions of the filters. Can be a single integer to specify the same value for all spatial dimensions. + strides : tuple of int + Specifying the stride of the convolution. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any dilation_rate value != 1. + act : activation function + The activation function of this layer. + padding : str + One of "valid" or "same" (case-insensitive). + data_format : str + One of channels_last (default) or channels_first. The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, height, width). + dilation_rate : tuple of int + Specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all spatial dimensions. Currently, specifying any dilation_rate value != 1 is incompatible with specifying any stride value != 1. + depth_multiplier : int + The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to num_filters_in * depth_multiplier. + depthwise_init : initializer + for the depthwise convolution kernel. + pointwise_init : initializer + For the pointwise convolution kernel. + b_init : initializer + For the bias vector. If None, ignore bias in the pointwise part only. 
+    in_channels : int
+        The number of input channels.
+    name : None or str
+        A unique layer name.
+    Examples
+    --------
+    With TensorLayer
+    >>> net = tl.layers.Input([8, 50, 50, 64], name='input')
+    >>> separableconv2d = tl.layers.SeparableConv2d(n_filter=32, filter_size=(3, 3), strides=(2, 2), depth_multiplier=3, padding='SAME', act=tf.nn.relu, name='separable_2d')(net)
+    >>> print(separableconv2d)
+    >>> output shape : (8, 25, 25, 32)
+    """
+
+    def __init__(
+        self, n_filter=32, filter_size=(1, 1), strides=(1, 1), act=None, padding="VALID", data_format="channels_last",
+        dilation_rate=(1, 1), depth_multiplier=1, depthwise_init=tl.initializers.truncated_normal(stddev=0.02),
+        pointwise_init=tl.initializers.truncated_normal(stddev=0.02), b_init=tl.initializers.constant(value=0.0),
+        in_channels=None, name=None
+    ):
+        super(SeparableConv2d, self).__init__(name, act=act)
+        self.n_filter = n_filter
+        self.filter_size = filter_size
+        self._strides = self.strides = strides
+        self.padding = padding
+        self.data_format = data_format
+        self._dilation_rate = self.dilation_rate = dilation_rate
+        self.depth_multiplier = depth_multiplier
+        self.depthwise_init = depthwise_init
+        self.pointwise_init = pointwise_init
+        self.b_init = b_init
+        self.in_channels = in_channels
+
+        if self.in_channels:
+            self.build(None)
+            self._built = True
+
+        logging.info(
+            "SeparableConv2d %s: n_filter: %d filter_size: %s strides: %s depth_multiplier: %d act: %s" % (
+                self.name, n_filter, str(filter_size), str(strides), depth_multiplier,
+                self.act.__class__.__name__ if self.act is not None else 'No Activation'
+            )
+        )
+
+    def __repr__(self):
+        actstr = self.act.__class__.__name__ if self.act is not None else 'No Activation'
+        s = (
+            '{classname}(in_channels={in_channels}, out_channels={n_filter}, kernel_size={filter_size}'
+            ', stride={strides}, padding={padding}'
+        )
+        if self.dilation_rate != (1, ) * len(self.dilation_rate):
+            s += ', dilation={dilation_rate}'
+        if self.b_init is None:
+            s += ', bias=False'
+        s += (', ' + actstr)
+        if self.name is not None:
+            s += ', name=\'{name}\''
+        s += ')'
+        return s.format(classname=self.__class__.__name__, **self.__dict__)
+
+    def build(self, inputs_shape):
+        if self.data_format == 'channels_last':
+            self.data_format = 'NHWC'
+            if self.in_channels is None:
+                self.in_channels = inputs_shape[-1]
+            self._strides = [1, self._strides[0], self._strides[1], 1]
+            self._dilation_rate = [1, self._dilation_rate[0], self._dilation_rate[1], 1]
+        elif self.data_format == 'channels_first':
+            self.data_format = 'NCHW'
+            if self.in_channels is None:
+                self.in_channels = inputs_shape[1]
+            self._strides = [1, 1, self._strides[0], self._strides[1]]
+            self._dilation_rate = [1, 1, self._dilation_rate[0], self._dilation_rate[1]]
+        else:
+            raise Exception("data_format should be either channels_last or channels_first")
+
+        if BACKEND == 'tensorflow':
+            self.depthwise_filter_shape = (
+                self.filter_size[0], self.filter_size[1], self.in_channels, self.depth_multiplier
+            )
+            self.pointwise_filter_shape = (1, 1, self.depth_multiplier * self.in_channels, self.n_filter)
+
+        elif BACKEND == 'mindspore':
+            self.depthwise_filter_shape = (
+                self.filter_size[0], self.filter_size[1], 1, self.depth_multiplier * self.in_channels
+            )
+            self.pointwise_filter_shape = (1, 1, self.depth_multiplier * self.in_channels, self.n_filter)
+
+        self.depthwise_W = self._get_weights(
+            'depthwise_filters', shape=self.depthwise_filter_shape, init=self.depthwise_init
+        )
+
+        self.pointwise_W = self._get_weights(
+            'pointwise_filters',
shape=self.pointwise_filter_shape, init=self.pointwise_init + ) + + self.b_init_flag = False + if self.b_init: + self.b = self._get_weights("biases", shape=(self.n_filter, ), init=self.b_init) + self.bias_add = tl.ops.BiasAdd(self.data_format) + self.b_init_flag = True + + self.act_init_flag = False + if self.act: + self.act_init_flag = True + + self.separable_conv2d = tl.ops.SeparableConv2D( + strides=self._strides, padding=self.padding, data_format=self.data_format, dilations=self._dilation_rate, + out_channel=self.n_filter, k_size=self.filter_size, in_channel=self.in_channels, + depth_multiplier=self.depth_multiplier + ) + + def forward(self, inputs): + if self._forward_state == False: + if self._built == False: + self.build(tl.get_tensor_shape(inputs)) + self._built = True + self._forward_state = True + + outputs = self.separable_conv2d(inputs, self.depthwise_W, self.pointwise_W) + if self.b_init_flag: + outputs = self.bias_add(outputs, self.b) + if self.act_init_flag: + outputs = self.act(outputs) + return outputs + + +if __name__ == '__main__': + net = tl.layers.Input([5, 400, 400, 3], name='input') + layer = SeparableConv2d( + in_channels=3, filter_size=(3, 3), strides=(2, 2), dilation_rate=(2, 2), act=tl.ReLU, depth_multiplier=3, + name='separableconv2d1' + ) + print(len(layer.all_weights)) + print(layer(net).shape) diff --git a/tensorlayer/layers/pooling.py b/tensorlayer/layers/pooling.py index 51cc9a7..006b34d 100644 --- a/tensorlayer/layers/pooling.py +++ b/tensorlayer/layers/pooling.py @@ -5,7 +5,6 @@ import tensorlayer as tl from tensorlayer import logging from tensorlayer.layers.core import Module - __all__ = [ 'PoolLayer', 'MaxPool1d', @@ -20,6 +19,12 @@ __all__ = [ 'GlobalMeanPool2d', 'GlobalMaxPool3d', 'GlobalMeanPool3d', + 'AdaptiveMeanPool1d', + 'AdaptiveMeanPool2d', + 'AdaptiveMeanPool3d', + 'AdaptiveMaxPool1d', + 'AdaptiveMaxPool2d', + 'AdaptiveMaxPool3d', 'CornerPool2d', ] @@ -923,9 +928,9 @@ class CornerPool2d(Module): """ def __init__( - self, - mode='TopLeft', - name=None # 'cornerpool2d' + self, + mode='TopLeft', + name=None # 'cornerpool2d' ): super().__init__(name) self.mode = mode @@ -958,7 +963,7 @@ class CornerPool2d(Module): ) temp_bottom = tl.ops.max_pool(temp_bottom, ksize=(input_height, 1), strides=(1, 1), padding='VALID') temp_right = tl.ops.max_pool(temp_right, ksize=(1, input_width), strides=(1, 1), padding='VALID') - outputs = tl.add(temp_bottom, temp_right)#, name=self.name) + outputs = tl.add(temp_bottom, temp_right) #, name=self.name) elif self.mode == 'BottomRight': temp_top = tl.pad( inputs, tl.constant([[0, 0], [input_height - 1, 0], [0, 0], [0, 0]]), constant_values=batch_min @@ -973,7 +978,343 @@ class CornerPool2d(Module): outputs = tl.identity(inputs) return outputs -if __name__ == '__main__': - net = tl.layers.Input([None, 32, 32, 8], name='input') - net = CornerPool2d(mode='TopLeft',name='cornerpool2d')(net) - print(net) \ No newline at end of file + +class AdaptiveMeanPool1d(Module): + """The :class:`AdaptiveMeanPool1d` class is a 1D Adaptive Mean Pooling layer. + + Parameters + ------------ + output_size : int + The target output size. It must be an integer. + data_format : str + One of channels_last (default, [batch, width, channel]) or channels_first. The ordering of the dimensions in the inputs. + name : None or str + A unique layer name. 
+
+    Examples
+    ---------
+    With TensorLayer
+
+    >>> net = tl.layers.Input([None, 32, 3], name='input')
+    >>> net = tl.layers.AdaptiveMeanPool1d(output_size=16)(net)
+    >>> output shape : [None, 16, 3]
+
+    """
+
+    def __init__(self, output_size, data_format='channels_last', name=None):
+        super(AdaptiveMeanPool1d, self).__init__(name)
+        self.output_size = output_size
+        self.data_format = data_format
+
+        self.build()
+        self._built = True
+
+        logging.info("AdaptiveMeanPool1d %s: output_size: %s " % (self.name, str(output_size)))
+
+    def __repr__(self):
+        s = ('{classname}(output_size={output_size}')
+        if self.name is not None:
+            s += ', name=\'{name}\''
+        s += ')'
+        return s.format(classname=self.__class__.__name__, **self.__dict__)
+
+    def build(self, inputs_shape=None):
+        if self.data_format == 'channels_last':
+            self.data_format = 'NWC'
+        elif self.data_format == 'channels_first':
+            self.data_format = 'NCW'
+        else:
+            raise Exception("unsupported data format")
+
+        self.adaptivemeanpool1d = tl.ops.AdaptiveMeanPool1D(output_size=self.output_size, data_format=self.data_format)
+
+    def forward(self, inputs):
+
+        outputs = self.adaptivemeanpool1d(inputs)
+        return outputs
+
+
+class AdaptiveMeanPool2d(Module):
+    """The :class:`AdaptiveMeanPool2d` class is a 2D Adaptive Mean Pooling layer.
+
+    Parameters
+    ------------
+    output_size : int or list or tuple
+        The target output size. It could be an int, a list [int, int] or a tuple (int, int).
+    data_format : str
+        One of channels_last (default, [batch, height, width, channel]) or channels_first. The ordering of the dimensions in the inputs.
+    name : None or str
+        A unique layer name.
+
+    Examples
+    ---------
+    With TensorLayer
+
+    >>> net = tl.layers.Input([None, 32, 32, 3], name='input')
+    >>> net = tl.layers.AdaptiveMeanPool2d(output_size=16)(net)
+    >>> output shape : [None, 16, 16, 3]
+
+    """
+
+    def __init__(self, output_size, data_format='channels_last', name=None):
+        super(AdaptiveMeanPool2d, self).__init__(name)
+        self.output_size = output_size
+        self.data_format = data_format
+
+        self.build()
+        self._built = True
+
+        logging.info("AdaptiveMeanPool2d %s: output_size: %s " % (self.name, str(output_size)))
+
+    def __repr__(self):
+        s = ('{classname}(output_size={output_size}')
+        if self.name is not None:
+            s += ', name=\'{name}\''
+        s += ')'
+        return s.format(classname=self.__class__.__name__, **self.__dict__)
+
+    def build(self, inputs_shape=None):
+        if self.data_format == 'channels_last':
+            self.data_format = 'NHWC'
+        elif self.data_format == 'channels_first':
+            self.data_format = 'NCHW'
+        else:
+            raise Exception("unsupported data format")
+
+        if isinstance(self.output_size, int):
+            self.output_size = (self.output_size, ) * 2
+
+        self.adaptivemeanpool2d = tl.ops.AdaptiveMeanPool2D(output_size=self.output_size, data_format=self.data_format)
+
+    def forward(self, inputs):
+
+        outputs = self.adaptivemeanpool2d(inputs)
+        return outputs
+
+
+class AdaptiveMeanPool3d(Module):
+    """The :class:`AdaptiveMeanPool3d` class is a 3D Adaptive Mean Pooling layer.
+
+    Parameters
+    ------------
+    output_size : int or list or tuple
+        The target output size. It could be an int, a list [int, int, int] or a tuple (int, int, int).
+    data_format : str
+        One of channels_last (default, [batch, depth, height, width, channel]) or channels_first. The ordering of the dimensions in the inputs.
+    name : None or str
+        A unique layer name.
+
+    Examples
+    ---------
+    With TensorLayer
+
+    >>> net = tl.layers.Input([None, 32, 32, 32, 3], name='input')
+    >>> net = tl.layers.AdaptiveMeanPool3d(output_size=16)(net)
+    >>> output shape : [None, 16, 16, 16, 3]
+
+    """
+
+    def __init__(self, output_size, data_format='channels_last', name=None):
+        super(AdaptiveMeanPool3d, self).__init__(name)
+        self.output_size = output_size
+        self.data_format = data_format
+
+        self.build()
+        self._built = True
+
+        logging.info("AdaptiveMeanPool3d %s: output_size: %s " % (self.name, str(output_size)))
+
+    def __repr__(self):
+        s = ('{classname}(output_size={output_size}')
+        if self.name is not None:
+            s += ', name=\'{name}\''
+        s += ')'
+        return s.format(classname=self.__class__.__name__, **self.__dict__)
+
+    def build(self, inputs_shape=None):
+        if self.data_format == 'channels_last':
+            self.data_format = 'NDHWC'
+        elif self.data_format == 'channels_first':
+            self.data_format = 'NCDHW'
+        else:
+            raise Exception("unsupported data format")
+
+        if isinstance(self.output_size, int):
+            self.output_size = (self.output_size, ) * 3
+
+        self.adaptivemeanpool3d = tl.ops.AdaptiveMeanPool3D(output_size=self.output_size, data_format=self.data_format)
+
+    def forward(self, inputs):
+
+        outputs = self.adaptivemeanpool3d(inputs)
+        return outputs
+
+
+class AdaptiveMaxPool1d(Module):
+    """The :class:`AdaptiveMaxPool1d` class is a 1D Adaptive Max Pooling layer.
+
+    Parameters
+    ------------
+    output_size : int
+        The target output size. It must be an integer.
+    data_format : str
+        One of channels_last (default, [batch, width, channel]) or channels_first. The ordering of the dimensions in the inputs.
+    name : None or str
+        A unique layer name.
+
+    Examples
+    ---------
+    With TensorLayer
+
+    >>> net = tl.layers.Input([None, 32, 3], name='input')
+    >>> net = tl.layers.AdaptiveMaxPool1d(output_size=16)(net)
+    >>> output shape : [None, 16, 3]
+
+    """
+
+    def __init__(self, output_size, data_format='channels_last', name=None):
+        super(AdaptiveMaxPool1d, self).__init__(name)
+        self.output_size = output_size
+        self.data_format = data_format
+
+        self.build()
+        self._built = True
+
+        logging.info("AdaptiveMaxPool1d %s: output_size: %s " % (self.name, str(output_size)))
+
+    def __repr__(self):
+        s = ('{classname}(output_size={output_size}')
+        if self.name is not None:
+            s += ', name=\'{name}\''
+        s += ')'
+        return s.format(classname=self.__class__.__name__, **self.__dict__)
+
+    def build(self, inputs_shape=None):
+        if self.data_format == 'channels_last':
+            self.data_format = 'NWC'
+        elif self.data_format == 'channels_first':
+            self.data_format = 'NCW'
+        else:
+            raise Exception("unsupported data format")
+
+        self.adaptivemaxpool1d = tl.ops.AdaptiveMaxPool1D(output_size=self.output_size, data_format=self.data_format)
+
+    def forward(self, inputs):
+
+        outputs = self.adaptivemaxpool1d(inputs)
+        return outputs
+
+
+class AdaptiveMaxPool2d(Module):
+    """The :class:`AdaptiveMaxPool2d` class is a 2D Adaptive Max Pooling layer.
+
+    Parameters
+    ------------
+    output_size : int or list or tuple
+        The target output size. It could be an int, a list [int, int] or a tuple (int, int).
+    data_format : str
+        One of channels_last (default, [batch, height, width, channel]) or channels_first. The ordering of the dimensions in the inputs.
+    name : None or str
+        A unique layer name.
+
+    Examples
+    ---------
+    With TensorLayer
+
+    >>> net = tl.layers.Input([None, 32, 32, 3], name='input')
+    >>> net = tl.layers.AdaptiveMaxPool2d(output_size=16)(net)
+    >>> output shape : [None, 16, 16, 3]
+
+    """
+
+    def __init__(self, output_size, data_format='channels_last', name=None):
+        super(AdaptiveMaxPool2d, self).__init__(name)
+        self.output_size = output_size
+        self.data_format = data_format
+
+        self.build()
+        self._built = True
+
+        logging.info("AdaptiveMaxPool2d %s: output_size: %s " % (self.name, str(output_size)))
+
+    def __repr__(self):
+        s = ('{classname}(output_size={output_size}')
+        if self.name is not None:
+            s += ', name=\'{name}\''
+        s += ')'
+        return s.format(classname=self.__class__.__name__, **self.__dict__)
+
+    def build(self, inputs_shape=None):
+        if self.data_format == 'channels_last':
+            self.data_format = 'NHWC'
+        elif self.data_format == 'channels_first':
+            self.data_format = 'NCHW'
+        else:
+            raise Exception("unsupported data format")
+        if isinstance(self.output_size, int):
+            self.output_size = (self.output_size, ) * 2
+
+        self.adaptivemaxpool2d = tl.ops.AdaptiveMaxPool2D(output_size=self.output_size, data_format=self.data_format)
+
+    def forward(self, inputs):
+
+        outputs = self.adaptivemaxpool2d(inputs)
+        return outputs
+
+
+class AdaptiveMaxPool3d(Module):
+    """The :class:`AdaptiveMaxPool3d` class is a 3D Adaptive Max Pooling layer.
+
+    Parameters
+    ------------
+    output_size : int or list or tuple
+        The target output size. It could be an int, a list [int, int, int] or a tuple (int, int, int).
+    data_format : str
+        One of channels_last (default, [batch, depth, height, width, channel]) or channels_first. The ordering of the dimensions in the inputs.
+    name : None or str
+        A unique layer name.
+
+    Examples
+    ---------
+    With TensorLayer
+
+    >>> net = tl.layers.Input([None, 32, 32, 32, 3], name='input')
+    >>> net = tl.layers.AdaptiveMaxPool3d(output_size=16)(net)
+    >>> output shape : [None, 16, 16, 16, 3]
+
+    """
+
+    def __init__(self, output_size, data_format='channels_last', name=None):
+        super(AdaptiveMaxPool3d, self).__init__(name)
+        self.output_size = output_size
+        self.data_format = data_format
+
+        self.build()
+        self._built = True
+
+        logging.info("AdaptiveMaxPool3d %s: output_size: %s " % (self.name, str(output_size)))
+
+    def __repr__(self):
+        s = ('{classname}(output_size={output_size}')
+        if self.name is not None:
+            s += ', name=\'{name}\''
+        s += ')'
+        return s.format(classname=self.__class__.__name__, **self.__dict__)
+
+    def build(self, inputs_shape=None):
+        if self.data_format == 'channels_last':
+            self.data_format = 'NDHWC'
+        elif self.data_format == 'channels_first':
+            self.data_format = 'NCDHW'
+        else:
+            raise Exception("unsupported data format")
+
+        if isinstance(self.output_size, int):
+            self.output_size = (self.output_size, ) * 3
+
+        self.adaptivemaxpool3d = tl.ops.AdaptiveMaxPool3D(output_size=self.output_size, data_format=self.data_format)
+
+    def forward(self, inputs):
+
+        outputs = self.adaptivemaxpool3d(inputs)
+        return outputs
diff --git a/tensorlayer/metric/__init__.py b/tensorlayer/metric/__init__.py
new file mode 100644
index 0000000..c11f832
--- /dev/null
+++ b/tensorlayer/metric/__init__.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from tensorlayer.backend import BACKEND
+
+if BACKEND == 'tensorflow':
+    from .tensorflow_metric import *
+elif BACKEND == 'mindspore':
+    from .mindspore_metric import *
+elif BACKEND == 'dragon':
+    pass
+elif BACKEND == 'paddle':
+    from .paddle_metric import *
+else:
+    raise
NotImplementedError("This backend is not supported") diff --git a/tensorlayer/metric/mindspore_metric.py b/tensorlayer/metric/mindspore_metric.py new file mode 100644 index 0000000..bcc6499 --- /dev/null +++ b/tensorlayer/metric/mindspore_metric.py @@ -0,0 +1,88 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- + +import mindspore.nn as nn +from mindspore.nn.metrics._evaluation import EvaluationBase +from mindspore.nn.metrics.metric import Metric +__all__ = [ + 'Accuracy', + 'Auc', + 'Precision', + 'Recall', +] + + +class Accuracy(object): + + def __init__(self, topk=1): + + self.accuracy = nn.TopKCategoricalAccuracy(k=topk) + + def update(self, y_pred, y_true): + + self.accuracy.update(y_pred, y_true) + + def result(self): + + return self.accuracy.eval() + + def reset(self): + + self.accuracy.clear() + + +class Auc(object): + + def __init__(self): + + pass + + def update(self, y_pred, y_true): + + raise Exception('Auc metric function not implemented') + + def result(self): + + pass + + def reset(self): + + pass + + +class Precision(object): + + def __init__(self): + + self.precision = nn.Precision(eval_type="classification") + + def update(self, y_pred, y_true): + + self.precision.update(y_pred, y_true) + + def result(self): + + return self.precision.eval() + + def reset(self): + + self.precision.clear() + + +class Recall(object): + + def __init__(self): + + self.recall = nn.Recall(eval_type="classification") + + def update(self, y_pred, y_true): + + self.recall.update(y_pred, y_true) + + def result(self): + + return self.recall.eval() + + def reset(self): + + self.recall.clear() diff --git a/tensorlayer/metric/paddle_metric.py b/tensorlayer/metric/paddle_metric.py new file mode 100644 index 0000000..b6b3f32 --- /dev/null +++ b/tensorlayer/metric/paddle_metric.py @@ -0,0 +1,89 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- + +import paddle +from paddle.metric.metrics import Metric + +__all__ = [ + 'Accuracy', + 'Auc', + 'Precision', + 'Recall', +] + + +class Accuracy(object): + + def __init__( + self, + topk=1, + ): + + self.topk = topk + self.accuracy = paddle.metric.Accuracy(topk=(self.topk, )) + + def update(self, y_pred, y_true): + + self.accuracy.update(self.accuracy.compute(y_pred, y_true)) + + def result(self): + + return self.accuracy.accumulate() + + def reset(self): + + self.accuracy.reset() + + +class Auc(object): + + def __init__(self, curve='ROC', num_thresholds=4095): + + self.auc = paddle.metric.Auc(curve=curve, num_thresholds=num_thresholds) + + def update(self, y_pred, y_true): + + self.auc.update(y_pred, y_true) + + def result(self): + + return self.auc.accumulate() + + def reset(self): + + self.auc.reset() + + +class Precision(object): + + def __init__(self): + + self.precision = paddle.metric.Precision() + + def update(self, y_pred, y_true): + + self.precision.update(y_pred, y_true) + + def result(self): + + return self.precision.accumulate() + + def reset(self): + + self.precision.reset() + + +class Recall(object): + + def __init__(self): + + self.recall = paddle.metric.Recall() + + def update(self, y_pred, y_true): + self.recall.update(y_pred, y_true) + + def result(self): + return self.recall.accumulate() + + def reset(self): + self.recall.reset() diff --git a/tensorlayer/metric/tensorflow_metric.py b/tensorlayer/metric/tensorflow_metric.py new file mode 100644 index 0000000..d7398ff --- /dev/null +++ b/tensorlayer/metric/tensorflow_metric.py @@ -0,0 +1,98 @@ +#! 
/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import tensorflow as tf
+from tensorflow.keras.metrics import Metric
+
+__all__ = [
+    'Accuracy',
+    'Auc',
+    'Precision',
+    'Recall',
+]
+
+
+class Accuracy(object):
+
+    def __init__(self, topk=1):
+        self.topk = topk
+        if topk == 1:
+            self.accuracy = tf.keras.metrics.Accuracy()
+        else:
+            self.accuracy = tf.keras.metrics.SparseTopKCategoricalAccuracy(k=topk)
+
+    def update(self, y_pred, y_true):
+
+        if self.topk == 1:
+            y_pred = tf.argmax(y_pred, axis=1)
+            self.accuracy.update_state(y_true, y_pred)
+        else:
+            self.accuracy.update_state(y_true, y_pred)
+
+    def result(self):
+
+        return self.accuracy.result()
+
+    def reset(self):
+
+        self.accuracy.reset_states()
+
+
+class Auc(object):
+
+    def __init__(
+        self,
+        curve='ROC',
+        num_thresholds=200,
+    ):
+        self.auc = tf.keras.metrics.AUC(num_thresholds=num_thresholds, curve=curve)
+
+    def update(self, y_pred, y_true):
+
+        self.auc.update_state(y_true, y_pred)
+
+    def result(self):
+
+        return self.auc.result()
+
+    def reset(self):
+
+        self.auc.reset_states()
+
+
+class Precision(object):
+
+    def __init__(self):
+
+        self.precision = tf.keras.metrics.Precision()
+
+    def update(self, y_pred, y_true):
+
+        self.precision.update_state(y_true, y_pred)
+
+    def result(self):
+
+        return self.precision.result()
+
+    def reset(self):
+
+        self.precision.reset_states()
+
+
+class Recall(object):
+
+    def __init__(self):
+
+        self.recall = tf.keras.metrics.Recall()
+
+    def update(self, y_pred, y_true):
+
+        self.recall.update_state(y_true, y_pred)
+
+    def result(self):
+
+        return self.recall.result()
+
+    def reset(self):
+
+        self.recall.reset_states()
diff --git a/tensorlayer/models/core.py b/tensorlayer/models/core.py
index 15b5439..e449af0 100644
--- a/tensorlayer/models/core.py
+++ b/tensorlayer/models/core.py
@@ -21,6 +21,8 @@ if tl.BACKEND == 'mindspore':
     # from mindspore.train.parallel_utils import ParallelMode
     from mindspore.nn.wrap import DistributedGradReducer
     from mindspore.common import ParameterTuple
+if tl.BACKEND == 'paddle':
+    import paddle as pd
 
 
 class Model:
@@ -35,28 +37,7 @@ class Model:
         network should contain the logic of loss and grads calculation, and the logic of parallel if needed. Default: None.
     optimizer : Optimizer for updating the weights. Default: None.
-    metrics (Union[dict, set]): Dict or set of metrics to be evaluated by the model during
-        training and testing. eg: {'accuracy', 'recall'}. Default: None.
-    eval_network (Cell): Network for evaluation. If not defined, `network` and `loss_fn` would be wrapped as
-        `eval_network`. Default: None.
-    eval_indexes (list): In case of defining the `eval_network`, if `eval_indexes` is None, all outputs of
-        `eval_network` would be passed to metrics, otherwise `eval_indexes` must contain three
-        elements, representing the positions of loss value, predict value and label, the loss
-        value would be passed to `Loss` metric, predict value and label would be passed to other
-        metric. Default: None.
-    amp_level (str): Option for argument `level` in `mindspore.amp.build_train_network`, level for mixed
-        precision training. Supports [O0, O2, O3]. Default: "O0".
-
-        - O0: Do not change.
-        - O2: Cast network to float16, keep batchnorm run in float32, using dynamic loss scale.
-        - O3: Cast network to float16, with additional property 'keep_batchnorm_fp32=False'.
-
-        O2 is recommended on GPU, O3 is recommended on Ascend.
-
-    loss_scale_manager (Union[None, LossScaleManager]): If None, not scale the loss, or else
-        scale the loss by LossScaleManager. If it is set, overwrite the level setting. It's a eyword argument.
-        e.g. Use `loss_scale_manager=None` to set the value.
-    keep_batchnorm_fp32 (bool): Keep Batchnorm run in `float32`. If set, overwrite the level setting. Default: True.
+    metrics : Dict or set of metrics to be evaluated by the model during training and testing. Default: None.
 
     Examples:
         >>> import tensorlayer as tl
@@ -83,10 +64,7 @@ class Model:
         >>> model.train(2, dataset)
     """
 
-    def __init__(
-        self, network, loss_fn=None, optimizer=None, metrics=None, eval_network=None, eval_indexes=None, amp_level="O0",
-        **kwargs
-    ):
+    def __init__(self, network, loss_fn=None, optimizer=None, metrics=None, **kwargs):
         self.network = network
         self.loss_fn = loss_fn
         self.optimizer = optimizer
@@ -110,6 +88,12 @@ class Model:
                 train_weights=self.train_weights, optimizer=self.optimizer, metrics=self.metrics,
                 print_train_batch=print_train_batch, print_freq=print_freq, test_dataset=test_dataset
             )
+        elif tl.BACKEND == 'paddle':
+            self.pd_train(
+                n_epoch=n_epoch, train_dataset=train_dataset, network=self.network, loss_fn=self.loss_fn,
+                train_weights=self.train_weights, optimizer=self.optimizer, metrics=self.metrics,
+                print_train_batch=print_train_batch, print_freq=print_freq, test_dataset=test_dataset
+            )
 
     def eval(self, test_dataset):
         self.network.eval()
@@ -283,7 +267,9 @@ class Model:
             train_loss += _loss_ce
 
             if metrics:
-                train_acc += metrics(_logits, y_batch)
+                metrics.update(_logits, y_batch)
+                train_acc += metrics.result()
+                metrics.reset()
             else:
                 train_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
             n_iter += 1
@@ -307,7 +293,9 @@ class Model:
                 _logits = network(X_batch)  # is_train=False, disable dropout
                 val_loss += loss_fn(_logits, y_batch, name='eval_loss')
                 if metrics:
-                    val_acc += metrics(_logits, y_batch)
+                    metrics.update(_logits, y_batch)
+                    val_acc += metrics.result()
+                    metrics.reset()
                 else:
                     val_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch))
                 n_iter += 1
@@ -332,7 +320,9 @@ class Model:
             loss = loss_output.asnumpy()
             train_loss += loss
             if metrics:
-                train_acc += metrics(output, y_batch)
+                metrics.update(output, y_batch)
+                train_acc += metrics.result()
+                metrics.reset()
             else:
                 train_acc += np.mean((P.Equal()(P.Argmax(axis=1)(output), y_batch).asnumpy()))
             n_iter += 1
@@ -356,9 +346,65 @@ class Model:
                 _logits = network(X_batch)
                 val_loss += loss_fn(_logits, y_batch, name='eval_loss')
                 if metrics:
-                    val_acc += metrics(_logits, y_batch)
+                    metrics.update(_logits, y_batch)
+                    val_acc += metrics.result()
+                    metrics.reset()
+                else:
+                    val_acc += np.mean((P.Equal()(P.Argmax(axis=1)(_logits), y_batch).asnumpy()))
+                n_iter += 1
+            print("   val loss: {}".format(val_loss / n_iter))
+            print("   val acc: {}".format(val_acc / n_iter))
+
+    def pd_train(
+        self, n_epoch, train_dataset, network, loss_fn, train_weights, optimizer, metrics, print_train_batch,
+        print_freq, test_dataset
+    ):
+        for epoch in range(n_epoch):
+            start_time = time.time()
+
+            train_loss, train_acc, n_iter = 0, 0, 0
+            for X_batch, y_batch in train_dataset:
+                network.set_train()
+
+                output = network(X_batch)
+                loss = loss_fn(output, y_batch)
+                loss_ce = loss.numpy()
+                params_grads = optimizer.gradient(loss, train_weights)
+                optimizer.apply_gradients(params_grads)
+
+                train_loss += loss_ce
+                if metrics:
+                    metrics.update(output, y_batch)
+                    train_acc += metrics.result()
+                    metrics.reset()
+                else:
+                    train_acc += pd.metric.accuracy(output, y_batch)
+                n_iter += 1
+
+                if print_train_batch:
+                    print("Epoch {} of {} took {}".format(epoch + 1, n_epoch, time.time() - start_time))
+                    print("   train loss: {}".format(train_loss /
n_iter)) + print(" train acc: {}".format(train_acc / n_iter)) + + if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: + print("Epoch {} of {} took {}".format(epoch + 1, n_epoch, time.time() - start_time)) + print(" train loss: {}".format(train_loss / n_iter)) + print(" train acc: {}".format(train_acc / n_iter)) + + if test_dataset: + # use training and evaluation sets to evaluate the model every print_freq epoch + if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: + network.eval() + val_loss, val_acc, n_iter = 0, 0, 0 + for X_batch, y_batch in test_dataset: + _logits = network(X_batch) # is_train=False, disable dropout + val_loss += loss_fn(_logits, y_batch, name='eval_loss') + if metrics: + metrics.update(_logits, y_batch) + val_acc += metrics.result() + metrics.reset() else: - val_acc += np.mean((P.Equal()(P.Argmax(axis=1)(output), y_batch).asnumpy())) + val_acc += np.mean(np.equal(np.argmax(_logits, 1), y_batch)) n_iter += 1 print(" val loss: {}".format(val_loss / n_iter)) print(" val acc: {}".format(val_acc / n_iter)) diff --git a/tensorlayer/optimizers/__init__.py b/tensorlayer/optimizers/__init__.py index ffe9995..9d654bb 100644 --- a/tensorlayer/optimizers/__init__.py +++ b/tensorlayer/optimizers/__init__.py @@ -11,11 +11,11 @@ More functions can be found in `TensorFlow API = 0.") + if not 0.0 <= momentum: + raise ValueError("Invalid value of momentum, expect momentum >= 0.") + if not 0.0 <= rho: + raise ValueError("Invalid value of rho, expect rho >= 0.") + + self.learning_rate = learning_rate + self.epsilon = epsilon + self.rho = rho + self.momentum = momentum + self.centered = centered + + def gradient(self, loss, weights): + if loss is None: + raise ValueError('loss is not set.') + if weights is None: + raise ValueError('weights is not set.') + + self.rmsprop = paddle.optimizer.RMSProp( + learning_rate=self.learning_rate, epsilon=self.epsilon, rho=self.rho, momentum=self.momentum, + parameters=weights + ) + loss.backward() + weights_and_grads = self.rmsprop.backward(loss=loss, parameters=weights) + + return weights_and_grads + + def apply_gradients(self, weights_and_grads): + if weights_and_grads is None: + raise ValueError('weights_and_grads is not set.') + self.rmsprop._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads) + self.rmsprop.clear_grad() + + +class SGD(Optimizer): + + def __init__(self, learning_rate=0.001): + if learning_rate is None: + raise ValueError("learning_rate is not set.") + + self.learning_rate = learning_rate + + def gradient(self, loss, weights): + if loss is None: + raise ValueError('loss is not set.') + if weights is None: + raise ValueError('weights is not set.') + + self.sgd = paddle.optimizer.SGD(learning_rate=self.learning_rate, parameters=weights) + loss.backward() + weights_and_grads = self.sgd.backward(loss=loss, parameters=weights) + + return weights_and_grads + + def apply_gradients(self, weights_and_grads): + if weights_and_grads is None: + raise ValueError('weights_and_grads is not set.') + self.sgd._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads) + self.sgd.clear_grad() + + +class Momentum(Optimizer): + + def __init__(self, learning_rate=0.001, momentum=0.9, nesterov=False): + if learning_rate is None: + raise ValueError("learning_rate is not set") + if momentum is None: + raise ValueError("momentum is not set") + + self.learning_rate = learning_rate + self.momentum = momentum + self.nesterov = nesterov + + def gradient(self, loss, weights): + if loss is None: + raise 
ValueError('loss is not set.')
+        if weights is None:
+            raise ValueError('weights is not set.')
+
+        self.moment = paddle.optimizer.Momentum(
+            learning_rate=self.learning_rate, momentum=self.momentum, parameters=weights, use_nesterov=self.nesterov
+        )
+        loss.backward()
+        weights_and_grads = self.moment.backward(loss=loss, parameters=weights)
+        return weights_and_grads
+
+    def apply_gradients(self, weights_and_grads):
+        if weights_and_grads is None:
+            raise ValueError('weights_and_grads is not set.')
+        self.moment._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
+        self.moment.clear_grad()
+
+
+class Lamb(Optimizer):
+
+    def __init__(self, learning_rate=0.001, lamb_weight_decay=0.01, beta_1=0.9, beta_2=0.999, epsilon=1.0e-6):
-__all__ = ['Adadelta', 'Adagrad', 'Adam', 'Admax', 'Ftrl', 'Nadam', 'RMSprop', 'SGD', 'Momentum', 'Lamb', 'LARS']
+        if learning_rate is None:
+            raise ValueError('learning_rate is not set.')
+        if lamb_weight_decay is None:
+            raise ValueError('lamb_weight_decay is not set.')
+        if beta_1 is None:
+            raise ValueError('beta_1 is not set.')
+        if beta_2 is None:
+            raise ValueError('beta_2 is not set.')
+        if epsilon is None:
+            raise ValueError('epsilon is not set.')
-# Add module aliases
+        if not 0 <= beta_1 < 1:
+            raise ValueError("Invalid value of beta1, expect beta1 in [0,1).")
+        if not 0 <= beta_2 < 1:
+            raise ValueError("Invalid value of beta2, expect beta2 in [0,1).")
-# learning_rate=0.001, rho=0.95, epsilon=1e-07, name='Adadelta'
-Adadelta = None
+        self.learning_rate = learning_rate
+        self.lamb_weight_decay = lamb_weight_decay
+        self.beta_1 = beta_1
+        self.beta_2 = beta_2
+        self.epsilon = epsilon
-# learning_rate=0.001, initial_accumulator_value=0.1, epsilon=1e-07,name='Adagrad'
-Adagrad = None
+    def gradient(self, loss, weights):
+        if loss is None:
+            raise ValueError('loss is not set.')
+        if weights is None:
+            raise ValueError('weights is not set.')
-# learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,name='Adam'
-Adam = None
+        self.lamb = paddle.optimizer.Lamb(
+            learning_rate=self.learning_rate, lamb_weight_decay=self.lamb_weight_decay, beta1=self.beta_1,
+            beta2=self.beta_2, epsilon=self.epsilon, parameters=weights
+        )
+        loss.backward()
+        weights_and_grads = self.lamb.backward(loss=loss, parameters=weights)
-# learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, name='Adamax'
-Admax = None
+        return weights_and_grads
-# learning_rate=0.001, learning_rate_power=-0.5, initial_accumulator_value=0.1,
-# l1_regularization_strength=0.0, l2_regularization_strength=0.0, name='Ftrl',l2_shrinkage_regularization_strength=0.0
-Ftrl = None
+    def apply_gradients(self, weights_and_grads):
+        if weights_and_grads is None:
+            raise ValueError('weights_and_grads is not set.')
+        self.lamb._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
+        self.lamb.clear_grad()
-# learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, name='Nadam',
-Nadam = None
-# learning_rate=0.001, rho=0.9, momentum=0.0, epsilon=1e-07, centered=False,name='RMSprop'
-RMSprop = None
+
+class LARS(Optimizer):
-# learning_rate=0.01, momentum=0.0, nesterov=False, name='SGD'
-SGD = None
+    def __init__(self):
-# learning_rate, momentum, use_locking=False, name='Momentum', use_nesterov=False
-Momentum = None
+        pass
+    def gradient(self):
-def Lamb(**kwargs):
-    raise Exception('Lamb optimizer function not implemented')
+        pass
+    def apply_gradients(self, weights_and_grads):
-def LARS(**kwargs):
-    raise Exception('LARS
optimizer function not implemented') + raise Exception('LARS optimizer function not implemented') diff --git a/tensorlayer/optimizers/tensorflow_optimizers.py b/tensorlayer/optimizers/tensorflow_optimizers.py index 0cae4cc..971df38 100644 --- a/tensorlayer/optimizers/tensorflow_optimizers.py +++ b/tensorlayer/optimizers/tensorflow_optimizers.py @@ -4,7 +4,7 @@ from __future__ import absolute_import, division, print_function import tensorflow as tf -__all__ = ['Adadelta', 'Adagrad', 'Adam', 'Admax', 'Ftrl', 'Nadam', 'RMSprop', 'SGD', 'Momentum', 'Lamb', 'LARS'] +__all__ = ['Adadelta', 'Adagrad', 'Adam', 'Adamax', 'Ftrl', 'Nadam', 'RMSprop', 'SGD', 'Momentum', 'Lamb', 'LARS'] # Add module aliases @@ -18,7 +18,7 @@ Adagrad = tf.optimizers.Adagrad Adam = tf.optimizers.Adam # learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, name='Adamax' -Admax = tf.optimizers.Adamax +Adamax = tf.optimizers.Adamax # learning_rate=0.001, learning_rate_power=-0.5, initial_accumulator_value=0.1, # l1_regularization_strength=0.0, l2_regularization_strength=0.0, name='Ftrl',l2_shrinkage_regularization_strength=0.0 diff --git a/tests/dataflow/__init__.py b/tests/dataflow/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/dataflow/test_dataflow_image.py b/tests/dataflow/test_dataflow_image.py new file mode 100644 index 0000000..dcdf64d --- /dev/null +++ b/tests/dataflow/test_dataflow_image.py @@ -0,0 +1,279 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import unittest + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + +import tensorlayer as tl + +from tests.utils import CustomTestCase + + +class Dataflow_Image_Test(CustomTestCase): + + @classmethod + def setUpClass(self): + self.input_shape = [1, 100, 100, 3] + self.input_layer = tl.layers.Input(self.input_shape, name='input_layer') + self.input_shape_1 = [100, 100, 3] + self.input_layer_1 = tl.layers.Input(self.input_shape_1, name='input_layer_1') + + self.centralcrop_1 = tl.dataflow.image.CentralCrop(self.input_layer, central_fraction=0.5) + self.centralcrop_2 = tl.dataflow.image.CentralCrop(self.input_layer, size=60) + + self.hsvtorgb = tl.dataflow.image.HsvToRgb(self.input_layer) + + self.adjustbrightness = tl.dataflow.image.AdjustBrightness(self.input_layer, factor=0.5) + self.adjustconstrast = tl.dataflow.image.AdjustContrast(self.input_layer, factor=0.5) + self.adjusthue = tl.dataflow.image.AdjustHue(self.input_layer, factor=0.5) + self.adjustsaturation = tl.dataflow.image.AdjustSaturation(self.input_layer, factor=0.5) + + self.crop = tl.dataflow.image.Crop( + self.input_layer, offset_height=20, offset_width=20, target_height=60, target_width=60 + ) + + self.fliphorizontal = tl.dataflow.image.FlipHorizontal(self.input_layer) + self.flipvertical = tl.dataflow.image.FlipVertical(self.input_layer) + + self.rgbtogray = tl.dataflow.image.RgbToGray(self.input_layer) + self.graytorgb = tl.dataflow.image.GrayToRgb(self.rgbtogray) + + self.padtoboundingbox = tl.dataflow.image.PadToBoundingbox( + self.input_layer, offset_height=20, offset_width=20, target_height=150, target_width=150 + ) + + self.pad_1 = tl.dataflow.image.Pad(self.input_layer, padding=10, padding_value=1, mode='constant') + self.pad_2 = tl.dataflow.image.Pad(self.input_layer, padding=(10, 10), mode='REFLECT') + self.pad_3 = tl.dataflow.image.Pad(self.input_layer, padding=(10, 20, 30, 40), mode='SYMMETRIC') + + self.standardization_1 = tl.dataflow.image.Standardization( + self.input_layer, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5) + ) + 
self.standardization_2 = tl.dataflow.image.Standardization(self.input_layer, channel_mode=False) + self.standardization_3 = tl.dataflow.image.Standardization(self.input_layer, channel_mode=True) + + self.randombrightness = tl.dataflow.image.RandomBrightness(self.input_layer, factor=0.5) + self.randomcontrast = tl.dataflow.image.RandomContrast(self.input_layer, lower=0.2, upper=0.5) + self.randomhue = tl.dataflow.image.RandomHue(self.input_layer, factor=0.5) + self.randomsaturation = tl.dataflow.image.RandomSaturation(self.input_layer, lower=0.2, upper=0.5) + + self.randomcrop_1 = tl.dataflow.image.RandomCrop(self.input_layer, size=50) + self.randomcrop_2 = tl.dataflow.image.RandomCrop(self.input_layer, size=(50, 60)) + + self.resize_1 = tl.dataflow.image.Resize( + self.input_layer, size=46, method='bilinear', preserve_aspect_ratio=False, antialias=True + ) + + self.resize_2 = tl.dataflow.image.Resize( + self.input_layer, size=(32, 45), method='bilinear', preserve_aspect_ratio=True, antialias=False + ) + + self.croporpad = tl.dataflow.image.CropOrPad(self.input_layer, target_height=50, target_width=150) + self.resizeandpad = tl.dataflow.image.ResizeAndPad( + self.input_layer, target_height=50, target_width=150, method='bilinear' + ) + self.rgbtohsv = tl.dataflow.image.RgbToHsv(self.input_layer) + self.transpose = tl.dataflow.image.Transpose(self.input_layer, order=(3, 2, 1, 0)) + self.randomrotation = tl.dataflow.image.RandomRotation( + self.input_layer_1, degrees=60, fill_mode='nearest', fill_value=1 + ) + self.randomshift_1 = tl.dataflow.image.RandomShift( + self.input_layer_1, shift=0.5, fill_mode='nearest', fill_value=0 + ) + self.randomshift_2 = tl.dataflow.image.RandomShift( + self.input_layer_1, shift=(0.5, 0.4), fill_mode='nearest', fill_value=0 + ) + + self.randomshear = tl.dataflow.image.RandomShear( + self.input_layer_1, degree=30, fill_mode='nearest', fill_value=1 + ) + + self.randomzoom_1 = tl.dataflow.image.RandomZoom( + self.input_layer_1, zoom_range=0.5, fill_mode='nearest', fill_value=1 + ) + self.randomzoom_2 = tl.dataflow.image.RandomZoom( + self.input_layer_1, zoom_range=(0.5, 0.4), fill_mode='nearest', fill_value=1 + ) + + self.rescale = tl.dataflow.image.Rescale(self.input_layer, scale=3, offset=4) + self.randomflipvertical = tl.dataflow.image.RandomFlipVertical(self.input_layer) + self.randomfliphorizontal = tl.dataflow.image.RandomFlipHorizontal(self.input_layer) + self.hwc2chw = tl.dataflow.image.HWC2CHW(self.input_layer) + self.chw2hwc = tl.dataflow.image.CHW2HWC(self.hwc2chw) + + @classmethod + def tearDownClass(self): + pass + + def test_centralcrop_1(self): + + self.assertEqual(tl.get_tensor_shape(self.centralcrop_1), [1, 50, 50, 3]) + + def test_centralcrop_2(self): + + self.assertEqual(tl.get_tensor_shape(self.centralcrop_2), [1, 60, 60, 3]) + + def test_hsvtorgb(self): + + self.assertEqual(tl.get_tensor_shape(self.hsvtorgb), [1, 100, 100, 3]) + + def test_adjustbrightness(self): + + self.assertEqual(tl.get_tensor_shape(self.adjustbrightness), [1, 100, 100, 3]) + + def test_adjustconstrast(self): + + self.assertEqual(tl.get_tensor_shape(self.adjustconstrast), [1, 100, 100, 3]) + + def test_adjusthue(self): + + self.assertEqual(tl.get_tensor_shape(self.adjusthue), [1, 100, 100, 3]) + + def test_adjustsaturation(self): + + self.assertEqual(tl.get_tensor_shape(self.adjustsaturation), [1, 100, 100, 3]) + + def test_crop(self): + + self.assertEqual(tl.get_tensor_shape(self.crop), [1, 60, 60, 3]) + + def test_fliphorizontal(self): + + 
self.assertEqual(tl.get_tensor_shape(self.fliphorizontal), [1, 100, 100, 3])
+
+    def test_flipvertical(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.flipvertical), [1, 100, 100, 3])
+
+    def test_rgbtogray(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.rgbtogray), [1, 100, 100, 1])
+
+    def test_graytorgb(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.graytorgb), [1, 100, 100, 3])
+
+    def test_padtoboundingbox(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.padtoboundingbox), [1, 150, 150, 3])
+
+    def test_pad_1(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.pad_1), [1, 120, 120, 3])
+
+    def test_pad_2(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.pad_2), [1, 120, 120, 3])
+
+    def test_pad_3(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.pad_3), [1, 130, 170, 3])
+
+    def test_standardization_1(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.standardization_1), [1, 100, 100, 3])
+
+    def test_standardization_2(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.standardization_2), [1, 100, 100, 3])
+
+    def test_standardization_3(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.standardization_3), [1, 100, 100, 3])
+
+    def test_randomcontrast(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomcontrast), [1, 100, 100, 3])
+
+    def test_randomhue(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomhue), [1, 100, 100, 3])
+
+    def test_randomsaturation(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomsaturation), [1, 100, 100, 3])
+
+    def test_randomcrop_1(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomcrop_1), [1, 50, 50, 3])
+
+    def test_randomcrop_2(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomcrop_2), [1, 50, 60, 3])
+
+    def test_resize_1(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.resize_1), [1, 46, 46, 3])
+
+    def test_resize_2(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.resize_2), [1, 32, 32, 3])
+
+    def test_croporpad(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.croporpad), [1, 50, 150, 3])
+
+    def test_resizeandpad(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.resizeandpad), [1, 50, 150, 3])
+
+    def test_rgbtohsv(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.rgbtohsv), [1, 100, 100, 3])
+
+    def test_transpose(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.transpose), [3, 100, 100, 1])
+
+    def test_randomrotation(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomrotation), [100, 100, 3])
+
+    def test_randomshift_1(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomshift_1), [100, 100, 3])
+
+    def test_randomshift_2(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomshift_2), [100, 100, 3])
+
+    def test_randomshear(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomshear), [100, 100, 3])
+
+    def test_randomzoom_1(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomzoom_1), [100, 100, 3])
+
+    def test_randomzoom_2(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomzoom_2), [100, 100, 3])
+
+    def test_rescale(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.rescale), [1, 100, 100, 3])
+
+    def test_randomflipvertical(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomflipvertical), [1, 100, 100, 3])
+
+    def test_randomfliphorizontal(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.randomfliphorizontal), [1, 100, 100, 3])
+
+    def test_hwc2chw(self):
+
+        self.assertEqual(tl.get_tensor_shape(self.hwc2chw), [1, 3, 100, 100])
+
+    def test_chw2hwc(self):
+
+        
self.assertEqual(tl.get_tensor_shape(self.chw2hwc), [1, 100, 100, 3]) + + +if __name__ == '__main__': + + tl.logging.set_verbosity(tl.logging.DEBUG) + + unittest.main() diff --git a/tests/layers/test_layers_convolution.py b/tests/layers/test_layers_convolution.py index 20fb15a..df2f69c 100644 --- a/tests/layers/test_layers_convolution.py +++ b/tests/layers/test_layers_convolution.py @@ -29,6 +29,19 @@ class Layer_Convolution_1D_Test(CustomTestCase): self.dconv1dlayer1 = tl.layers.DeConv1d(n_filter=64, in_channels=32, filter_size=5, name='deconv1dlayer') self.n3 = self.dconv1dlayer1(self.n2) + self.separableconv1d1 = tl.layers.SeparableConv1d(in_channels=1, n_filter=16, filter_size=3, stride=2) + self.n4 = self.separableconv1d1(self.input_layer) + + self.separableconv1d2 = tl.layers.SeparableConv1d( + in_channels=1, n_filter=16, filter_size=3, stride=2, depth_multiplier=4 + ) + self.n5 = self.separableconv1d2(self.input_layer) + + self.separableconv1d3 = tl.layers.SeparableConv1d( + in_channels=1, n_filter=16, filter_size=3, stride=2, depth_multiplier=4, b_init=None + ) + self.n6 = self.separableconv1d3(self.input_layer) + @classmethod def tearDownClass(self): pass @@ -45,6 +58,18 @@ class Layer_Convolution_1D_Test(CustomTestCase): self.assertEqual(len(self.dconv1dlayer1.all_weights), 2) self.assertEqual(tl.get_tensor_shape(self.n3), [self.batch_size, 25, 64]) + def test_layer_n4(self): + self.assertEqual(len(self.separableconv1d1.all_weights), 3) + self.assertEqual(tl.get_tensor_shape(self.n4), [self.batch_size, 50, 16]) + + def test_layer_n5(self): + self.assertEqual(len(self.separableconv1d2.all_weights), 3) + self.assertEqual(tl.get_tensor_shape(self.n5), [self.batch_size, 50, 16]) + + def test_layer_n6(self): + self.assertEqual(len(self.separableconv1d3.all_weights), 2) + self.assertEqual(tl.get_tensor_shape(self.n6), [self.batch_size, 50, 16]) + class Layer_Convolution_2D_Test(CustomTestCase): @@ -55,29 +80,58 @@ class Layer_Convolution_2D_Test(CustomTestCase): self.inputs_shape = [self.batch_size, 400, 400, 3] self.input_layer = tl.layers.Input(self.inputs_shape, name='input_layer') - self.conv2dlayer1 = tl.layers.Conv2d(n_filter=32, in_channels=3, strides=(2, 2), filter_size=(5, 5), - padding='SAME', b_init=tl.initializers.truncated_normal(0.01), name='conv2dlayer' + self.conv2dlayer1 = tl.layers.Conv2d( + n_filter=32, in_channels=3, strides=(2, 2), filter_size=(5, 5), padding='SAME', + b_init=tl.initializers.truncated_normal(0.01), name='conv2dlayer' ) self.n1 = self.conv2dlayer1(self.input_layer) - self.conv2dlayer2 = tl.layers.Conv2d(n_filter=32, in_channels=32, filter_size=(3, 3), - strides=(2, 2), act=None, name='conv2d') + self.conv2dlayer2 = tl.layers.Conv2d( + n_filter=32, in_channels=32, filter_size=(3, 3), strides=(2, 2), act=None, name='conv2d' + ) self.n2 = self.conv2dlayer2(self.n1) - self.conv2dlayer3 = tl.layers.Conv2d(in_channels=32, n_filter=32, filter_size=(3, 3), strides=(2, 2), - act=tl.ReLU, b_init=None, name='conv2d_no_bias' + self.conv2dlayer3 = tl.layers.Conv2d( + in_channels=32, n_filter=32, filter_size=(3, 3), strides=(2, 2), act=tl.ReLU, b_init=None, + name='conv2d_no_bias' ) self.n3 = self.conv2dlayer3(self.n2) - self.dconv2dlayer = tl.layers.DeConv2d(n_filter=32, in_channels=32, filter_size=(5, 5), strides=(2, 2), - name='deconv2dlayer' + self.dconv2dlayer = tl.layers.DeConv2d( + n_filter=32, in_channels=32, filter_size=(5, 5), strides=(2, 2), name='deconv2dlayer' ) self.n4 = self.dconv2dlayer(self.n3) - self.dwconv2dlayer = 
tl.layers.DepthwiseConv2d(in_channels=32, filter_size=(3, 3), strides=(1, 1), - dilation_rate=(2, 2), act=tl.ReLU, depth_multiplier=2,name='depthwise') + self.dwconv2dlayer = tl.layers.DepthwiseConv2d( + in_channels=32, filter_size=(3, 3), strides=(1, 1), dilation_rate=(2, 2), act=tl.ReLU, depth_multiplier=2, + name='depthwise' + ) self.n5 = self.dwconv2dlayer(self.n4) + self.separableconv2d = tl.layers.SeparableConv2d( + in_channels=3, filter_size=(3, 3), strides=(2, 2), dilation_rate=(2, 2), act=tl.ReLU, depth_multiplier=3, + name='separableconv2d' + ) + self.n6 = self.separableconv2d(self.input_layer) + + self.groupconv2d = tl.layers.GroupConv2d( + in_channels=3, n_filter=18, filter_size=(3, 3), strides=(2, 2), dilation_rate=(3, 3), n_group=3, + act=tl.ReLU, name='groupconv2d' + ) + self.n7 = self.groupconv2d(self.input_layer) + + self.binaryconv2d = tl.layers.BinaryConv2d( + in_channels=3, n_filter=32, filter_size=(3, 3), strides=(2, 2), dilation_rate=(2, 2), act=tl.ReLU, + name='binaryconv2d' + ) + self.n8 = self.binaryconv2d(self.input_layer) + + self.dorefaconv2d = tl.layers.DorefaConv2d( + bitA=2, bitW=8, in_channels=3, n_filter=16, filter_size=(3, 3), strides=(2, 2), dilation_rate=(2, 2), + act=tl.ReLU, name='dorefaconv2d' + ) + self.n9 = self.dorefaconv2d(self.input_layer) + @classmethod def tearDownClass(cls): pass @@ -103,6 +157,22 @@ class Layer_Convolution_2D_Test(CustomTestCase): self.assertEqual(len(self.dwconv2dlayer.all_weights), 2) self.assertEqual(tl.get_tensor_shape(self.n5), [self.batch_size, 100, 100, 64]) + def test_layer_n6(self): + self.assertEqual(len(self.separableconv2d.all_weights), 3) + self.assertEqual(tl.get_tensor_shape(self.n6), [self.batch_size, 198, 198, 32]) + + def test_layer_n7(self): + self.assertEqual(len(self.groupconv2d.all_weights), 2) + self.assertEqual(tl.get_tensor_shape(self.n7), [self.batch_size, 200, 200, 18]) + + def test_layer_n8(self): + self.assertEqual(len(self.binaryconv2d.all_weights), 2) + self.assertEqual(tl.get_tensor_shape(self.n8), [self.batch_size, 198, 198, 32]) + + def test_layer_n9(self): + self.assertEqual(len(self.dorefaconv2d.all_weights), 2) + self.assertEqual(tl.get_tensor_shape(self.n9), [self.batch_size, 200, 200, 16]) + class Layer_Convolution_3D_Test(CustomTestCase): @@ -117,12 +187,13 @@ class Layer_Convolution_3D_Test(CustomTestCase): self.conv3dlayer1 = tl.layers.Conv3d(n_filter=32, in_channels=3, filter_size=(2, 2, 2), strides=(2, 2, 2)) self.n1 = self.conv3dlayer1(self.input_layer) - self.deconv3dlayer = tl.layers.DeConv3d(n_filter=128, in_channels=32, filter_size=(2, 2, 2), strides=(2, 2, 2) - ) + self.deconv3dlayer = tl.layers.DeConv3d(n_filter=128, in_channels=32, filter_size=(2, 2, 2), strides=(2, 2, 2)) self.n2 = self.deconv3dlayer(self.n1) - self.conv3dlayer2 = tl.layers.Conv3d(n_filter=64, in_channels=128,filter_size=(3, 3, 3), strides=(3, 3, 3), - act=tl.ReLU, b_init=None, name='conv3d_no_bias') + self.conv3dlayer2 = tl.layers.Conv3d( + n_filter=64, in_channels=128, filter_size=(3, 3, 3), strides=(3, 3, 3), act=tl.ReLU, b_init=None, + name='conv3d_no_bias' + ) self.n3 = self.conv3dlayer2(self.n2) @classmethod diff --git a/tests/layers/test_layers_pooling.py b/tests/layers/test_layers_pooling.py index 39582aa..65643fc 100644 --- a/tests/layers/test_layers_pooling.py +++ b/tests/layers/test_layers_pooling.py @@ -30,6 +30,8 @@ class Layer_Pooling_Test(CustomTestCase): n16 = tl.layers.MaxPool1d(filter_size=3, strides=1, padding='VALID', dilation_rate=2, name='test_maxpool1d')(n1) n17 = 
tl.layers.MeanPool1d(filter_size=3, strides=1, padding='VALID', dilation_rate=2, name='test_meanpool1d')(n1)
+        n19 = tl.layers.AdaptiveMeanPool1d(output_size=44, name='test_adaptivemeanpool1d')(n1)
+        n20 = tl.layers.AdaptiveMaxPool1d(output_size=44, name='test_adaptivemaxpool1d')(n1)
 
         cls.n1_shape = n1.get_shape().as_list()
         cls.n2_shape = n2.get_shape().as_list()
@@ -38,6 +40,8 @@ class Layer_Pooling_Test(CustomTestCase):
         cls.n5_shape = n5.get_shape().as_list()
         cls.n16_shape = n16.get_shape().as_list()
         cls.n17_shape = n17.get_shape().as_list()
+        cls.n19_shape = n19.get_shape().as_list()
+        cls.n20_shape = n20.get_shape().as_list()
 
         ## 2D ========================================================================
@@ -51,6 +55,8 @@ class Layer_Pooling_Test(CustomTestCase):
         n10 = tl.layers.GlobalMeanPool2d(name='test_meanpool2d')(n6)
         n15 = tl.layers.PoolLayer(name='test_pool2d')(n6)
         # n18 = tl.layers.CornerPool2d('TopLeft', name='test_cornerpool2d')(n6)
+        n21 = tl.layers.AdaptiveMeanPool2d(output_size=(45, 32), name='test_adaptivemeanpool2d')(n6)
+        n22 = tl.layers.AdaptiveMaxPool2d(output_size=(45, 32), name='test_adaptivemaxpool2d')(n6)
 
         cls.n6_shape = n6.get_shape().as_list()
         cls.n7_shape = n7.get_shape().as_list()
@@ -59,7 +65,8 @@ class Layer_Pooling_Test(CustomTestCase):
         cls.n10_shape = n10.get_shape().as_list()
         cls.n15_shape = n15.get_shape().as_list()
         # cls.n18_shape = n18.get_shape().as_list()
-
+        cls.n21_shape = n21.get_shape().as_list()
+        cls.n22_shape = n22.get_shape().as_list()
 
         ## 3D ========================================================================
@@ -73,10 +80,15 @@ class Layer_Pooling_Test(CustomTestCase):
         n14 = tl.layers.MaxPool3d(filter_size=(3, 3, 3), strides=(2, 2, 2), padding='SAME',
                                   name='test_maxpool3d')(nin_3)
 
+        n23 = tl.layers.AdaptiveMeanPool3d(output_size=(45, 32, 55), name='test_adaptivemeanpool3d')(nin_3)
+        n24 = tl.layers.AdaptiveMaxPool3d(output_size=(45, 32, 55), name='test_adaptivemaxpool3d')(nin_3)
+
         cls.n11_shape = n11.get_shape().as_list()
         cls.n12_shape = n12.get_shape().as_list()
         cls.n13_shape = n13.get_shape().as_list()
         cls.n14_shape = n14.get_shape().as_list()
+        cls.n23_shape = n23.get_shape().as_list()
+        cls.n24_shape = n24.get_shape().as_list()
 
     @classmethod
     def tearDownClass(cls):
@@ -134,6 +148,24 @@ class Layer_Pooling_Test(CustomTestCase):
     def test_n17_shape(self):
         self.assertEqual(self.n17_shape[1:4], [46, 32])
 
+    def test_n19_shape(self):
+        self.assertEqual(self.n19_shape[1:3], [44, 32])
+
+    def test_n20_shape(self):
+        self.assertEqual(self.n20_shape[1:3], [44, 32])
+
+    def test_n21_shape(self):
+        self.assertEqual(self.n21_shape[1:4], [45, 32, 32])
+
+    def test_n22_shape(self):
+        self.assertEqual(self.n22_shape[1:4], [45, 32, 32])
+
+    def test_n23_shape(self):
+        self.assertEqual(self.n23_shape[1:5], [45, 32, 55, 3])
+
+    def test_n24_shape(self):
+        self.assertEqual(self.n24_shape[1:5], [45, 32, 55, 3])
+
     # def test_n18_shape(self):
     #     self.assertEqual(self.n18_shape[1:], [50, 50, 32])
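--
Reviewer note, not part of the diff: below is a minimal usage sketch tying together the
new APIs this patch introduces (SeparableConv2d, AdaptiveMeanPool2d and the metric
package with its update/result/reset protocol). It assumes the TensorFlow backend is
active (e.g. via the TL_BACKEND environment variable) and that `tensorlayer.metric`
is importable as added above; the layer names 'sep2d' and 'adapool', the toy shapes
and the random data are illustrative only.

    import numpy as np
    import tensorflow as tf
    import tensorlayer as tl
    from tensorlayer.metric import Accuracy

    # Stack the new layers: the separable conv halves the 32x32 input
    # (SAME padding, stride 2), then the adaptive mean pool squeezes the
    # feature map to a fixed 4x4 grid regardless of the input resolution.
    net = tl.layers.Input([8, 32, 32, 3], name='input')
    net = tl.layers.SeparableConv2d(
        in_channels=3, n_filter=16, filter_size=(3, 3), strides=(2, 2),
        padding='SAME', act=tl.ReLU, name='sep2d'
    )(net)
    net = tl.layers.AdaptiveMeanPool2d(output_size=4, name='adapool')(net)
    print(tl.get_tensor_shape(net))  # expected: [8, 4, 4, 16]

    # The metric classes expose the same update/result/reset triple on every
    # backend, which is what tf_train, ms_train and pd_train rely on above.
    metric = Accuracy(topk=1)
    y_pred = tf.constant(np.random.rand(8, 10), dtype=tf.float32)  # fake logits
    y_true = tf.constant(np.arange(8) % 10, dtype=tf.int64)        # fake labels
    metric.update(y_pred, y_true)
    print(metric.result())
    metric.reset()

Because the training loops in Model only ever call update(), result() and reset(),
any object implementing those three methods can be passed as `metrics`, independent
of which backend implements it.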