update ops headers

Tag: tags/v0.7.0-beta
Author: zhangzhenghai
Commit: d91cff537e
19 changed files with 168 additions and 959 deletions
  1. third_party/fwkacllib/inc/ops/all_ops.h (+0, -2)
  2. third_party/fwkacllib/inc/ops/array_ops.h (+0, -37)
  3. third_party/fwkacllib/inc/ops/ctc_ops.h (+1, -71)
  4. third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+11, -107)
  5. third_party/fwkacllib/inc/ops/image_ops.h (+10, -1)
  6. third_party/fwkacllib/inc/ops/math_ops.h (+3, -3)
  7. third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+0, -39)
  8. third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h (+15, -15)
  9. third_party/fwkacllib/inc/ops/nn_calculation_ops.h (+56, -56)
  10. third_party/fwkacllib/inc/ops/nn_detect_ops.h (+34, -34)
  11. third_party/fwkacllib/inc/ops/nn_norm_ops.h (+5, -55)
  12. third_party/fwkacllib/inc/ops/nn_pooling_ops.h (+12, -310)
  13. third_party/fwkacllib/inc/ops/nn_training_ops.h (+1, -1)
  14. third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h (+4, -4)
  15. third_party/fwkacllib/inc/ops/reduce_ops.h (+2, -185)
  16. third_party/fwkacllib/inc/ops/rnn.h (+1, -8)
  17. third_party/fwkacllib/inc/ops/selection_ops.h (+5, -6)
  18. third_party/fwkacllib/inc/ops/split_combination_ops.h (+3, -6)
  19. third_party/fwkacllib/inc/ops/transformation_ops.h (+5, -19)

third_party/fwkacllib/inc/ops/all_ops.h (+0, -2)

@@ -31,9 +31,7 @@
#include "functional_ops.h"
#include "get_data_ops.h"
#include "hcom_ops.h"
#include "hvd_ops.h"
#include "image_ops.h"
#include "internal_ops.h"
#include "linalg_ops.h"
#include "logging_ops.h"
#include "lookup_ops.h"


third_party/fwkacllib/inc/ops/array_ops.h (+0, -37)

@@ -1084,43 +1084,6 @@ REG_OP(TransShape)
.ATTR(outShape, ListInt, {})
.OP_END_FACTORY_REG(TransShape);

/**
*@brief Computes the (possibly normalized) Levenshtein Edit Distance.

*@par Inputs:
*@li hypothesis_indices: The indices of the hypothesis list SparseTensor.\n
This is an N x R int64 matrix.
*@li hypothesis_values: The values of the hypothesis list SparseTensor.\n
This is an N-length vector.
*@li hypothesis_shape: The shape of the hypothesis list SparseTensor.\n
This is an R-length vector.
*@li truth_indices: The indices of the truth list SparseTensor.\n
This is an M x R int64 matrix.
*@li truth_values: The values of the truth list SparseTensor.\n
This is an M-length vector.
*@li truth_shape: The shape of the truth list SparseTensor.\n
This is an R-length vector.

*@par Attributes:
*@li normalize: boolean (if true, edit distances are normalized by length of truth).

*@par Outputs:
*@li output: A dense float tensor with rank R - 1.

*@par Third-party framework compatibility
* Compatible with TensorFlow EditDistance operator.
*/
REG_OP(EditDistance)
.INPUT(hypothesis_indices, TensorType({DT_INT64}))
.INPUT(hypothesis_values, TensorType::BasicType())
.INPUT(hypothesis_shape, TensorType({DT_INT64}))
.INPUT(truth_indices, TensorType({DT_INT64}))
.INPUT(truth_values, TensorType::BasicType())
.INPUT(truth_shape, TensorType({DT_INT64}))
.ATTR(normalize, Bool, true)
.OUTPUT(output, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(EditDistance)
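
For reference, the Levenshtein computation this operator documents can be sketched in standalone C++; an illustrative reference over flat vectors rather than SparseTensors, not code from the header:

// Plain Levenshtein edit distance between one hypothesis/truth pair,
// optionally normalized by the truth length, as EditDistance documents.
#include <algorithm>
#include <vector>

float EditDistanceRef(const std::vector<int>& hyp,
                      const std::vector<int>& truth, bool normalize) {
  const size_t n = hyp.size(), m = truth.size();
  std::vector<std::vector<int>> d(n + 1, std::vector<int>(m + 1, 0));
  for (size_t i = 0; i <= n; ++i) d[i][0] = static_cast<int>(i);
  for (size_t j = 0; j <= m; ++j) d[0][j] = static_cast<int>(j);
  for (size_t i = 1; i <= n; ++i)
    for (size_t j = 1; j <= m; ++j)
      d[i][j] = std::min({d[i - 1][j] + 1, d[i][j - 1] + 1,
                          d[i - 1][j - 1] + (hyp[i - 1] == truth[j - 1] ? 0 : 1)});
  const float dist = static_cast<float>(d[n][m]);
  return (normalize && m > 0) ? dist / static_cast<float>(m) : dist;
}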

} // namespace ge

#endif // GE_OP_ARRAY_OPS_H_

third_party/fwkacllib/inc/ops/ctc_ops.h (+1, -71)

@@ -50,6 +50,7 @@ If not specified, defaults to true
*@par Third-party framework compatibility
* Compatible with TensorFlow CTCLoss operator.
*/

REG_OP(CTCLoss)
.INPUT(inputs, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(labels_indices, TensorType({DT_INT64}))
@@ -62,77 +63,6 @@ REG_OP(CTCLoss)
.ATTR(ignore_longer_outputs_than_inputs, Bool, false)
.OP_END_FACTORY_REG(CTCLoss)

/**
*@brief Performs greedy decoding on the logits given in inputs.

*@par Inputs:
*@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
*@li sequence_length: A vector containing sequence lengths, size `(batch_size)`.

*@par Attributes:
*@li merge_repeated: If True, merge repeated classes in output.

*@par Outputs:
*@li decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`,\n
of a `SparseTensor<int64, 2>`. The rows store: [batch, time].
*@li decoded_values: Values vector, size: `(total_decoded_outputs)`,\n
of a `SparseTensor<int64, 2>`. The vector stores the decoded classes.
*@li decoded_shape: Shape vector, size `(2)`, of the decoded SparseTensor.\n
Values are: `[batch_size, max_decoded_length]`.
*@li log_probability: Matrix, size `(batch_size x 1)`, containing sequence\n
log-probabilities.

*@par Third-party framework compatibility
* Compatible with TensorFlow CTCGreedyDecoder operator.
*/
REG_OP(CTCGreedyDecoder)
.INPUT(inputs, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(sequence_length, TensorType({DT_INT32}))
.ATTR(merge_repeated, Bool, false)
.OUTPUT(decoded_indices, TensorType({DT_INT64}))
.OUTPUT(decoded_values, TensorType({DT_INT64}))
.OUTPUT(decoded_shape, TensorType({DT_INT64}))
.OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(CTCGreedyDecoder)
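
The greedy decoding described above can be sketched as follows; an illustrative reference for one batch element, assuming the usual TensorFlow CTC convention that the blank label is num_classes - 1:

// Greedy CTC decode: argmax per time step, optionally merge repeats,
// then drop the blank label.
#include <algorithm>
#include <vector>

std::vector<int> CtcGreedyDecodeRef(
    const std::vector<std::vector<float>>& logits,  // [time][num_classes]
    int seq_len, bool merge_repeated) {
  std::vector<int> decoded;
  if (logits.empty()) return decoded;
  const int blank = static_cast<int>(logits[0].size()) - 1;  // assumed blank index
  int prev = -1;
  for (int t = 0; t < seq_len; ++t) {
    const auto& row = logits[t];
    const int cls = static_cast<int>(
        std::max_element(row.begin(), row.end()) - row.begin());
    if (cls != blank && !(merge_repeated && cls == prev)) decoded.push_back(cls);
    prev = cls;
  }
  return decoded;
}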

/**
*@brief Performs beam search decoding on the logits given in input.

*@par Inputs:
*@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
*@li sequence_length: A vector containing sequence lengths, size `(batch_size)`.

*@par Attributes:
*@li merge_repeated: If True, merge repeated classes in output.

*@par Outputs:
*@li decoded_indices: A list (length: top_paths) of indices matrices. Matrix j,\n
size `(total_decoded_outputs[j] x 2)`, has indices of a\n
`SparseTensor<int64, 2>`. The rows store: [batch, time].
*@li decoded_values: A list (length: top_paths) of values vectors. Vector j,\n
size `(length total_decoded_outputs[j])`, has the values of a\n
`SparseTensor<int64, 2>`. The vector stores the decoded classes for beam j.
*@li decoded_shape: A list (length: top_paths) of shape vector. Vector j,\n
size `(2)`, stores the shape of the decoded `SparseTensor[j]`.\n
Its values are: `[batch_size, max_decoded_length[j]]`.
*@li log_probability: A matrix, shaped: `(batch_size x top_paths)`. The\n
sequence log-probabilities.

*@par Third-party framework compatibility
* Compatible with TensorFlow CTCBeamSearchDecoder operator.
*/
REG_OP(CTCBeamSearchDecoder)
.INPUT(inputs, TensorType({DT_FLOAT, DT_DOUBLE}))
.INPUT(sequence_length, TensorType({DT_INT32}))
.REQUIRED_ATTR(beam_width, Int)
.REQUIRED_ATTR(top_paths, Int)
.ATTR(merge_repeated, Bool, true)
.DYNAMIC_OUTPUT(decoded_indices, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(decoded_values, TensorType({DT_INT64}))
.DYNAMIC_OUTPUT(decoded_shape, TensorType({DT_INT64}))
.OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(CTCBeamSearchDecoder)

} // namespace ge

#endif //GE_OP_CTC_OPS_H

third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+11, -107)

@@ -483,9 +483,9 @@ REG_OP(Equal)
*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128.

*@par Attributes:
*@li base: An optional attribute of type float32, specifying the base gamma. Defaults to "-1.0".
*@li scale: An optional attribute of type float32, specifying the scale alpha. Defaults to "1.0".
*@li shift: An optional attribute of type float32, specifying the shift beta. Defaults to "0.0".
*@li base: An optional attribute of type float32, specifying the base gamma. Defaults to "-1".
*@li scale: An optional attribute of type float32, specifying the scale alpha. Defaults to "1".
*@li shift: An optional attribute of type float32, specifying the shift beta. Defaults to "0".

*@par Outputs:
*y: A Tensor of the same type as "x".
@@ -1016,17 +1016,17 @@ REG_OP(BesselI1e)
* y = log_base(shift + scale * x), with "base" > 0.

* @par Inputs:
* @li x: A Tensor of type complex64, complex128, float16, float32 or double.
* @li x: A Tensor of type UnaryDataType.

* @par Attributes:
* @li base: An optional float32, specifying the base "e". Defaults to "-1.0"
* @li base: An optional float32, specifying the base "e". Defaults to "-1"

* @li scale: An optional float32, specifying the scale of input "x". Defaults
* to "1.0"
* @li shift: An optional float32, specifying the shift. Defaults to "0.0"
* to "1"
* @li shift: An optional float32, specifying the shift. Defaults to "0"

* @par Outputs:
* y: A Tensor has same type as "x".
* y: A Tensor of type UnaryDataType.

* @attention Constraints:
* @li "base" is supposed to be greater than 0. Retaining the default
@@ -2262,7 +2262,7 @@ REG_OP(ArgMinD)
*dtype: The output type, either "int32" or "int64". Defaults to "int64".

*@par Outputs:
*y: A multi-dimensional Tensor of type int32 or int64, specifying the index with the largest value. The dimension is one less than that of "x".
*y: A multi-dimensional Tensor of type int32, specifying the index with the largest value. The dimension is one less than that of "x".

*@attention Constraints:
*@li x: If there are multiple maximum values, the index of the first maximum value is used.
@@ -2398,8 +2398,8 @@ REG_OP(ArgMinWithValue)
*y: A Tensor. Has the same type and format as "x".

*@par Attributes:
*@li N: A required attribute. The number of input x, max size is 32. Type is int.
*@li model: An optional attribute. Type is int. Defaults to "1".
*@li N: A required attribute. The number of input x, max size is 32.
*@li model: An optional attribute. Defaults to "1".
* "0": product, "1": sum, "2": max.
*@li coeff: A required attribute. Must meet all of the following rules:
* size of "coeff" must be equal to len("x") or be null.
@@ -2692,86 +2692,6 @@ REG_OP(AdamApplyOne)
.OUTPUT(output2, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(AdamApplyOne)

/**
*@brief A fusion operator for bert lamb.

*@par Inputs:
*Eleven inputs, including:
* @li input0: A Tensor. Must be one of the following types: float16, float32.
* @li input1: A Tensor. Must be one of the following types: float16, float32.
* @li input2: A Tensor. Must be one of the following types: float16, float32.
* @li input3: A Tensor. Must be one of the following types: float16, float32.
* @li input4: A Tensor. Must be one of the following types: float16, float32.
* @li mul0_x: A Tensor. Must be one of the following types: float16, float32.
* @li mul1_x: A Tensor. Must be one of the following types: float16, float32.
* @li mul2_x: A Tensor. Must be one of the following types: float16, float32.
* @li mul3_x: A Tensor. Must be one of the following types: float16, float32.
* @li mul4_x: A Tensor. Must be one of the following types: float16, float32.
* @li add2_y: A Tensor. Must be one of the following types: float16, float32.

*@par Outputs:
*Three outputs, including:
* @li output0: A Tensor. Must be one of the following types: float16, float32.
* @li output1: A Tensor. Must be one of the following types: float16, float32.
* @li output2: A Tensor. Must be one of the following types: float16, float32.

*/
REG_OP(AdamApplyOneWithDecayAssign)
.INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mul2_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mul3_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mul4_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(add2_y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(output1, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(output2, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(AdamApplyOneWithDecayAssign)

/**
*@brief A fusion operator for bert lamb.

*@par Inputs:
*Ten inputs, including:
* @li input0: A Tensor. Must be one of the following types: float16, float32.
* @li input1: A Tensor. Must be one of the following types: float16, float32.
* @li input2: A Tensor. Must be one of the following types: float16, float32.
* @li input3: A Tensor. Must be one of the following types: float16, float32.
* @li input4: A Tensor. Must be one of the following types: float16, float32.
* @li mul0_x: A Tensor. Must be one of the following types: float16, float32.
* @li mul1_x: A Tensor. Must be one of the following types: float16, float32.
* @li mul2_x: A Tensor. Must be one of the following types: float16, float32.
* @li mul3_x: A Tensor. Must be one of the following types: float16, float32.
* @li add2_y: A Tensor. Must be one of the following types: float16, float32.

*@par Outputs:
*Three outputs, including:
* @li output0: A Tensor. Must be one of the following types: float16, float32.
* @li output1: A Tensor. Must be one of the following types: float16, float32.
* @li output2: A Tensor. Must be one of the following types: float16, float32.

*/
REG_OP(AdamApplyOneAssign)
.INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mul2_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(mul3_x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(add2_y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(output1, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(output2, TensorType({DT_FLOAT16,DT_FLOAT}))
.OP_END_FACTORY_REG(AdamApplyOneAssign)

/**
*@brief Confuse select, maximum, greater and sqrt.

@@ -3122,22 +3042,6 @@ REG_OP(KLDiv)
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OP_END_FACTORY_REG(KLDiv)

/**
*@brief Copies data from "x" to "y".

*@par Inputs:
*One input, including:
* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32, bool.

*@par Outputs:
*y: A Tensor. Has the same type as "x".

*@par Third-party framework compatibility
*/
REG_OP(TensorMove)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL}))
.OP_END_FACTORY_REG(TensorMove)
} // namespace ge




third_party/fwkacllib/inc/ops/image_ops.h (+10, -1)

@@ -934,7 +934,6 @@ REG_OP(EncodeJpeg)

/**
*@brief PNG-encode an image.

*@par Inputs:
*Input image must be uint8 or uint16 type. Inputs include: \n
*image: A 3-D uint8 or uint16 Tensor of shape [height, width, channels] \n
@@ -1224,6 +1223,16 @@ REG_OP(CombinedNonMaxSuppression)
.ATTR(clip_boxes, Bool, true)
.OP_END_FACTORY_REG(CombinedNonMaxSuppression)

REG_OP(SpatialTransformerD)
.INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
.OPTIONAL_INPUT(theta, TensorType({DT_FLOAT,DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16}))
.ATTR(output_size, ListInt, {-1, -1})
.ATTR(default_theta, ListFloat, {})
.ATTR(align_corners, Bool, false)
.ATTR(use_default_theta, ListBool, {})
.OP_END_FACTORY_REG(SpatialTransformerD)

} // namespace ge

#endif // GE_OP_MAGE_OPS_H_

third_party/fwkacllib/inc/ops/math_ops.h (+3, -3)

@@ -29,9 +29,9 @@ namespace ge {
* x: A Tensor of type float16 or float32.

*@par Attributes:
*@li power: Optional. Must be one of the following types: float32. Defaults to 1.0.
*@li scale: Optional. Must be one of the following types: float32. Defaults to 1.0.
*@li shift: Optional. Must be one of the following types: float32. Defaults to 0.0.
*@li power: Optional. Defaults to 1.0.
*@li scale: Optional. Defaults to 1.0.
*@li shift: Optional. Defaults to 0.0.

*@par Outputs:
* y: A Tensor. Has the same type and shape as "x".
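
Assuming these attributes follow the usual Caffe Power-layer semantics (the hunk does not show the operator name or its formula), the elementwise computation y = (shift + scale * x) ^ power can be sketched as:

#include <cmath>

float PowerRef(float x, float power = 1.0f,
               float scale = 1.0f, float shift = 0.0f) {
  return std::pow(shift + scale * x, power);  // assumed Caffe semantics
}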


third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+0, -39)

@@ -698,45 +698,6 @@ REG_OP(FullyConnection)
.ATTR(offset_x, Int, 0)
.OP_END_FACTORY_REG(FullyConnection)

/**
*@brief Also known as a "fully-connected-compress" layer, computes an inner product with a set of learned weights, and (optionally) adds biases.

*@par Inputs:
* Five inputs, including:
*@li x: A Tensor of type uint8 or int8.
*@li w: A weight matrix of type int8.
*@li compress_index: A compress index matrix of type int8.
*@li b: An optional Tensor of type int32.
*@li offset_w: An optional Tensor of type int8.

*@par Attributes:
*@li num_output: Reserved.
*@li transpose: A bool, specifying whether to transpose, either "true" or "false". Defaults to "false".
*@li axis: Reserved.
*@li offset_x: Reserved.

*@par Outputs:
*y: The result tensor of type int32.

*@par Third-party framework compatibility
* Compatible with the Caffe operator InnerProduct.

*@par Quantization supported or not
* Yes
*/
REG_OP(FullyConnectionCompress)
.INPUT(x, TensorType({DT_UINT8, DT_INT8}))
.INPUT(w, TensorType({DT_INT8}))
.INPUT(comress_index, TensorType({DT_INT8}))
.OPTIONAL_INPUT(b, TensorType({DT_INT32}))
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
.OUTPUT(y, TensorType({DT_INT32}))
.REQUIRED_ATTR(num_output, Int)
.ATTR(transpose, Bool, false)
.ATTR(axis, Int, 1)
.ATTR(offset_x, Int, 0)
.OP_END_FACTORY_REG(FullyConnectionCompress)

/**
*@brief Computes the confusion matrix from predictions and labels.



third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h (+15, -15)

@@ -33,12 +33,12 @@ namespace ge {
* @li variance: A Tensor. Must be one of the following types: float32.

*@par Attributes:
* @li mode: A Tensor. Must be one of the following types: int. defaults: 1.
* @li epsilon: A Tensor. Must be one of the following types: float32. Defaults to 0.000001.
* @li momentum: A Tensor. Must be one of the following types: float32. Defaults to 0.9.
* @li is_training: A Tensor. Must be one of the following types: bool. Defaults to true.
* @li is_training_fusion: A Tensor. Must be one of the following types: bool. Defaults to true.
* @li moving_average_fraction: A Tensor. Must be one of the following types: float32. Defaults to 0.00300002098.
* @li mode: A Tensor. Must be one of the following types: int.
* @li epsilon: A Tensor. Must be one of the following types: float32.
* @li momentum: A Tensor. Must be one of the following types: float32.
* @li is_training: A Tensor. Must be one of the following types: bool.
* @li is_training_fusion: A Tensor. Must be one of the following types: bool.
* @li moving_average_fraction: A Tensor. Must be one of the following types: float32.

*@par Outputs:
*Three outputs, including:
@@ -83,8 +83,8 @@ REG_OP(FusedBatchNorm)
* @li save_inv_variance1: A Tensor. Must be one of the following types: float32.

*@par Attributes:
* @li epsilon: A Tensor. Must be one of the following types: float32. Defaults to 0.0.
* @li momentum: A Tensor. Must be one of the following types: float32. Defaults to 0.0.
* @li epsilon: A Tensor. Must be one of the following types: float32.
* @li momentum: A Tensor. Must be one of the following types: float32.

*@par Outputs:
*Three outputs, including:
@@ -361,14 +361,14 @@ REG_OP(BatchNormGradExt2)
*@par Inputs:
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference.
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference.
*@li momentum: A Tensor,represents the mean and the variance's scale factor
*@li variance: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the variance used for inference.
*@li momentum: A Tensor of type float32 or float16, represents the mean and the variance's scale factor
*@li scale: An optional tensor of type float16 or float32, no use
*@li offset: An optional tensor of type float16 or float32, no use
*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
*@li use_global_stats: Mean inference mode, can only be "True".
*@li mode: An optional input, not used
*@li mode: An optional attr, not used
*@par Outputs:\n
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x"
*/
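
One plausible reading of the inference computation, sketched per element; "scale" and "offset" are documented as unused, and omitting "momentum" here is an assumption, since the header does not spell out how it rescales the statistics:

#include <cmath>

float BnInferenceRef(float x, float mean, float variance,
                     float epsilon = 1e-5f) {
  // Normalize with the running statistics supplied as inputs.
  return (x - mean) / std::sqrt(variance + epsilon);
}
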
@@ -391,11 +391,11 @@ REG_OP(BNInference)

*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference.
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference.
*@li momentum: An optional float, mean and variance's Scale factor
*@li momentum: A Tensor of type float32 or float16, the mean and the variance's Scale factor
*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
*@li use_global_stats: Mean inference mode, can only be "True".
*@li mode: An optional attr, not used
*@li mode: An optional input, not used
*@par Outputs:
*@li alpha: A Tensor of type float16 or float32 for the cpu calculate mean
*@li beta: A Tensor of type float16 or float32 for the cpu calculate variance
@@ -418,8 +418,8 @@ REG_OP(BnHost)

*@par Inputs:
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference.
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference.
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference.
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference.
*@li scale: An optional tensor of type float16 or float32, no use
*@li offset: An optional tensor of type float16 or float32, no use
*@par Attributes:


third_party/fwkacllib/inc/ops/nn_calculation_ops.h (+56, -56)

@@ -143,29 +143,31 @@ REG_OP(DepthwiseConv2DBackpropFilterD)
* @par Inputs:
* Three inputs include: \n
* @li input_size: 4D shape of input tensor [N, C, H, W] or [N, H, W, C],
* support int32, int64
* @li filter: 4D filter tensor with shape of [H, W, C, K], support float16.
* support int32
* @li filter: 4D filter tensor with shape of [H, W, C, K], support float16,
* float32, double
* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C].
* Must be one of the following types: float16.
* Must be one of the following types: float16, float32, double.

* @par Attributes:
* @li strides: A required list or tuple of int32. The stride of the sliding window for
* @li strides: A required list or tuple. The stride of the sliding window for
* height and width of input "x" of the convolution.
* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height,
* stride_width, 1].
* @li dilations: An optional list or tuple of int32. The dilation factor for each
* dimension of input "x". Defaults to "[1, 1, 1, 1]".
* @li dilations: An optional list or tuple. The dilation factor for each
* dimension of input "x".
* If set to k > 1, there will be k-1 skipped cells between each filter element
* on that dimension. Must be with shape [1, 1, dilation_height, dilation_width]
* or [1, dilation_height, dilation_width, 1].
* @li pads: A required list or tuple of int32. Padding added to each dimension of the
* @li pads: A required list or tuple. Padding added to each dimension of the
* input.
* @li data_format: An optional string. Input data format, either "NHWC" or
* "NCHW". Defaults to "NHWC".
* "NCHW".

* @par Outputs:
* input_grad: Gradient of the deep convolution relative to the input with shape
* [N, C, H, W] or [N, H, W, C] Must be one of the following types: float16.
* [N, C, H, W] or [N, H, W, C] Must be one of the following types: float16,
* float32, double.

* @attention Constraints:\n
* The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but
@@ -257,8 +259,8 @@ REG_OP(DepthwiseConv2DBackpropInputD)

*@par Inputs:
*Two required inputs and two optional inputs, including: \n
* @li x: A 4D tensor of type float16 or int8, with shape [N, C, H, W] or [N, H, W, C]
* @li filter: A 4D tensor of type float16 or int8, with shape [H, W, C, K]
* @li x: A 4D tensor of type float16, with shape [N, C, H, W] or [N, H, W, C]
* @li filter: A 4D tensor of type float16, with shape [H, W, C, K]
* @li bias: An optional tensor of type float16 or int32
* @li offset_w: An optional float16 or int8, used for quantized inference

@@ -271,8 +273,8 @@ REG_OP(DepthwiseConv2DBackpropInputD)
* dimension of input "x".
* If set to k > 1, there will be k-1 skipped cells between each filter element
* on that dimension. Must be with shape [1, 1, dilation_height, dilation_width]
* or [1, dilation_height, dilation_width, 1]. Defaults to "[1, 1, 1, 1]".
* @li pads: A required list or tuple of int32. Padding added to each dimension of the
* or [1, dilation_height, dilation_width, 1].
* @li pads: A required list or tuple. Padding added to each dimension of the
* input.
* @li data_format: An optional string. Input data format, either "NHWC" or
* "NCHW". Defaults to "NHWC".
@@ -280,7 +282,7 @@ REG_OP(DepthwiseConv2DBackpropInputD)
* Defaults to 0.

* @par Outputs:
* y: 4D tensor of type float16 or int32, with shape [N, C, H, W] or [N, H, W, C]
* y: 4D tensor of type float16, with shape [N, C, H, W] or [N, H, W, C]

* @attention Constraints:\n
* The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but
@@ -460,24 +462,24 @@ REG_OP(Conv2DBackpropInputD)
* @li x: A Tensor. Must have the same type as "filter". 4D with shape
* [batch, out_channels, out_height, out_width]. Gradients with respect
* to the output of the convolution.
* @li filter: A Tensor of type float16, float32, double or int8.
* @li filter: A Tensor of type float16.
* 4D with shape [out_channels, in_channel, filter_height, filter_width].\n
* Two optional inputs:
* @li bias: An optional tensor of type float16, float32, int32 or int64.
* @li offset_w: An optional 1D tensor for quantized deconvolution. Type is int8. Reserved.\n
* @li bias: An optional tensor of type float16
* @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved.\n
*@par Attributes:
* Six attributes:
* @li strides: A tuple or list of 2 integers. The stride of the sliding window
* for H/W dimension. Defaults to [1, 1, 1, 1].
* for H/W dimension.
* @li pads: A tuple or list of 4 integers. The [top, bottom, left, right]
* padding on the feature map. Defaults to [0, 0, 0, 0].
* padding on the feature map
* @li dilations: A tuple or list of 4 integers. The dilation factor for each
* dimension of input. Must be [1, 1, 1, 1].
* @li groups: Number of blocked connections from input channels to
output channels. Defaults to "1".
* @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n
* output channels.
* @li data_format: An optional string from: "NCHW". Defaults to "NCHW".\n
Specify the data format of the input and output data.
* @li offset_x: An optional integer for quantized deconvolution. Defaults to "0".
* @li offset_x: An optional integer for quantized deconvolution.
*@par Outputs:
* y: A Tensor. Has the same type as "filter". 4D tensor with shape
* [batch, channels, height, width].
@@ -575,19 +577,17 @@ REG_OP(Conv2DBackpropFilterD)
*
* The input and output tensor attributes are listed as follows:
* @verbatim
|Tensor | x | filter | bias | offset_w | y
Tensor | x | filter | bias | offset_w | y
-----------|---------|---------|---------|----------|--------
|Data Type | float16 | float16 | float16 | _ | float16
| |---------|---------|---------|----------|--------
| | float32 | float32 | float32 | _ | float32
| |---------|---------|---------|----------|--------
| | float64 | float64 | float64 | _ | float64
| |---------|---------|---------|----------|--------
| | int8 | int8 | int32 | int8 | int32
Data Type | float16 | float16 | float16 | _ | float16
|---------|---------|---------|----------|--------
| float32 | float32 | float32 | _ | float32
|---------|---------|---------|----------|--------
| int8 | int8 | int32 | int8 | int32
-----------|---------|---------|---------|----------|--------
|Format | NCHW | NCHW | ND | ND | NCHW
| | NHWC | NHWC | | | NHWC
| | | HWCN | | |
Format | NCHW | NCHW | ND | ND | NCHW
| NHWC | NHWC | | | NHWC
| | HWCN | | |
@endverbatim
* It should be noted that the data types must correspond to each other, but the
* format does not need to.
@@ -602,10 +602,10 @@ REG_OP(Conv2DBackpropFilterD)
* for dilated convolution. Has the same dimension order and value as "strides".
* @li groups: Number of blocked connections from input channels to output
* channels. Input channels and output channels must both be divisible by
* "groups".Type is int32. Must be set to 1.
* @li offset_x: An optional integer for quantized convolution. Type is int32. Defaults to "0".
* "groups".
* @li offset_x: An optional integer for quantized convolution.
* @li data_format: An optional string from: "NHWC", "NCHW". Specifying the
* data format of the input and output images. Type is string. Defaults to "NHWC". Reserved.
* data format of the input and output images. Reserved.

*@par Outputs:
* @li y: A 4D Tensor of output images.
@@ -613,23 +613,23 @@ REG_OP(Conv2DBackpropFilterD)
*@attention
* @li The parameter scope is listed as follows:
* @verbatim
|Name | Field | Scope
Name | Field | Scope
------------------|--------------|----------
|Input Image Size | H dimension | [1, 4096]
| | W dimension | [1, 4096]
Input Image Size | H dimension | [1, 4096]
| W dimension | [1, 4096]
------------------|--------------|----------
|Filter Size | H dimension | [1, 255]
| | W dimension | [1, 255]
Filter Size | H dimension | [1, 255]
| W dimension | [1, 255]
------------------|--------------|----------
|Stride Size | H dimension | [1, 63]
| | W dimension | [1, 63]
Stride Size | H dimension | [1, 63]
| W dimension | [1, 63]
------------------|--------------|----------
|Padding Size | top side | [0, 255]
| | bottom side | [0, 255]
| | left side | [0, 255]
| | right side | [0, 255]
Padding Size | top side | [0, 255]
| bottom side | [0, 255]
| left side | [0, 255]
| right side | [0, 255]
------------------|--------------|----------
|Dilation Size | H dimension | [1, 255]
Dilation Size | H dimension | [1, 255]
| W dimension | [1, 255]
@endverbatim
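
For the strides/pads/dilations attributes above, the output spatial size per dimension follows standard convolution arithmetic; a sketch (floor division is an assumption, since the header does not state the rounding mode):

int ConvOutDim(int input, int kernel, int pad_before, int pad_after,
               int stride, int dilation) {
  const int effective_kernel = dilation * (kernel - 1) + 1;
  return (input + pad_before + pad_after - effective_kernel) / stride + 1;
}
// e.g. ConvOutDim(224, 7, 3, 3, 2, 1) == 112 for a ResNet-style stem.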

@@ -654,11 +654,11 @@ REG_OP(Conv2DBackpropFilterD)
*@li Compatible with the Caffe operator 2D "Convolution".
*/
REG_OP(Conv2D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8}))
.INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8}))
.INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(dilations, ListInt, {1, 1, 1, 1})
@@ -710,8 +710,8 @@ REG_OP(Conv3D)
.INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(strides, ListInt, {1, 1, 1, 1, 1})
.ATTR(pads, ListInt, {0, 0, 0, 0, 0, 0})
.ATTR(data_format, String, "NDHWC")
.ATTR(dilations, ListInt, {1, 1, 1, 1, 1})
.OP_END_FACTORY_REG(Conv3D)
@@ -742,7 +742,7 @@ REG_OP(Conv3DBackpropInput)
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(pads, ListInt, {0, 0, 0, 0, 0, 0})
.ATTR(data_format, String, "NDHWC")
.ATTR(dilations, ListInt, {1, 1, 1, 1, 1})
.OP_END_FACTORY_REG(Conv3DBackpropInput)
@@ -771,7 +771,7 @@ REG_OP(Conv3DBackpropInputD)
.OUTPUT(y, TensorType({DT_FLOAT16}))
.REQUIRED_ATTR(input_size, ListInt)
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(pads, ListInt, {0, 0, 0, 0, 0, 0})
.ATTR(data_format, String, "NDHWC")
.ATTR(dilations, ListInt, {1, 1, 1, 1, 1})
.OP_END_FACTORY_REG(Conv3DBackpropInputD)


third_party/fwkacllib/inc/ops/nn_detect_ops.h (+34, -34)

@@ -187,15 +187,14 @@ REG_OP(ROIAlignGrad)
*@li features: A 5HD Tensor of type float32 or float16.
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located,
* "x0", "y0", "x1", and "y1".
*@li rois_n: An optional input of type int32, specifying the number of valid ROIs. This parameter is reserved.
*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved.

*@par Attributes:
*@li spatial_scale: A required attribute of type float32, specifying the scaling ratio of "features" to the original image.
*@li pooled_height: A required attribute of type int32, specifying the H dimension.
*@li pooled_width: A required attribute of type int32, specifying the W dimension.
*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0",
*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image.
*@li pooled_height: A required attribute of type int, specifying the H dimension.
*@li pooled_width: A required attribute of type int, specifying the W dimension.
*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0",
* the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2".
*@li roi_end_mode: An optional attribute of type int32. Defaults to "1".

*@par Outputs:
* output: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16. The axis N is the number of input ROIs. Axes H, W, and C are consistent
@@ -363,15 +362,15 @@ REG_OP(PSROIPooling)
*@li im_info: An ND tensor of type float16 or float32, specifying the Image information.
*@li actual_rois_num: An optional NCHW tensor of type int32, specifying the number of valid boxes per batch.
*@par Attributes:
*@li batch_rois: An optional int32, specifying the number of images to be predicted. Defaults to "1".
*@li batch_rois: An optional int32, specifying the number of images to be predicted.
*@li num_classes: A required int32, specifying the number of classes to be predicted. The value must be greater than 0.
*@li score_threshold: A required float32, specifying the threshold for box filtering. The value range is [0.0, 1.0].
*@li iou_threshold: A required float32, specifying the confidence threshold for box filtering, which is the output "obj" of operator Region. The value range is (0.0, 1.0).
*@par Outputs:
*@li box: A tensor of type float16 or float32 for proposal of actual output, with output shape [batch, numBoxes,8].
* 8 means [x1, y1, x2, y2, score, label, batchID, NULL], the maximum value of numBoxes is 1024.
*@li box: An NCHW tensor of type float16 or float32, describing the information of each output box, including the coordinates, class, and confidence.
Proposal of actual output, with output shape [batch, numBoxes,8], 8 means [x1, y1, x2, y2, score, label, batchID, NULL], the maximum value of numBoxes is 1024.
That is, take min (the maximum number of input boxes, 1024)
*@li actual_bbox_num: A tensor of type int32 with shape [batch, num_classes], specifying the number of output boxes.
*@li actual_bbox_num: An NCHW tensor of type int32 with shape [batch, num_classes], specifying the number of output boxes.

*@attention Constraints:\n
*@li totalnum < max_rois_num * batch_rois.
@@ -415,9 +414,9 @@ REG_OP(FSRDetectionOutput)
*@li confidence_threshold: An optional float32, specifying the top-k filter threshold. Only detections with confidence greater than the threshold are considered.
*@li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output".
*@par Outputs:
*@li out_boxnum: A tensor of type int32, specifying the number of output boxes.
*@li y: A tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box.
* In output shape, 8 means (batchID, label(classID), score (class probability), xmin, ymin, xmax, ymax, null)
*@li out_boxnum: An NCHW tensor of type int32, specifying the number of output boxes.
*@li y: An NCHW tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box, including the coordinates,
* class, and confidence. In output shape, 8 means (batchID, label(classID), score (class probability), xmin, ymin, xmax, ymax, null)
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(SSDDetectionOutput)
@@ -448,10 +447,10 @@ REG_OP(SSDDetectionOutput)
*@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3.
*@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h).
*@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024].
*@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3". Defaults to "V3".
*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false".
*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false".
*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false".
*@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3".
*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2".
*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2".
*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used.

*@par Outputs:
*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2], where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box.
@@ -502,10 +501,10 @@ and the actual image height and width.
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*
*@par Outputs:
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box,
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. It means only the first one of the 8 numbers is valid,
* the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
*@li boxout: An NCHW tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box, including the coordinates, class,
and confidence. In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: An NCHW tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. It means only the first one of the 8 numbers is valid,
the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
*
*@attention Constraints:\n
*@li This operator applies only to the YOLO v2 network.
@@ -562,10 +561,10 @@ and the actual image height and width.
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*
*@par Outputs:
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box,
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. It means only the first one of the 8 numbers is valid,
* the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
*@li boxout: An NCHW tensor of type float16, describing the information of each output box, including the coordinates, class, and confidence.
With shape [batch,6,post_nms_topn], 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: An NCHW tensor of type int32, specifying the number of output boxes.
With shape [batch,8,1,1], means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
*
*@attention Constraints:\n
*@li This operator applies only to the YOLO v2 network.
@@ -622,11 +621,11 @@ and the actual image height and width.
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*
*@par Outputs:
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box.
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes.
* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
*
*@li boxout: An NCHW tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box, including the coordinates, class, and confidence.
In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: An NCHW tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes.
The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
*@attention Constraints:\n
*@li This operator applies only to the YOLO v3 network.
*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
@@ -689,11 +688,12 @@ and the actual image height and width.
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*
*@par Outputs:
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box.
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes.
* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
*@li boxout: An NCHW tensor of type float16, describing the information of each output box, including the coordinates, class, and confidence.
With shape [batch,6,post_nms_topn], 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: An NCHW tensor of type int32, specifying the number of output boxes.
With shape [batch,8,1,1], means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
*

*@attention Constraints:\n
*@li This operator applies only to the YOLO v3 network.
*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.


third_party/fwkacllib/inc/ops/nn_norm_ops.h (+5, -55)

@@ -291,8 +291,8 @@ REG_OP(BinaryCrossEntropyGrad)
* double. Should be a Variable Tensor.

*@par Attributes:
*axes: A list of int. The dimension softmax would be performed on. Defaults
* to "[-1]".
*axes: A list of ints. The dimension softmax would be performed on. Defaults
* to "{-1}".

*@par Outputs:
*y: A Tensor. Has the same dimensionality and shape as the "x" with values in
@@ -632,7 +632,7 @@ REG_OP(DropOutDoMask)
* Three inputs, including:
*@li x: An ND tensor of type float16 or float32.
*@li scale: An ND tensor of type float16 or float32.
*@li bias: An optional ND tensor of type float16 or float32.
*@li bias: An ND tensor of type float16 or float32.

*@par Attributes:
*@li axis: An optional int32 used to compute the shape of scale and bias input from the online bottoms. Defaults to "1".
@@ -679,9 +679,9 @@ REG_OP(Scale)
* depth_radius = (local_size - 1) / 2. local_size is the number of channels to sum over (for ACROSS_CHANNELS)
* or the side length of the square region to sum over (for WITHIN_CHANNEL).
*@li bias: An optional float32. An offset, usually > 0 to avoid dividing by 0.
* Defaults to "1.0".
* Defaults to "1".
*@li alpha: An optional float32. A scaling factor, usually positive.
* Defaults to "1.0".
* Defaults to "1".
*@li beta: An optional float32. An exponent. Defaults to "0.75" for the Caffe framework and to "0.5" for others.
*@li norm_region: An optional string. A mode option. "ACROSS_CHANNELS":0, "WITHIN_CHANNEL":1. Defaults to "ACROSS_CHANNELS".

@@ -836,56 +836,6 @@ REG_OP(GroupNorm)
.ATTR(num_groups, Int, 2)
.OP_END_FACTORY_REG(GroupNorm)

/**
*@brief Performs instance normalization.

*@par Inputs:\n
* Five inputs, including: (NC1HWC0 supported)
*@li x: A 5D Tensor of type float16 or float32, NC1HWC0.
*@li gamma: A Tensor of type float32.
A 5D Tensor for scaling factor, to scale the normalized x.
*@li beta: A Tensor of type float32.
A 5D Tensor for offset, to shift to the normalized x.
*@li mean: A Tensor of type float32.
A 5D Tensor Specifies the mean used for inference. Reserved.
*@li variance: A Tensor of type float32.
A 5D Tensor Specifies the variance used for inference. Reserved.

*@par Attributes:
*@li is_training: An optional bool, specifying if the operation is used for \n
training or inference. Defaults to "True".
*@li momentum: An optional float32, \n
the value used for the running_mean and running_var computation. Default: "0.1".
*@li epsilon: An optional float32, specifying the small value added to \n
variance to avoid dividing by zero. Defaults to "0.00001".

*@par Outputs:\n
* Three outputs, including: (NHWC, NCHW, NC1HWC0 supported)
*@li y: A 5D tensor of type float16 or float32 for the normalized "x", \n
*@li batch_mean: A Tensor of type float32.
Specifies the mean of "x".
*@li batch_variance: A Tensor of type float32.
Specifies the variance of "x".

*@par Third-party framework compatibility
*@li Compatible with the PyTorch operator InstanceNorm.
*/
REG_OP(InstanceNormV2)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(beta, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))

.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(batch_mean, TensorType({DT_FLOAT}))
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))

.ATTR(is_training, Bool, true)
.ATTR(momentum, Float, 0.1)
.ATTR(epsilon, Float, 0.00001)
.OP_END_FACTORY_REG(InstanceNormV2)
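
The per-instance normalization documented above can be sketched for one (batch, channel) spatial plane; illustrative only, with the momentum-based running-statistics update omitted:

#include <cmath>
#include <vector>

void InstanceNormRef(std::vector<float>& plane, float gamma, float beta,
                     float epsilon = 1e-5f) {
  float mean = 0.0f;
  for (float v : plane) mean += v;
  mean /= plane.size();
  float var = 0.0f;
  for (float v : plane) var += (v - mean) * (v - mean);
  var /= plane.size();
  const float inv_std = 1.0f / std::sqrt(var + epsilon);
  for (float& v : plane) v = gamma * (v - mean) * inv_std + beta;  // scale, shift
}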

} // namespace ge

#endif //GE_OP_NN_NORM_OPS_H

third_party/fwkacllib/inc/ops/nn_pooling_ops.h (+12, -310)

@@ -101,42 +101,6 @@ REG_OP(AvgPool)
.ATTR(data_format, String, "NHWC")
.OP_END_FACTORY_REG(AvgPool)

/**
*@brief Performs average pooling on the input.

*@par Inputs:
*x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double.

*@par Attributes:
*@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor.
*@li strides: List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor.
*@li pads: List of ints, implicit zero paddings on both sides of the input.
*@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape.
*@li count_include_pad: When true, will include the zero-padding in the averaging calculation.
*@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used.
*@li data_format: A string, format of input data.

*@par Outputs:
*y: The average pooled output tensor.

*@attention Constraints:
*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator AvgPool3D.
*/
REG_OP(AvgPool3D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
.REQUIRED_ATTR(ksize, ListInt)
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(ceil_mode, Bool, false)
.ATTR(count_include_pad, Bool, true)
.ATTR(divisor_override, Int, 0)
.ATTR(data_format, String, "NDHWC")
.OP_END_FACTORY_REG(AvgPool3D)
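
For the ksize/strides/pads/ceil_mode attributes above, the pooled output size per spatial dimension follows standard pooling arithmetic; a sketch of the ceil_mode switch:

int PoolOutDim(int input, int ksize, int pad_before, int pad_after,
               int stride, bool ceil_mode) {
  const int span = input + pad_before + pad_after - ksize;
  // ceil_mode uses ceiling division of the span, floor otherwise.
  return (ceil_mode ? (span + stride - 1) / stride : span / stride) + 1;
}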

/**
*@brief Performs max_pool_ext2 on the input.

@@ -220,62 +184,17 @@ REG_OP(MaxPool)
.OP_END_FACTORY_REG(MaxPool)

REG_OP(MaxPool3D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
.INPUT(x, TensorType({DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT16}))
.REQUIRED_ATTR(ksize, ListInt)
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(padding, String)
.ATTR(pads, ListInt, {0,0,0})
.ATTR(dilation, ListInt, {1,1,1})
.ATTR(dilation, ListInt, {0,0,0})
.ATTR(ceil_mode, Int, 0)
.ATTR(data_format, String, "NDHWC")
.OP_END_FACTORY_REG(MaxPool3D)


/**
* @brief Computes second-order gradients of the maxpooling3d function.

* @par Inputs:
* @li orig_x: Original forward input tensor(NDC1HWC0) of type float16
* @li orig_y: Original forward output tensor(NDC1HWC0) of type float16
* @li grads: Gradient tensor(NDC1HWC0) of type float16
* @li assist: Assist tensor(NDC1HWC0) of type float16

* @par Attributes:
* @li ksize: A required list or tuple,
* specifying the size of the sliding window.
* @li strides: A required list or tuple,
* specifying the stride of the sliding window.
* @li pads: A required list or tuple
* @li padding: A required string, window sliding mode. Either SAME or VALID.
* @li data_format: An optional string.
* Format of the original input, either NCDHW or NDHWC. Defaults to NDHWC.

* @attention Constraints:
* @li Only the Ascend 910 platform is supported.
* @li "orig_x" and "grads" must have the same shape.
* @li "orig_y" and "y" must have the same shape. Otherwise, an error is reported.
* @li "orig_x", "orig_y", "grads", and "y" must be NDC1HWC0 tensors.

* @par Outputs:
* @li y: Result tensor of type float16

* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator MaxPool3DGradGrad.
*/

REG_OP(MaxPool3DGradGrad)
.INPUT(orig_x, TensorType::RealNumberType())
.INPUT(orig_y, TensorType::RealNumberType())
.INPUT(grads, TensorType::RealNumberType())
.OUTPUT(y, TensorType::RealNumberType())
.REQUIRED_ATTR(ksize, ListInt)
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(data_format, String, "NDHWC")
.OP_END_FACTORY_REG(MaxPool3DGradGrad)


/**
* @brief Computes gradients of the maxpooling function.

@@ -320,10 +239,9 @@ REG_OP(MaxPoolGrad)
* @brief Computes second-order gradients of the maxpooling function.

* @par Inputs:
* @li x1: Original forward input tensor. Supported type:float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64.
* @li x2: Has the same type and format as input "x1".
* @li grad:Has the same type and format as input "x1".
* @li x1: Original forward input tensor of type RealNumberType
* @li x2: Original forward output tensor of type RealNumberType
* @li grad: Gradient tensor of type RealNumberType

* @par Attributes:
* @li ksize: A required list or tuple,
@@ -344,7 +262,7 @@ REG_OP(MaxPoolGrad)
* @li Other dimensions of ksize and strides is 1.

* @par Outputs:
* @li y: Has the same type and format as input "x1".
* @li y: Result tensor of type RealNumberType

* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator MaxPoolGradGrad.
@@ -479,56 +397,19 @@ REG_OP(MaxPoolGradWithArgmax)
.REQUIRED_ATTR(padding, String)
.OP_END_FACTORY_REG(MaxPoolGradWithArgmax)

/**
*@brief Performs transform mask to argmax.

*@par Inputs:
* Two inputs:
*x: An NC1HWC0 Tensor of type float16.
*mask: An NC1HWC0 Tensor of type uint16.

*@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value.
*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value.
*@li padding: A required string. No default value.

*@par Outputs:
*argmax: An NC1HWC0 Tensor of type int32.

*@attention Constraints:
*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1.
*@li "padding" is either "SAME" or "VALID".

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Mask2Argmax.
*/
REG_OP(Mask2Argmax)
.INPUT(x, TensorType::RealNumberType())
.INPUT(mask, TensorType::IndexNumberType())
.OUTPUT(argmax, TensorType::IndexNumberType())
.REQUIRED_ATTR(ksize, ListInt)
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(padding, String)
.REQUIRED_ATTR(originshape, ListInt)
.OP_END_FACTORY_REG(Mask2Argmax)

/**
* @brief Computes second-order gradients of the maxpooling function.

* @par Inputs:
* @li x: Original forward input tensor. Supported type: float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64.
* @li grad: Gradient tensor. Supported type: float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64.
* @li argmax: A tensor of type int32 or int64.
* @li x: Original forward input tensor of type RealNumberType
* @li grad: Gradient tensor of type RealNumberType
* @li argmax: A tensor of type IndexNumberType
* @par Attributes:
* @li ksize: A required list, specifying the size of the sliding window.
* @li strides: A required list, specifying the stride of the sliding window.
* @li padding: A required string, window sliding mode. Either SAME or VALID.
* @par Outputs:
* @li y: Result tensor. Supported type: float, double, int32,
* uint8, int16, int8, int64, uint16, half, uint32, uint64
* @li y: Result tensor of type RealNumberType

* @attention Constraints:
* @li Only the cloud platform is supported.
@@ -650,7 +531,7 @@ REG_OP(MaxPoolGradWithArgmaxCCE)
* one input, including:
*@li x: A tensor of type float16 or float32.
*@par Attributes:
*@li scale: An optional float32, scale factor of x. Defaults to "1.0".
*@li scale: An optional float, scale factor of x. Defaults to "1.0".
*@li stride_h: An optional int32, broadcast the axis of h. Defaults to "2".
*@li stride_w: An optional int32, broadcast the axis of w. Defaults to "2".
*@par Outputs:
@@ -868,186 +749,7 @@ REG_OP(DataFormatVecPermute)
.ATTR(dst_format, String, "NCHW")
.OP_END_FACTORY_REG(DataFormatVecPermute)

/**
* @brief Computes gradients of the MaxPool3D function.

* @par Inputs:
* @li orig_x: A mutable NDC1HWC0 tensor of type float16.
* @li orig_y: A mutable NDC1HWC0 tensor of type float16.
* @li grads: A mutable NDC1HWC0 tensor of type float16.

* @par Attributes:
* @li ksize: A required tuple or list, specifying the size of the window for
* each dimension of the input tensor.
* @li strides: A required tuple or list, specifying the stride of the sliding
* window for each dimension of the input tensor.
* @li pads: A list of 6 ints. Supports only padding along the D,
* H and W dimensions in sequence of head, tail, top, bottom, left and right.
* @li data_format: An optional string, specifying the data format of the
* input and output data. Defaults to "NDHWC".

* @par Outputs:
* y: A mutable tensor. Has the same shape as "orig_x", but type is float32.

* @par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPool3DGrad.
*/
REG_OP(MaxPool3DGrad)
.INPUT(orig_x, TensorType::RealNumberType())
.INPUT(orig_y, TensorType::RealNumberType())
.INPUT(grads, TensorType::RealNumberType())
.OUTPUT(y, TensorType::RealNumberType())
.REQUIRED_ATTR(ksize, ListInt)
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(data_format, String, "NDHWC")
.OP_END_FACTORY_REG(MaxPool3DGrad)
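A small shape helper illustrating how the six "pads" entries and the per-axis ksize/strides determine the pooled D/H/W extents; a sketch assuming floor division, not the production shape-inference code:

#include <array>
#include <vector>

// Output-shape sketch for the MaxPool3DGrad inputs above (NDHWC): the six
// "pads" entries pad D, H and W as head/tail, top/bottom, left/right.
std::array<int, 3> Pool3DOutShape(std::array<int, 3> dhw,
                                  std::array<int, 3> ksize,
                                  std::array<int, 3> strides,
                                  const std::vector<int>& pads) {
  std::array<int, 3> out{};
  for (int a = 0; a < 3; ++a) {
    const int padded = dhw[a] + pads[2 * a] + pads[2 * a + 1];
    out[a] = (padded - ksize[a]) / strides[a] + 1;  // floor division assumed
  }
  return out;
}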

/**
*@brief Performs AvgPool1D on the input.

*@par Inputs:
*x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64.

*@par Attributes:
*@li ksize: A required int, specifying the size of the window.
*@li strides: A required int, specifying the stride of the window.
*@li pads: A required tuple or list, specifying the padding.
*@li ceil_mode: An optional bool. Defaults to False.
*@li count_include_pad: An optional bool. Defaults to False.

*@par Outputs:
*y: A Tensor. Has the same type as x.

*@par Third-party framework compatibility
*@li Compatible with the PyTorch AvgPool1D operator.
*/
REG_OP(AvgPool1D)
.INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(ksize, Int)
.REQUIRED_ATTR(strides, Int)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(ceil_mode, Bool, false)
.ATTR(count_include_pad, Bool, false)
.OP_END_FACTORY_REG(AvgPool1D)
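The interaction of "ksize", "strides", "pads", "ceil_mode" and "count_include_pad" can be illustrated with a scalar reference sketch. It assumes "pads" holds {pad_left, pad_right}; the real kernel operates on device tensors:

#include <algorithm>
#include <cmath>
#include <vector>

// Minimal scalar sketch of the AvgPool1D semantics documented above.
std::vector<float> AvgPool1DRef(const std::vector<float>& x, int ksize,
                                int strides, int pad_l, int pad_r,
                                bool ceil_mode, bool count_include_pad) {
  const int len = static_cast<int>(x.size());
  const double span = static_cast<double>(len + pad_l + pad_r - ksize);
  const int out_len = 1 + static_cast<int>(ceil_mode ? std::ceil(span / strides)
                                                     : std::floor(span / strides));
  std::vector<float> y(out_len, 0.0f);
  for (int o = 0; o < out_len; ++o) {
    const int start = o * strides - pad_l;
    const int end = start + ksize;
    float sum = 0.0f;
    for (int i = std::max(start, 0); i < std::min(end, len); ++i) sum += x[i];
    // With count_include_pad the divisor is the full window size;
    // otherwise only the in-bounds elements are counted.
    const int divisor = count_include_pad
                            ? ksize
                            : std::min(end, len) - std::max(start, 0);
    y[o] = divisor > 0 ? sum / divisor : 0.0f;
  }
  return y;
}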

/**
*@brief Performs AvgPool1D on the input.

*@par Inputs:
*x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64.

*@par Attributes:
*@li ksize: A required int, specifying the size of the window.
*@li strides: A required int, specifying the stride of the window.
*@li pads: A required tuple or list, specifying the padding.
*@li ceil_mode: An optional bool. Defaults to False.
*@li count_include_pad: An optional bool. Defaults to False.

*@par Outputs:
*y: A Tensor. Has the same type as x.

*@par Third-party framework compatibility
*@li Compatible with the PyTorch AvgPool1D operator.
*/
REG_OP(AvgPool1DD)
.INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.INPUT(assist_matrix, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
.REQUIRED_ATTR(ksize, Int)
.REQUIRED_ATTR(strides, Int)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(ceil_mode, Bool, false)
.ATTR(count_include_pad, Bool, false)
.OP_END_FACTORY_REG(AvgPool1DD)
/**
*@brief Performs max pooling on the input and outputs both max values and indices.

*@par Inputs:
* One input:
*x: An NC1HWC0 Tensor of type float16.
*@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
* each dimension of the input tensor. No default value.
*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
* each dimension of the input tensor. No default value.
*@li pads: A required list of int values. No default value.
*@li dtype: An optional int. Defaults to "3".
*@li dilation: An optional list of int8, int16, int32, or int64 values. Defaults to "{1, 1, 1, 1}".
*@li ceil_mode: An optional bool. Defaults to "false".

*@par Outputs:
*y: A Tensor. Has the same type and format as input "x".
*argmax: A Tensor. type:uint16, format:NC1HWC0.
*@attention Constraints:
*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
*@li "strides is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1,
* strides[2] <= 63, strides[2] >= 1.
*@li "dilation" is a list that has length 4.
*@li "ceil_mode" is a bool, default is false.

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPoolWithArgmax.
*/
REG_OP(MaxPoolWithArgmaxV2)
.INPUT(x, TensorType({DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT16}))
.OUTPUT(argmax, TensorType({DT_UINT16}))
.REQUIRED_ATTR(ksize, ListInt)
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(dtype, Int, 3)
.ATTR(dilation, ListInt, {1, 1, 1, 1})
.ATTR(ceil_mode, Bool, false)
.OP_END_FACTORY_REG(MaxPoolWithArgmaxV2)
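A reference sketch of the value/argmax pairing this op produces. It works on a plain single-channel H x W buffer with int32 indices; the actual op consumes NC1HWC0 float16 data and emits a packed uint16 argmax:

#include <cstdint>
#include <limits>
#include <vector>

// Reference sketch of max pooling over one H x W plane that also records
// the flat (h * W + w) index of each maximum, with VALID-style windows.
void MaxPoolWithArgmaxRef(const std::vector<float>& x, int H, int W,
                          int kh, int kw, int sh, int sw,
                          std::vector<float>* y, std::vector<int32_t>* argmax) {
  const int out_h = (H - kh) / sh + 1;
  const int out_w = (W - kw) / sw + 1;
  y->assign(out_h * out_w, std::numeric_limits<float>::lowest());
  argmax->assign(out_h * out_w, 0);
  for (int oh = 0; oh < out_h; ++oh) {
    for (int ow = 0; ow < out_w; ++ow) {
      for (int i = 0; i < kh; ++i) {
        for (int j = 0; j < kw; ++j) {
          const int h = oh * sh + i;
          const int w = ow * sw + j;
          const float v = x[h * W + w];
          if (v > (*y)[oh * out_w + ow]) {
            (*y)[oh * out_w + ow] = v;
            (*argmax)[oh * out_w + ow] = h * W + w;  // flat input index
          }
        }
      }
    }
  }
}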

/**
*@brief Performs the backpropagation of MaxPoolWithArgmaxV2.

*@par Inputs:
* Three inputs, including:
*@li x: An NC1HWC0 tensor of type float16.
*@li grad: An NC1HWC0 tensor of type float16.
*@li argmax: An NC1HWC0 tensor of type uint16 or int64.

*@par Attributes:
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
* each dimension of the input tensor. No default value.
*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
* each dimension of the input tensor. No default value.
*@li pads: A required list of int values. No default value.
*@li dtype: An optional int. Defaults to "3".
*@li dilation: An optional list of int8, int16, int32, or int64 values. Defaults to "{1, 1, 1, 1}".
*@li ceil_mode: An optional bool. Defaults to "false".

*@par Outputs:
*y: A Tensor. Has the same type and format as input "x".

*@attention Constraints:
*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
*@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1
*@li "dilation" is a list that has length 4.
*@li "ceil_mode" is a bool, default is false.

*@see max_pool_grad_with_argmaxv2
*@par Third-party framework compatibility
* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV2.
*/

REG_OP(MaxPoolGradWithArgmaxV2)
.INPUT(x, TensorType({DT_FLOAT16}))
.INPUT(grad, TensorType({DT_FLOAT16}))
.INPUT(argmax, TensorType({DT_UINT16}))
.OUTPUT(y, TensorType({DT_FLOAT16}))
.REQUIRED_ATTR(ksize, ListInt)
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(dtype, Int, 3)
.ATTR(dilation, ListInt, {1,1,1,1})
.ATTR(ceil_mode, Bool, false)
.OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV2)
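The backward pass is then a scatter: each pooled-output gradient is routed to the input element recorded in "argmax". A companion sketch, reusing the flat-index convention above:

#include <cstdint>
#include <vector>

// Companion sketch to MaxPoolWithArgmaxRef: scatter each pooled-output
// gradient to the input element whose flat index was recorded in "argmax".
// Overlapping windows accumulate.
std::vector<float> MaxPoolGradWithArgmaxRef(int input_size,
                                            const std::vector<float>& grad,
                                            const std::vector<int32_t>& argmax) {
  std::vector<float> dx(input_size, 0.0f);
  for (size_t o = 0; o < grad.size(); ++o) {
    dx[argmax[o]] += grad[o];
  }
  return dx;
}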
} // namespace ge

#endif // GE_OP_NN_POOLING_OPS_H

+ 1
- 1
third_party/fwkacllib/inc/ops/nn_training_ops.h View File

@@ -1508,7 +1508,7 @@ REG_OP(ApplyProximalAdagradD)
*@par Attributes:
*use_locking: An optional bool. Defaults to "False".\n
* If "True", updating of the var and accum tensors will be protected by a lock; \n
* If "False", the behavior is undefined, but may exhibit less contention.
* If "False", the behavior is undefined, but may exhibit less contention.

*@par Outputs:
*var: A mutable Tensor. Has the same type as "var".


+ 4
- 4
third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h View File

@@ -83,7 +83,7 @@ REG_OP(TanhGrad)

*@par Inputs:
*One input:
*x: A Tensor. Must be one of the following types: float16, float32, complex64, complex128, int32, int64.

*@par Outputs:
*y: A Tensor. Has the same type as "x".
@@ -184,7 +184,7 @@ REG_OP(Relu6Grad)
* @brief Compute sigmoid of "x" element-wise.

* @par Inputs:
* A Tensor of type UnaryDataType.

* @par Outputs:
* A Tensor. Has the same type as "x".
@@ -220,7 +220,7 @@ REG_OP(SigmoidGrad)
*if x>0, x+log(1+exp(-x)); otherwise log(1+exp(x)).

*@par Inputs:
*x: A Tensor of type float16 or float32.

*@par Outputs:
*y: A tensor. Has the same type and format as input "x".
@@ -442,7 +442,7 @@ REG_OP(PReluGrad)
*x: A float16, float32 or double, for the input data type.

*@par Attributes:
*alpha: A float. Defines at which negative value the ELU saturates. Defaults to "1.0".

*@par Outputs:
*y: A float16, float32 or double, for the normalized result.


+ 2
- 185
third_party/fwkacllib/inc/ops/reduce_ops.h View File

@@ -673,7 +673,7 @@ REG_OP(ReduceAnyD)

*@par Attributes:
*@li operation: An optional int32 from 1(SUM), 2(ASUM), 3(SUMSQ), and 4(MEAN),
*specifying the reduction algorithm. Defaults to 1.
*@li axis: An optional int32, specifying the first axis to reduce. Defaults to "0".
*The value range is [-N, N-1], where N is the input tensor rank.
*@li coeff: An optional float32, specifying the scale coefficient. Defaults to "1.0".
@@ -745,190 +745,7 @@ REG_OP(EuclideanNormD)
.ATTR(keep_dims, Bool, false)
.OP_END_FACTORY_REG(EuclideanNormD)



/**
*@brief Performs instance normalization for inference.

*@par Inputs:\n
* Five inputs, including: (NC1HWC0 supported)
*@li x: A Tensor of type float16 or float32.
*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma.
*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta.
*@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the mean.
*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance.

*@par Attributes:
*epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero.
Defaults to "0.00001".

*@par Outputs:\n
*y: A Tensor of type float16 or float32 for the normalized "x".
*batch_mean: A Tensor of type float32 for the result mean.
*batch_variance: A Tensor of type float32 for the result variance.

*@attention Constraints:
*For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
*/
REG_OP(INInferV2)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(beta, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.00001)
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(batch_mean, TensorType({DT_FLOAT}))
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(INInferV2)
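The normalization itself is the usual scale-and-shift form, y = gamma * (x - mean) / sqrt(variance + epsilon) + beta. A per-slice sketch (broadcasting of the [N, C1, 1, 1, C0] parameters over H and W is omitted):

#include <cmath>
#include <cstddef>
#include <vector>

// Per-element sketch of the INInferV2 inference formula; each call handles
// one (n, c1, c0) slice with its scalar statistics.
void InstanceNormInferRef(const std::vector<float>& x, float gamma, float beta,
                          float mean, float variance, float epsilon,
                          std::vector<float>* y) {
  const float inv_std = 1.0f / std::sqrt(variance + epsilon);
  y->resize(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) {
    (*y)[i] = gamma * (x[i] - mean) * inv_std + beta;
  }
}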

/**
*@brief Performs reduced instance normalization.

*@par Inputs:\n
*x: A Tensor of type float16 or float32, with format NC1HWC0.

*@par Outputs:
*@li sum: A Tensor of type float32 for SUM reduced "x".
*@li square_sum: A Tensor of type float32 for SUMSQ reduced "x".

*@attention Constraints:\n
* This operator is an InstanceNorm fusion operator for updating the moving averages for training. \n
* This operator is used in conjunction with INTrainingUpdateV2.
*/
REG_OP(INTrainingReduceV2)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(sum, TensorType({DT_FLOAT}))
.OUTPUT(square_sum, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(INTrainingReduceV2)


/**
*@brief Performs update instance normalization.

*@par Inputs:\n
* Seven inputs, including: (NC1HWC0 supported)
*@li x: A Tensor of type float16 or float32.
*@li sum: A [N, C1, 1, 1, C0] Tensor of type float32 for the output of operator INTrainingReduceV2.
*@li square_sum: A [N, C1, 1, 1, C0] Tensor of type float32 for the output of operator INTrainingReduceV2.
*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma.
*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta.
*@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated mean.
*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the updated variance.

*@par Attributes:
*@li momentum: An optional float32, specifying the momentum to update mean and var. Defaults to "0.1".
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".

*@par Outputs:\n
* Three outputs, including: (NC1HWC0 supported)
*@li y: A Tensor of type float16 or float32, for normalized "x".
*@li batch_mean: A Tensor of type float32, for the updated mean.
*@li batch_variance: A Tensor of type float32, for the updated variance.

*@attention Constraints:
*@li This operator is an InstanceNorm fusion operator for updating the moving averages for training. \n
* This operator is used in conjunction with INTrainingReduceV2.
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
*/
REG_OP(INTrainingUpdateV2)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(sum, TensorType({DT_FLOAT}))
.INPUT(square_sum, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(beta, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
.ATTR(momentum, Float, 0.1)
.ATTR(epsilon, Float, 0.00001)
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(batch_mean, TensorType({DT_FLOAT}))
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(INTrainingUpdateV2)
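A sketch of how the update step can derive statistics from the INTrainingReduceV2 outputs: with "count" elements reduced per instance, mean = sum / count and variance = square_sum / count - mean^2, then the running statistics are blended by "momentum". The formulas are inferred from the reduce/update pairing, and the blending convention may differ in the actual kernel:

// Sketch only; not the fwkacllib kernel source.
struct InStats { float mean; float variance; };

InStats InTrainingUpdateRef(float sum, float square_sum, float count,
                            float momentum, float* running_mean,
                            float* running_variance) {
  const float mean = sum / count;
  const float variance = square_sum / count - mean * mean;
  // Blend running statistics toward the current batch statistics.
  *running_mean = (1.0f - momentum) * (*running_mean) + momentum * mean;
  *running_variance = (1.0f - momentum) * (*running_variance) + momentum * variance;
  return {mean, variance};
}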


/**
*@brief Performs reduced group normalization.

*@par Inputs:\n
*x: A Tensor of type float16 or float32, with format NCHW or NHWC.

*@par Outputs:
*@li sum: A Tensor of type float32 for SUM reduced "x".
*@li square_sum: A Tensor of type float32 for SUMSQ reduced "x".


*@par Attributes:
*@li num_groups: An int, specifying the number of groups. Must be the same as "num_groups" of GNTrainingUpdate. Defaults to "2".

*@attention Constraints:\n
* This operator is a GroupNorm fusion operator for updating the moving averages for training. \n
* This operator is used in conjunction with GNTrainingUpdate.
*/
REG_OP(GNTrainingReduce)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(sum, TensorType({DT_FLOAT}))
.OUTPUT(square_sum, TensorType({DT_FLOAT}))
.ATTR(num_groups, Int, 2)
.OP_END_FACTORY_REG(GNTrainingReduce)


/**
*@brief Performs update group normalization.

*@par Inputs:\n
* Seven inputs, including: (NCHW and NHWC supported)
*@li x: A Tensor of type float16 or float32.
*@li sum: A 5D Tensor of type float32,
shape is [N, G, D, 1, 1] for NCHW, [N, 1, 1, G, D] for NHWC
for the output of operator GNTrainingReduce.
*@li square_sum: A 5D Tensor of type float32,
shape is [N, G, D, 1, 1] for NCHW, [N, 1, 1, G, D] for NHWC
for the output of operator GNTrainingReduce.
*@li scale: A 5D Tensor of type float32,
shape is [1, G, D, 1, 1] for NCHW, [1, 1, 1, G, D] for NHWC
for the scaling gamma.
*@li offset: A 5D Tensor of type float32,
shape is [1, G, D, 1, 1] for NCHW, [1, 1, 1, G, D] for NHWC
for the scaling beta.
*@li mean: A 5D Tensor of type float32,
shape is [N, G, D, 1, 1] for NCHW, [N, 1, 1, G, D] for NHWC
for the updated mean.
*@li variance: A 5D Tensor of type float32,
shape is [N, G, D, 1, 1] for NCHW, [N, 1, 1, G, D] for NHWC
for the updated variance.


*@par Attributes:
*@li epsilon: A float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
*@li num_groups: An int, specifying the number of groups. Must be the same as "num_groups" of GNTrainingReduce. Defaults to "2".

*@par Outputs:\n
* Three outputs, including: (NC1HWC0 supported)
*@li y: A Tensor of type float16 or float32, for normalized "x".
*@li batch_mean: A Tensor of type float32, for the updated mean.
*@li batch_variance: A Tensor of type float32, for the updated variance.

*@attention Constraints:
*@li This operator is a GroupNorm fusion operator for updating the moving averages for training. \n
* This operator is used in conjunction with GNTrainingReduce.
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
*/
REG_OP(GNTrainingUpdate)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(sum, TensorType({DT_FLOAT}))
.INPUT(square_sum, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(scale, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(offset, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
.ATTR(num_groups, Int, 2)
.ATTR(epsilon, Float, 0.0001)
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(batch_mean, TensorType({DT_FLOAT}))
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(GNTrainingUpdate)
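The [N, G, D, 1, 1] bookkeeping reduces to splitting C channels into "num_groups" groups of D = C / num_groups channels and computing statistics per (n, g). A sketch of the shape arithmetic only:

#include <array>
#include <stdexcept>

// Illustrates the documented sum/square_sum shape for NCHW input: C is
// split into "num_groups" groups of D = C / num_groups channels each.
std::array<int, 5> GroupNormStatShape(int n, int c, int num_groups) {
  if (num_groups <= 0 || c % num_groups != 0) {
    throw std::invalid_argument("C must be divisible by num_groups");
  }
  const int d = c / num_groups;
  return {n, num_groups, d, 1, 1};  // matches the documented [N, G, D, 1, 1]
}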

} //namespace ge


#endif /* GE_OP_REDUCE_OPS_H */

+ 1
- 8
third_party/fwkacllib/inc/ops/rnn.h View File

@@ -67,13 +67,6 @@ REG_OP(BasicLSTMCell)
.ATTR(activation, String, "tanh")
.OP_END_FACTORY_REG(BasicLSTMCell)

REG_OP(DynamicLSTM)
.INPUT(x, TensorType({DT_FLOAT32}))
.INPUT(w, TensorType({DT_FLOAT32}))
.INPUT(b, TensorType({DT_FLOAT32}))
.OUTPUT(output_h, TensorType({DT_FLOAT32}))
.OP_END_FACTORY_REG(DynamicLSTM)

/**
*@brief: Basic LSTM Cell backward calculation. Calculates the gradient of the input and hidden state.
*@par Inputs:
@@ -94,7 +87,7 @@ REG_OP(BasicLSTMCellInputGrad)
.INPUT(dgate, TensorType({DT_FLOAT16}))
.INPUT(w, TensorType({DT_FLOAT16}))
.OPTIONAL_INPUT(dropout_mask, TensorType({DT_UINT8}))
    .OUTPUT(dxt, TensorType({DT_FLOAT16}))
.OUTPUT(dht, TensorType({DT_FLOAT16, DT_FLOAT32}))
.ATTR(keep_prob, Float, 1.0)
.OP_END_FACTORY_REG(BasicLSTMCellInputGrad)


+ 5
- 6
third_party/fwkacllib/inc/ops/selection_ops.h View File

@@ -89,8 +89,7 @@ REG_OP(RangeD)

*@par Inputs:
*Two inputs, including:
* @li x: A Tensor of type TensorType::BasicType().
* @li multiples: A 1D Tensor of type int32 or int64.
* The length must be the same as the number of dimensions in "x".

@@ -497,7 +496,7 @@ REG_OP(UnsortedSegmentSumD)
*@par Inputs:
* Two inputs, including:\n
*@li x: An ND Tensor (up to 8D). \n
*Must be one of the following types: int8, uint8, int16, uint16, int32, int64, bool, float32, double.
*@li axis: A 1D Tensor.\n
*Must be one of the following types: int32, int64

@@ -1560,14 +1559,14 @@ REG_OP(ProposalD)
* If reverse=false: (N, H, W, C)->(N, H/stride, W/stride, C*(stride*stride))

*@par Inputs:
*x: An (N, H, W, C) tensor. All types except double are supported.

*@par Attributes:
*@li stride: An optional int32, specifying the plane or channel scaling factor. Defaults to "2".
*@li reverse: An optional bool, specifying the conversion mode. If "true", depth to space conversion is performed. If "false", space to depth conversion is performed. Defaults to "false".

*@par Outputs:
*y: An (N, H, W, C) tensor. All types except double are supported.

*@attention Constraints:
*@li If reverse=true: C/(stride*stride) yields an integer result. If reverse=false: W/stride and H/stride yield integer results.
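The shape arithmetic for both directions, with the documented divisibility constraints as explicit checks (a sketch in NHWC terms, not the device implementation):

#include <array>
#include <stdexcept>

// Shape arithmetic for the PassThrough op described above: reverse=false
// is space-to-depth, reverse=true is depth-to-space.
std::array<int, 4> PassThroughShape(std::array<int, 4> nhwc, int stride,
                                    bool reverse) {
  auto [n, h, w, c] = nhwc;
  if (reverse) {
    if (c % (stride * stride) != 0) {
      throw std::invalid_argument("C must be divisible by stride*stride");
    }
    return {n, h * stride, w * stride, c / (stride * stride)};
  }
  if (h % stride != 0 || w % stride != 0) {
    throw std::invalid_argument("H and W must be divisible by stride");
  }
  return {n, h / stride, w / stride, c * stride * stride};
}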
@@ -1594,7 +1593,7 @@ REG_OP(PassThrough)
* @li x: A required Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64.
* @li size: A required Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64.
*@par Attributes:
*@li axis: A required int32, specifying the first dimension to crop.
*@li offset: A required array, specifying the shift for all/each dimension to align the cropped bottom with the reference bottom. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64.
*@par Outputs:
*y: A required Tensor. Has the same type and shape as "size".


+ 3
- 6
third_party/fwkacllib/inc/ops/split_combination_ops.h View File

@@ -25,11 +25,11 @@ namespace ge {
*@par Inputs:
* Two inputs, including:
*@li x: An ND Tensor.
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64.
*@li split_dim: Must be of type int32. Specifies the dimension along which to split.

*@par Attributes:
*num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value.

*@par Outputs:
*y: Dynamic output.A list of output tensors. Has the same type and format as "x".
@@ -186,7 +186,6 @@ REG_OP(ParallelConcat)

*@par Attributes:
*concat_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to concatenate. No default value.
*N: An attribute int8, int16, int32, or int64. Specifies the number of elements in "x". Defaults to "1".

*@par Outputs:
*y: A Tensor. Has the same type and format as "x".
@@ -268,9 +267,7 @@ REG_OP(ConcatD)
*@par Inputs:
* Two inputs, including:
*@li x: Dynamic input. An NC1HWC0 or ND Tensor.
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64.
*@li concat_dim: An int32, or int64. Specifies the dimension along which to concatenate.

*@par Attributes:


+ 5
- 19
third_party/fwkacllib/inc/ops/transformation_ops.h View File

@@ -94,13 +94,6 @@ REG_OP(Transpose)
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(Transpose)

REG_OP(TransData)
.INPUT(src, TensorType::BasicType())
.OUTPUT(dst, TensorType::BasicType())
.REQUIRED_ATTR(src_format, String)
.REQUIRED_ATTR(dst_format, String)
.OP_END_FACTORY_REG(TransData)

/**
*@brief Permutes the dimensions according to order.\n
The returned tensor's dimension i will correspond to the input dimension order[i].
@@ -109,7 +102,7 @@ REG_OP(TransData)
*x: A Tensor. Must be one of the following types: float16, float32.

*@par Attributes:
*order: A permutation of the dimensions of "x".Type is int32.support any axis transformation.Defaults to "{0}"
*order: A permutation of the dimensions of "x".support any axis transformation

*@par Outputs:
*y: A Tensor. Has the same type as "x".
@@ -298,7 +291,7 @@ REG_OP(DepthToSpace)
*@brief Permutes data into spatial data blocks and then prunes them.

*@par Inputs:
*@li x: A 4D Tensor with format NC1HWC0.
*@li crops: A 1D list or tuple of int32 or int64.

*Must be one of the following types: float16, float32
@@ -307,7 +300,7 @@ REG_OP(DepthToSpace)
*block_size: A required int8, int16, int32, or int64. No default value.

*@par Outputs:
*y: A 4D Tensor with format NC1HWC0, of type float16 or float32.

@@ -372,7 +365,7 @@ REG_OP(BatchToSpaceD)

*@par Inputs:
* Two inputs, including:
*@li x: An NC1HWC0 Tensor. Must be one of the following types:
* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8,
* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
*@li paddings: A 2D tensor of type int, specifying the padding for the spatial dimensions of the input.
@@ -396,7 +389,7 @@ REG_OP(SpaceToBatch)
*@brief Outputs a copy of the input tensor where values from the "height" and "width" dimensions are padded and rearranged to the "batch" dimension.

*@par Inputs:
*x: An NC1HWC0 Tensor. Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32.


*@par Attributes:
@@ -605,13 +598,6 @@ REG_OP(Compress)
.OUTPUT(compress_index, TensorType({DT_INT8}))
.REQUIRED_ATTR(compress_parameters, ListInt)
.OP_END_FACTORY_REG(Compress)

REG_OP(CompressFcOp)
.INPUT(weight, TensorType({DT_INT8}))
.OUTPUT(weight_compress, TensorType({DT_INT8}))
.OUTPUT(compress_index, TensorType({DT_INT8}))
.REQUIRED_ATTR(compress_parameters, ListInt)
.OP_END_FACTORY_REG(CompressFcOp)
} // namespace ge

#endif // GE_OP_TRANSFORMATION_OPS_H
