run ge head

4 years ago · 2089d50d14
--- a/inc/external/acl/acl_mdl.h
+++ b/inc/external/acl/acl_mdl.h
@@ -43,6 +43,7 @@ extern "C" {
 #define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data"
 #define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data"
 #define ACL_ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES "_datadump_original_op_names"
 typedef struct aclmdlDataset aclmdlDataset;
 typedef struct aclmdlDesc aclmdlDesc;
@@ -636,6 +637,18 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetOutputDims(const aclmdlDesc *modelDesc, si
 */
 ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
 /**
 * @ingroup AscendCL
 * @brief Get the value of an attribute of an op in the model, looked up by op name
 *
 * @param modelDesc [IN]   model description
 * @param opName [IN]      name of the op whose attribute is queried
 * @param attr [IN]        name of the attribute to read
 *
 * @retval the attr value, as a C string
 *         NOTE(review): the value returned when the op or attribute is not found,
 *         and the lifetime of the returned pointer, are not visible here -- confirm.
 */
 ACL_FUNC_VISIBILITY const char *aclmdlGetOpAttr(aclmdlDesc *modelDesc, const char *opName, const char *attr);
 /**
 * @ingroup AscendCL
 * @brief get input name by index
--- a/inc/external/hccl/hccl.h
+++ b/inc/external/hccl/hccl.h
@@ -118,7 +118,23 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC
 */
 extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm,
                                aclrtStream stream);
 /**
 * @brief Get the rank size of this comm.
 *
 * @param comm A pointer identifying the communication resource to query.
 * @param rankSize A pointer identifying the rank size
 *                 (NOTE(review): presumably written by the call on success -- confirm).
 * @return HcclResult
 */
 extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize);
 /**
 * @brief Get the rank id of this comm.
 *
 * @param comm A pointer identifying the communication resource to query.
 * @param rank A pointer identifying the rank id
 *             (NOTE(review): presumably written by the call on success -- confirm).
 * @return HcclResult
 */
 extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
 /**
 * @brief Barrier operator.
 *
--- a/+ 1
+++ b/+ 1
@@ -1 +1 @@
 Subproject commit 7cb171b9c511fec57ccc0ad746ef2126267fe18b
 Subproject commit 7cbdf95765133b5a5b979c2231013f7c76c3d529
--- a/third_party/fwkacllib/inc/ops/array_ops.h
+++ b/third_party/fwkacllib/inc/ops/array_ops.h
@@ -1202,6 +1202,29 @@ REG_OP(Expand)
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
    .OP_END_FACTORY_REG(Expand)
 /**
 *@brief Returns a tensor containing the indices of all non-zero elements of input. \n
 *@par Inputs:
 *x: A Tensor. Must be one of the following types: double, float32, float16, int8, uint8,
 * int16, uint16, int32, uint32, int64, uint64, bool. \n
 *@par Attributes:
 * transpose: An optional bool. The output tensor will be transposed if true. Defaults to false. \n
 *@par Outputs:
 * y: A Tensor of type int64, holding the indices. \n
 *@par Third-party framework compatibility
 *Compatible with the PyTorch operator NonZero.
 */
 REG_OP(NonZero)
    .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \
              DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
    .OUTPUT(y, TensorType({DT_INT64}))
    .ATTR(transpose, Bool, false)
    .OP_END_FACTORY_REG(NonZero)
 /**
 * @brief Expand the input tensor to a compatible shape. \n
--- a/third_party/fwkacllib/inc/ops/batch_ops.h
+++ b/third_party/fwkacllib/inc/ops/batch_ops.h
@@ -64,10 +64,10 @@ the same types as "x_tensors" .  It's a dynamic output.  \n
 REG_OP(Batch)
  .DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \
      DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE}))
  .OUTPUT(y_index, TensorType({ DT_INT64 }))
  .OUTPUT(y_id, TensorType({ DT_INT64 }))
  .DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \
      DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL}))
  .OUTPUT(y_index, TensorType({ DT_INT64 }))
  .OUTPUT(y_id, TensorType({ DT_INT64 }))
  .REQUIRED_ATTR(num_batch_threads, Int)
  .REQUIRED_ATTR(max_batch_size, Int)
  .ATTR(max_enqueued_batches, Int, 10)
--- a/third_party/fwkacllib/inc/ops/correlation.h
+++ b/third_party/fwkacllib/inc/ops/correlation.h
@@ -0,0 +1,52 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /*!
 * \file correlation.h
 * \brief Operator prototype registration for the Correlation op.
 */
 #ifndef GE_OP_CORRELATION_OPS_H
 #define GE_OP_CORRELATION_OPS_H
 #include "graph/operator_reg.h"
 namespace ge {
 /**
 *@brief Computes a 2D Correlation given 4D "x" and "filter" tensors.
 *
 *@par Inputs:
 * @li filter: A 4D tensor of filters. Must be one of the following types: float16, int8.
 * @li x: A 4D tensor of input images, batch number must equal to batch
 * number of "filter", and channel must equal to channel of "filter".
 * Must be one of the following types: float16, int8.
 *
 *@par Attributes:
 * @li groups: An optional int. Sets the correlation mode, must be 1 or channel. Defaults to 1.
 *
 *@par Outputs:
 *y: A Tensor. Must be one of the following types: float16, int32.
 * NOTE(review): presumably float16 for float16 inputs and int32 for int8 inputs -- confirm.
 *@par Third-party framework compatibility
 * Compatible with caffe correlation custom operator.
 */
 REG_OP(Correlation)
    .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8}))
    .INPUT(x, TensorType({DT_FLOAT16, DT_INT8}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32}))
    .ATTR(groups, Int, 1)
    .OP_END_FACTORY_REG(Correlation)
 }  // namespace ge
 #endif  // GE_OP_CORRELATION_OPS_H
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -3467,25 +3467,6 @@ REG_OP(AxpyV2)
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OP_END_FACTORY_REG(AxpyV2)
 /**
 * @brief Computes the result of x1 + x2.
 * @par Inputs:
 * @li x1: An ND tensor of type float16, float, int32.
 * @li x2: An ND tensor of type float16, float, int32. \n
 * @par Outputs:
 * @li y: An ND tensor tensor with the same type as "x1". \n
 * @par Third-party framework compatibility
 * Compatible with the Pytorch operator Add.
 */
 REG_OP(PtAdd)
    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
    .OP_END_FACTORY_REG(PtAdd)
 /**
 * @brief Computes the result of x1 * x2.
--- a/third_party/fwkacllib/inc/ops/image_ops.h
+++ b/third_party/fwkacllib/inc/ops/image_ops.h
@@ -1483,6 +1483,55 @@ REG_OP(CombinedNonMaxSuppression)
    .ATTR(clip_boxes, Bool, true)
    .OP_END_FACTORY_REG(CombinedNonMaxSuppression)
 /**
 *@brief Resizes "img" with "warp_offset" using bilinear interpolation. \n
 *@par Inputs:
 *@li img: input image, A 4-D tensor of shape `[n, h, w, c]`.
 * Must be one of the following types: uint8, float16, float32.
 *@li warp_offset: the resize offset, A 4-D float32 tensor of shape `[n, h, w, 2]`, 2 means (x, y) for offset point. \n
 *@par Outputs:
 *warp_img: A Tensor after resize. Must be one of the following types: uint8, float16, float32. \n
 */
 REG_OP(IMGWarp)
    .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
    .INPUT(warp_offset, TensorType({DT_FLOAT32}))
    .OUTPUT(warp_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
    .OP_END_FACTORY_REG(IMGWarp)
 /**
 *@brief Resizes "img" with "map_offset" using bilinear interpolation. \n
 *@par Inputs:
 *@li img: input image, A 4-D tensor of shape `[n, h, w, c]`.
 * Must be one of the following types: uint8, float16, float32.
 *@li map_offset: the resize offset, A 4-D float32 tensor of shape `[n, h, w, 2]`, 2 means (x, y) for resize point. \n
 *@par Outputs:
 *map_img: A Tensor after resize. Must be one of the following types: uint8, float16, float32. \n
 */
 REG_OP(Remap)
    .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
    .INPUT(map_offset, TensorType({DT_FLOAT32}))
    .OUTPUT(map_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
    .OP_END_FACTORY_REG(Remap)
 /**
 *@brief Resizes "img" with "warp_index" using bilinear interpolation. \n
 *@par Inputs:
 *@li img: input image, A 5-D float32 tensor of shape `[n, 4, c, h, w]`,
 * and 4 mean input[(h_top, w_left), (h_top, w_right), (h_bottom, w_left),  (h_bottom, w_right)].
 *@li warp_index: the resize offset, A 4-D float32 tensor of shape `[n, 2, h, w]`, 2 means (x, y) for resize point. \n
 *@par Outputs:
 *warp_img: A float32 Tensor after ResizeBilinear, A 4-D tensor of shape `[n, c, h, w]`. \n
 */
 REG_OP(IMGWarpResize)
    .INPUT(img, TensorType({DT_FLOAT32}))
    .INPUT(warp_index, TensorType({DT_FLOAT32}))
    .OUTPUT(warp_img, TensorType({DT_FLOAT32}))
    .OP_END_FACTORY_REG(IMGWarpResize)
 /**
 *@brief Function spatial transformer . \n
@@ -1802,5 +1851,22 @@ REG_OP(ImageUnfold)
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(padding_mode, String, "zeros")
    .OP_END_FACTORY_REG(ImageUnfold)
 /**
 *@brief This operation selects images into warp_images according to offsets. \n
 *@par Inputs:
 *@li images: 4-D Tensor with shape `[batch, height, width, 3]`.
 * Must be one of the following types: uint8, float16, float32.
 *@li offsets: 4-D Tensor with shape `[batch, 4, new_height, new_width]`.
 * Must be one of the following types: float32, int32. \n
 *@par Outputs:
 *warp_images: Returns 5-D Tensor with shape
 * `[batch, 4, new_height, new_width, 3]` and the same dtype as `images`. \n
 */
 REG_OP(IMGWarpOffsets)
    .INPUT(images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT}))
    .INPUT(offsets, TensorType({DT_FLOAT, DT_INT32}))
    .OUTPUT(warp_images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT}))
    .OP_END_FACTORY_REG(IMGWarpOffsets)
 }  // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_
--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -179,7 +179,7 @@ REG_OP(GEMM)
 *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n
 *@par Inputs:
 *Three inputs, including:
 *Two inputs, including:
 * @li x1: A matrix Tensor. Must be one of the following types: float16,
 * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ].
 * @li x2: A matrix Tensor. Must be one of the following types: float16,
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -1460,8 +1460,6 @@ REG_OP(DecodeBboxV2)
 * @li y1: A Tensor. Must have the same type as x.
 * @li y2: A Tensor. Indices of y1 in x. Dtype must be int32.
 *
 *@attention Constraints:
 * The upper limit of data on the direction axis is 7040.
 */
 REG_OP(Sort)
    .INPUT(x, TensorType({ DT_FLOAT16 }))
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -55,7 +55,9 @@ REG_OP(LogSoftmaxGrad)
 *Two inputs, including:
 * @li features: A Tensor. Must be one of the following types: half, float32, double.
 *    A "batch_size * num_classes" matrix.
 * @li labels: A Tensor of the same type as "features". batch_size vector with values in [0, num_classes).
 * @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'.
 *             batch_size vector with values in [0, num_classes).
 *             This is the label for the given minibatch entry.
 *@par Outputs:
@@ -638,6 +640,48 @@ REG_OP(LayerNormXBackprop)
    .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OP_END_FACTORY_REG(LayerNormXBackprop)
 /**
 *@brief LayerNormXBackpropV2 operator interface implementation
 *  calculating: dy, x, variance, mean, gamma
 *  pd_xl = data_dy*data_gamma
 *  pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean)
 *           *np.power((data_variance + EPSILON), (-1.5))),
 *           reduce_axis, keepdims=True)
 *  pd_mean = np.sum(((-1.0)*pd_xl
 *            *np.power((data_variance + EPSILON), (-0.5))),
 *            reduce_axis, keepdims=True)
 *            + pd_var*(1.0/m)
 *            *np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True)
 *  pd_x = pd_xl*np.power((data_variance + EPSILON), (-0.5)) +
 *         pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m)
 *  res_for_gamma = (data_x - data_mean) * np.power((data_variance + EPSILON), (-0.5))
 *@par Inputs:
 *Five inputs, including:
 * @li dy: A Tensor. Must be one of the following types: float16, float32.
 * @li x: A Tensor. Must be one of the following types: float16, float32.
 * @li variance: A Tensor. Must be one of the following types: float16, float32.
 * @li mean: A Tensor. Must be one of the following types: float16, float32.
 * @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n
 *@par Outputs:
 *Two outputs, including:
 * @li pd_x: A Tensor. Must be one of the following types: float16, float32.
 * @li res_for_gamma: A Tensor. Must be one of the following types: float32.
 *@par Restrictions:
 *Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(LayerNormXBackpropV2)
    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(res_for_gamma, TensorType({DT_FLOAT}))
    .OP_END_FACTORY_REG(LayerNormXBackpropV2)
 /**
 *@brief LayerNormBetaGammaBackprop operator interface implementation
 *  calculating: dy, x, variance, mean
@@ -681,6 +725,35 @@ REG_OP(LayerNormBetaGammaBackprop)
    .REQUIRED_ATTR(shape_gamma, ListInt)
    .OP_END_FACTORY_REG(LayerNormBetaGammaBackprop)
 /**
 *@brief LayerNormBetaGammaBackpropV2 operator interface implementation
 *  calculating: dy, res_for_gamma
 *  pd_gamma = np.sum((data_dy*res_for_gamma), param_axis, keepdims=True)
 *  pd_beta = np.sum(data_dy, param_axis, keepdims=True)
 *@par Inputs:
 *Two inputs, including:
 * @li dy: A Tensor. Must be one of the following types: float16, float32.
 * @li res_for_gamma: A Tensor. Must be one of the following types: float32 . \n
 *@par Attributes:
 * shape_gamma: A required list of ints.
 * NOTE(review): presumably the shape of gamma -- confirm. \n
 *@par Outputs:
 *Two outputs, including:
 * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
 * @li pd_beta: A Tensor. Must be one of the following types: float16, float32.
 *@par Restrictions:
 *Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(LayerNormBetaGammaBackpropV2)
    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(res_for_gamma, TensorType({DT_FLOAT}))
    .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(shape_gamma, ListInt)
    .OP_END_FACTORY_REG(LayerNormBetaGammaBackpropV2)
 /**
 *@brief Return "output" according to the algorithm of dropout_do_mask:
 *  scale_x = x *(1 / keep_prob)
--- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
+++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
@@ -536,13 +536,19 @@ REG_OP(Elu)
 *       max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n
 *@par Inputs:
 *x: A float16, float32 or double, for the input data type . \n
 *x: A float16, float32, for the input data type . \n
 *@par Attributes:
 *alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
 *alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
 *@par Attributes:
 *alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
 *@par Attributes:
 *alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n
 *@par Outputs:
 *y: A float16, float32 or double, for the normalized result . \n
 *y: A float16, float32, for the normalized result . \n
 *@attention Constraints:
 *@li The input is of type float16 or float32 . \n
@@ -553,9 +559,11 @@ REG_OP(Elu)
 *@li Compatible with ONNX's Celu operator
 */
 REG_OP(Celu)
    .INPUT(x, TensorType::FloatingDataType())
    .OUTPUT(y, TensorType::FloatingDataType())
    .ATTR(alpha, Float, 1.0)
    .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16}))
    .ATTR(alpha1, Float, 1.0)
    .ATTR(alpha2, Float, 1.0)
    .ATTR(alpha3, Float, 1.0)
    .OP_END_FACTORY_REG(Celu)
 /**
@@ -690,6 +698,25 @@ REG_OP(Mish)
    .OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 }))
    .OP_END_FACTORY_REG(Mish)
 /**
 * @brief pytorch mish_grad operator.
 * @par Inputs:
 * Three inputs, including:
 * @li grad: A Tensor. shape, datatype and format is same as x
 * @li x: A Tensor. Must be one of the following types: float16, float32
 * @li tanhx: An optional Tensor. shape, datatype and format is same as x
 * @par Outputs:
 * 1 output, including:
 * @li x_grad: A Tensor. shape, datatype and format is same as x
 */
 REG_OP(MishGrad)
    .INPUT(grad, TensorType({ DT_FLOAT,DT_FLOAT16 }))
    .INPUT(x, TensorType({ DT_FLOAT,DT_FLOAT16 }))
    .OPTIONAL_INPUT(tanhx, TensorType({ DT_FLOAT,DT_FLOAT16 }))
    .OUTPUT(x_grad, TensorType({ DT_FLOAT,DT_FLOAT16 }))
    .OP_END_FACTORY_REG(MishGrad)
 /**
 * @brief pytorch hardtanh_backward operator.
 *
@@ -993,6 +1020,30 @@ REG_OP(HardSigmoidGrad)
    .ATTR(beta, Float, 0.5)
    .OP_END_FACTORY_REG(HardSigmoidGrad)
 /**
 * @brief Calculate the shrink function. \n
 * @par Inputs:
 * One input, including:
 * @li input_x: A tensor. Must be one of the following types:
 *     float16, float32. \n
 * @par Attributes:
 * @li lambd: An optional float. Defaults to 0.5. \n
 * @li bias: An optional float. Defaults to 0.0. \n
 * @par Outputs:
 * output_y: A Tensor with the same dtype and shape of input_x's. \n
 * @par Third-party framework compatibility
 * Compatible with the ONNX operator Shrink. \n
 */
 REG_OP(Shrink)
    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(lambd, Float, 0.5)
    .ATTR(bias, Float, 0.0)
    .OP_END_FACTORY_REG(Shrink)
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_
--- a/third_party/fwkacllib/inc/ops/pad_ops.h
+++ b/third_party/fwkacllib/inc/ops/pad_ops.h
@@ -273,14 +273,11 @@ REG_OP(PadV3)
 *@brief Pads a tensor.
 *@par Inputs:
 *x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32.
 * @li x: A Tensor. Must be one of the following types: float16, float32.
 * @li paddings: A Tensor. Must be int32 type 
 *     paddings is a required input tensor.
 *@par Attributes:
 * @li paddings: An required "vector<vector<int>>".
 *     For each dimension D of input, paddings[D, 0] indicates how many
 *     values to add before the contents of tensor in that dimension,
 *     and paddings[D, 1] indicates how many values to add after the
 *     contents of tensor in that dimension.
 * @li constant_values: An optional int value for pad.
 * @li mode: An optional string, Defaults to "constant", indicates paddings mode,
 *     support "constant", "reflect", "edge"
@@ -298,9 +295,9 @@ REG_OP(PadV3)
 * Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead.
 */
 REG_OP(PadV3D)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
    .REQUIRED_ATTR(paddings, ListListInt)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(paddings, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(constant_values, Int, 0)
    .ATTR(mode, String, "constant")
    .ATTR(paddings_contiguous, Bool, true)
--- a/third_party/fwkacllib/inc/ops/quantize_ops.h
+++ b/third_party/fwkacllib/inc/ops/quantize_ops.h
@@ -214,7 +214,7 @@ REG_OP(AscendRequant)
 *@brief Requantizes the input of int16 . \n
 *@par Inputs:
 *@li x: An NC1HWC0 tensor of type int16, specifying the input.
 *@li x0: An NC1HWC0 tensor of type int16, specifying the input.
 *@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio.
 *@li x1: An NC1HWC0 tensor of type int16 . \n
@@ -223,17 +223,17 @@ REG_OP(AscendRequant)
 *@li relu_flag: An optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n
 *@par Outputs:
 *@li y: The dequantized output tensor of type int8 and with format NC1HWC0.
 *@li y0: The dequantized output tensor of type int8 and with format NC1HWC0.
 *@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n
 *@par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(AscendRequantS16)
  .INPUT(x, TensorType({DT_INT16}))
  .INPUT(x0, TensorType({DT_INT16}))
  .INPUT(req_scale, TensorType({DT_UINT64}))
  .OPTIONAL_INPUT(x1, TensorType({DT_INT16}))
  .OUTPUT(y, TensorType({DT_INT8}))
  .OUTPUT(y0, TensorType({DT_INT8}))
  .OUTPUT(y1, TensorType({DT_INT16}))
  .ATTR(dual_output, Bool, false)
  .ATTR(relu_flag, Bool, false)
--- a/third_party/fwkacllib/inc/ops/rnn.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -33,6 +33,7 @@ namespace ge {
 *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
 *@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n
 *@li mask:A 1D Tensor. Must be one of the following types: uint8.
 *@par Attributes:
 *@li keep_prob:An integer identifying the keep prob in the op. Default to 1.
@@ -42,7 +43,6 @@ namespace ge {
 *@par Outputs:
 *seven outputs:
 *@li mask:A 1D Tensor. Must be one of the following types: uint8.
 *@li ct:A 4D Tensor. Must be one of the following types: float16, float32.
 *@li ht:A 4D Tensor. Must be one of the following types: float16.
 *@li it:A 4D Tensor. Must be one of the following types: float16, float32.
@@ -209,6 +209,7 @@ REG_OP(DynamicRNNGrad)
 *@li time_major:A bool identifying the time major in the op. Default to true.
 *@li activation:A string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
 *@li forget_bias:A float identifying the forget bias in the op. Default to 0.
 *@li gate_order:A string identifying the type of gate order in the op. Support "ijfo" and "ifjo". Default to "ijfo".
 *@li is_training:A bool identifying is training in the op. Default to true . \n
 *@par Outputs:
@@ -253,9 +254,103 @@ REG_OP(DynamicRNN)
    .ATTR(time_major, Bool, true)
    .ATTR(activation, String, "tanh")
    .ATTR(forget_bias, Float, 0.0)
    .ATTR(gate_order, String, "ijfo")
    .ATTR(is_training, Bool, true)
    .OP_END_FACTORY_REG(DynamicRNN)
 /**
 *@brief: DynamicRNNV2 calculation.
 *@par Inputs:
 *eleven inputs:
 *@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li weight_input:A required 4D Tensor. Must be one of the following types: float16, float32.
 *The format must be FRACTAL_Z.
 *@li weight_hidden:A required 4D Tensor. Must be one of the following types: float16, float32.
 *The format must be FRACTAL_Z.
 *@li b:An optional 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
 *@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
 *@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
 *@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
 *@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
 *@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n
 *@par Attributes:
 *@li cell_type:A string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
 *@li direction:A string identifying the direction in the op. Default to "UNIDIRECTIONAL".
 *Only UNIDIRECTIONAL is currently supported.
 *@li cell_depth:An integer identifying the cell depth in the op. Default to 1.
 *@li use_peephole:A bool identifying if use peephole in the op. Default to false.
 *@li keep_prob:A float identifying the keep prob in the op. Default to 1.
 *@li cell_clip:A float identifying the cell clip in the op. Default to -1.
 *@li num_proj:An integer identifying the num projection in the op. Default to 0.
 *@li time_major:A bool identifying the time major in the op. Default to true.
 *@li activation:A string identifying the type of activation function in the op. Default to "tanh".
 *Only tanh is currently supported.
 *@li recurrent_activation:A string identifying the type of activation function in the op. Default to "sigmoid".
 *Support "sigmoid" and "hard_sigmoid". In general, set "hard_sigmoid" for TF Keras LSTM.
 *@li forget_bias:A float identifying the forget bias in the op. Default to 0.
 *@li gate_order:A string identifying the type of gate order in the op. Support "ijfo" and "ifco". Default to "ijfo".
 *Set "ijfo" for TF operator LSTM, Set "ifco" for TF Keras LSTM.
 *@li stateful: A bool identifying the type of stateful in the op. Default to false. Only false is currently supported.
 *@li merge_mode: A string identifying the type of merge_mode in the op. Default to "concat".
 *Only "concat" is currently supported
 *@li is_training:A bool identifying is training in the op. Default to true . \n
 *@par Outputs:
 *eight outputs:
 *@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *Return the last output_h.
 *@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *Return the last output_c.
 *@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li tanhc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@par Third-party framework compatibility:
 * Compatible with the TF operator LSTM or TF keras operator LSTM.
 */
 REG_OP(DynamicRNNV2)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(weight_input, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
    .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(cell_type, String, "LSTM")
    .ATTR(direction, String, "UNIDIRECTIONAL")
    .ATTR(cell_depth, Int, 1)
    .ATTR(use_peephole, Bool, false)
    .ATTR(keep_prob, Float, 1.0)
    .ATTR(cell_clip, Float, -1.0)
    .ATTR(num_proj, Int, 0)
    .ATTR(time_major, Bool, true)
    .ATTR(activation, String, "tanh")
    .ATTR(recurrent_activation, String, "sigmoid")
    .ATTR(forget_bias, Float, 0.0)
    .ATTR(gate_order, String, "ijfo")
    .ATTR(stateful, Bool, false)
    .ATTR(merge_mode, String, "concat")
    .ATTR(is_training, Bool, true)
    .OP_END_FACTORY_REG(DynamicRNNV2)
 /**
 *@brief: DynamicRNNV3 calculation.
 *@par Inputs:
--- a/third_party/fwkacllib/inc/ops/transformation_ops.h
+++ b/third_party/fwkacllib/inc/ops/transformation_ops.h
@@ -742,6 +742,52 @@ REG_OP(Col2im)
    .REQUIRED_ATTR(stride, ListInt)
    .OP_END_FACTORY_REG(Col2im)
 /**
 * @brief Performs Im2col for each batch entry. \n
 * @par Inputs:
 * x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the
 *    following types:float32, int8, float16. The inputs must have data_format with
 *    one of follows:NHWC, NCHW.
 * @par Attributes:
 * @li ksizes: A required list or tuple. The size of the sliding window for each
 * dimension of images.
 * @li strides: An optional list or tuple. How far the centers of two consecutive
 * patches are in the images. Defaults to "{1}".
 * @li dilations: An optional list or tuple. Defaults to "{1}".
 * This is the input stride, specifying how far two consecutive patch
 * samples are in the input. Equivalent to extracting patches
 * with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *
 * (dilations - 1), followed by subsampling them spatially by a factor of dilations.
 * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions.
 * @li padding_mode: An optional String. The type of padding algorithm to use,
 * support "SAME", "VALID", "CALCULATED". Among the three modes, only the "CALCULATED"
 * means to use the pads below. Defaults to "CALCULATED".
 * @li pads: An optional list or tuple. The pad distance. Defaults to "{0}". \n
 * @par Outputs:
 * y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows *
 * ksize_cols * depth] containing image patches with size ksize_rows x ksize_cols
 * x depth vectorized in the "depth" dimension. Note "out_rows" and "out_cols"
 * are the dimensions of the output patches . \n
 * @attention Constraints:
 * "ksizes", "strides", "dilations" and "pads" are lists of integers . \n
 * @par Third-party framework compatibility
 * Compatible with Pytorch Im2col operator.
 */
 REG_OP(Im2col)
    .INPUT(x, TensorType::RealNumberType())
    .OUTPUT(y, TensorType::RealNumberType())
    .REQUIRED_ATTR(ksizes, ListInt)
    .ATTR(strides, ListInt, {1})
    .ATTR(dilations, ListInt, {1})
    .ATTR(padding_mode, String, "CALCULATED")
    .ATTR(pads, ListInt, {0})
    .OP_END_FACTORY_REG(Im2col)
 /**
 *@brief Generates a 2D or 3D flow field (sampling grid), given a batch of affine
 matrices theta. \n
--- a/third_party/fwkacllib/inc/runtime/kernel.h
+++ b/third_party/fwkacllib/inc/runtime/kernel.h
@@ -186,9 +186,9 @@ typedef void (*rtCallback_t)(void *fnData);
 #define RT_KERNEL_CUSTOM_AICPU (0x08)
 // STARS topic scheduler sqe : topic_type
 #define RT_KERNEL_DEVICE_FIRST (0X10)
 #define RT_KERNEL_HOST_ONLY (0X20)
 #define RT_KERNEL_HOST_FIRST (0X30)
 #define RT_KERNEL_DEVICE_FIRST (0x10)
 #define RT_KERNEL_HOST_ONLY (0x20)
 #define RT_KERNEL_HOST_FIRST (0x40)
 /**
 * @ingroup rt_kernel
--- a/third_party/fwkacllib/inc/toolchain/slog.h
+++ b/third_party/fwkacllib/inc/toolchain/slog.h
@@ -17,6 +17,8 @@
 #ifndef D_SYSLOG_H_
 #define D_SYSLOG_H_
 static const int TMP_LOG = 0;
 #ifdef __cplusplus
 #ifndef LOG_CPP
 extern "C" {
@@ -261,7 +263,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
 #define dlog_error(moduleId, fmt, ...)                                          \
  do {                                                                          \
    DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
  } while (0)
  } while (TMP_LOG != 0)
 /**
 * @ingroup slog
@@ -276,7 +278,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
    if(CheckLogLevel(moduleId, DLOG_WARN) == 1) {                                   \
        DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
    }                                                                               \
  } while (0)
  } while (TMP_LOG != 0)
 /**
 * @ingroup slog
@@ -291,7 +293,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
    if(CheckLogLevel(moduleId, DLOG_INFO) == 1) {                                   \
        DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
    }                                                                               \
  } while (0)
  } while (TMP_LOG != 0)
 /**
 * @ingroup slog
@@ -306,7 +308,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
    if(CheckLogLevel(moduleId, DLOG_DEBUG) == 1) {                                  \
        DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
    }                                                                               \
  } while (0)
  } while (TMP_LOG != 0)
 /**
 * @ingroup slog
@@ -318,7 +320,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
 #define dlog_event(moduleId, fmt, ...)                                          \
  do {                                                                          \
    DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
  } while (0)
  } while (TMP_LOG != 0)
 /**
 * @ingroup slog
@@ -334,7 +336,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
    if(CheckLogLevel(moduleId, level) == 1) {                                           \
        DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);   \
     }                                                                                  \
  } while (0)
  } while (TMP_LOG != 0)
 /**
 * @ingroup slog
@@ -351,7 +353,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
    if(CheckLogLevel(moduleId, level) == 1) {                                                           \
        DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__);    \
    }                                                                                                   \
  } while (0)
  } while (TMP_LOG != 0)
 /**
 * @ingroup slog
@@ -369,7 +371,7 @@ DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
    if(CheckLogLevel(moduleId, level) == 1) {                                                                   \
        DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
    }                                                                                                           \
  } while (0)
  } while (TMP_LOG != 0)
 /**
 * @ingroup slog
@@ -453,7 +455,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr);
    if(CheckLogLevelForC(moduleId, level) == 1) {                                           \
        DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);   \
     }                                                                                  \
  } while (0)
  } while (TMP_LOG != 0)
 /**
 * @ingroup slog
@@ -470,7 +472,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr);
    if(CheckLogLevelForC(moduleId, level) == 1) {                                                           \
        DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__);    \
    }                                                                                                   \
  } while (0)
  } while (TMP_LOG != 0)
 /**
 * @ingroup slog
@@ -488,7 +490,7 @@ DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr);
    if(CheckLogLevelForC(moduleId, level) == 1) {                                                                   \
        DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
    }                                                                                                           \
  } while (0)
  } while (TMP_LOG != 0)
 /**
 * @ingroup slog