| @@ -25,9 +25,9 @@ | |||
| extern "C" { | |||
| #endif | |||
| // Current version is 1.0.0 | |||
| // Current version is 1.1.0 | |||
| #define ACL_MAJOR_VERSION 1 | |||
| #define ACL_MINOR_VERSION 0 | |||
| #define ACL_MINOR_VERSION 1 | |||
| #define ACL_PATCH_VERSION 0 | |||
| /** | |||
| @@ -150,6 +150,8 @@ typedef enum { | |||
| ACL_DOUBLE = 11, | |||
| ACL_BOOL = 12, | |||
| ACL_STRING = 13, | |||
| ACL_COMPLEX64 = 16, | |||
| ACL_COMPLEX128 = 17 | |||
| } aclDataType; | |||
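| // Example (editor's sketch): describing a tensor with the new complex element | |||
| // types, assuming aclCreateTensorDesc and ACL_FORMAT_ND from the tensor-desc API. | |||
| static inline aclTensorDesc *aclExampleCreateComplex64Desc(void) { | |||
|   const int64_t dims[2] = {4, 4};  // shape of the illustrative tensor | |||
|   return aclCreateTensorDesc(ACL_COMPLEX64, 2, dims, ACL_FORMAT_ND); | |||
| } | |||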
| typedef enum { | |||
| @@ -295,11 +295,23 @@ ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclD | |||
| ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, aclTensorDesc *tensorDesc, | |||
| size_t index); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief Get aclTensorDesc from aclmdlDataset | |||
| * | |||
| * @param dataset [IN] aclmdlDataset pointer | |||
| * @param index [IN] index of tensorDesc | |||
| * | |||
| * @retval Get successfully, return the address of aclTensorDesc | |||
| * @retval Get failed, return NULL | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclTensorDesc *aclmdlGetDatasetTensorDesc(const aclmdlDataset *dataset, size_t index); | |||
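| // Example (editor's sketch): walk a dataset and read back each tensor | |||
| // descriptor previously attached with aclmdlSetDatasetTensorDesc. | |||
| static inline void aclExampleDumpDatasetDescs(const aclmdlDataset *ds) { | |||
|   size_t n = aclmdlGetDatasetNumBuffers(ds); | |||
|   for (size_t i = 0; i < n; ++i) { | |||
|     aclTensorDesc *desc = aclmdlGetDatasetTensorDesc(ds, i); | |||
|     if (desc == NULL) {  // NULL signals failure for this index | |||
|       break; | |||
|     } | |||
|     // ... inspect desc, e.g. query its size or dimensions ... | |||
|   } | |||
| } | |||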
| /** | |||
| * @ingroup AscendCL | |||
| * @brief Get the number of aclDataBuffer in aclmdlDataset | |||
| * | |||
| * @param dataset [IN] aclmdlDataset poiter | |||
| * @param dataset [IN] aclmdlDataset pointer | |||
| * | |||
| * @retval the number of aclDataBuffer | |||
| */ | |||
| @@ -309,7 +321,7 @@ ACL_FUNC_VISIBILITY size_t aclmdlGetDatasetNumBuffers(const aclmdlDataset *datas | |||
| * @ingroup AscendCL | |||
| * @brief Get the aclDataBuffer in aclmdlDataset by index | |||
| * | |||
| * @param dataset [IN] aclmdlDataset poiter | |||
| * @param dataset [IN] aclmdlDataset pointer | |||
| * @param index [IN] the index of aclDataBuffer | |||
| * | |||
| * @retval Get successfully, return the address of aclDataBuffer | |||
| @@ -135,6 +135,34 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrFloat(aclopAttr *attr, const char *attr | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *attrName, const char *attrValue); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief set an attribute. the type of the attribute is aclDataType | |||
| * | |||
| * @param attr [OUT] pointer to the instance of aclopAttr | |||
| * @param attrName [IN] attribute name | |||
| * @param attrValue [IN] attribute value | |||
| * | |||
| * @retval ACL_SUCCESS The function is successfully executed. | |||
| * @retval OtherValues Failure | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError aclopSetAttrDataType(aclopAttr *attr, const char *attrName, aclDataType attrValue); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief set an attribute. the type of the attribute is list of aclDataType | |||
| * | |||
| * @param attr [OUT] pointer to the instance of aclopAttr | |||
| * @param attrName [IN] attribute name | |||
| * @param numValues [IN] number of values | |||
| * @param values [IN] pointer to values | |||
| * | |||
| * @retval ACL_SUCCESS The function is successfully executed. | |||
| * @retval OtherValues Failure | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError aclopSetAttrListDataType(aclopAttr *attr, const char *attrName, int numValues, | |||
| const aclDataType values[]); | |||
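| // Example (editor's sketch): attach a dtype attribute and a dtype-list | |||
| // attribute to an op; "dst_type" and "out_types" are illustrative attr names. | |||
| static inline aclError aclExampleSetDtypeAttrs(aclopAttr *attr) { | |||
|   aclError ret = aclopSetAttrDataType(attr, "dst_type", ACL_FLOAT16); | |||
|   if (ret == ACL_SUCCESS) { | |||
|     const aclDataType types[2] = {ACL_FLOAT, ACL_INT32}; | |||
|     ret = aclopSetAttrListDataType(attr, "out_types", 2, types); | |||
|   } | |||
|   return ret; | |||
| } | |||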
| /** | |||
| * @ingroup AscendCL | |||
| * @brief set an attribute. the type of the attribute is list of bools | |||
| @@ -86,9 +86,9 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, con | |||
| * @retval OtherValues Failure | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute( | |||
| const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], | |||
| int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, | |||
| aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream); | |||
| const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], | |||
| int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, | |||
| aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream); | |||
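| // Example (editor's sketch): compile a single-input/single-output op from the | |||
| // system op model repository and launch it in one call. The op type "Abs" and | |||
| // the NULL opPath are illustrative; descs and buffers come from the caller. | |||
| static inline aclError aclExampleCompileAndExecute( | |||
|     aclTensorDesc *inDesc, aclDataBuffer *in, aclTensorDesc *outDesc, | |||
|     aclDataBuffer *out, const aclopAttr *attr, aclrtStream stream) { | |||
|   const aclTensorDesc *const inputDescs[1] = {inDesc}; | |||
|   const aclDataBuffer *const inputs[1] = {in}; | |||
|   const aclTensorDesc *const outputDescs[1] = {outDesc}; | |||
|   aclDataBuffer *const outputs[1] = {out}; | |||
|   return aclopCompileAndExecute("Abs", 1, inputDescs, inputs, 1, outputDescs, | |||
|                                 outputs, attr, ACL_ENGINE_SYS, ACL_COMPILE_SYS, | |||
|                                 NULL, stream); | |||
| } | |||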
| /** | |||
| * @ingroup AscendCL | |||
| @@ -40,13 +40,20 @@ typedef enum { | |||
| ACL_AICORE_MEMORY_BANDWIDTH = 2, | |||
| ACL_AICORE_L0B_AND_WIDTH = 3, | |||
| ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, | |||
| ACL_AICORE_MEMORY_UB = 5, | |||
| ACL_AICORE_NONE = 0xFF | |||
| } aclprofAicoreMetrics; | |||
| typedef enum { | |||
| ACL_STEP_START = 0, // step start | |||
| ACL_STEP_END = 1 // step end | |||
| } aclprofStepTag; | |||
| typedef struct aclprofConfig aclprofConfig; | |||
| typedef struct aclprofStopConfig aclprofStopConfig; | |||
| typedef struct aclprofAicoreEvents aclprofAicoreEvents; | |||
| typedef struct aclprofSubscribeConfig aclprofSubscribeConfig; | |||
| typedef struct aclprofStepInfo aclprofStepInfo; | |||
| /** | |||
| * @ingroup AscendCL | |||
| @@ -322,6 +329,36 @@ ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opI | |||
| */ | |||
| ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief get the step timestamp for the specified step tag | |||
| * | |||
| * @param stepInfo [IN] pointer to stepInfo data | |||
| * @param tag [IN] start or end flag | |||
| * @param stream [IN] stream info | |||
| * | |||
| * @retval ACL_SUCCESS The function is successfully executed. | |||
| * @retval OtherValues Failure | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError aclprofGetStepTimestamp(aclprofStepInfo *stepInfo, aclprofStepTag tag, aclrtStream stream); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief create pointer to aclprofStepInfo data | |||
| * | |||
| * | |||
| * @retval aclprofStepInfo pointer | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclprofStepInfo *aclprofCreateStepInfo(); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief destroy aclprofStepInfo pointer | |||
| * | |||
| * | |||
| * @retval void | |||
| */ | |||
| ACL_FUNC_VISIBILITY void aclprofDestroyStepInfo(aclprofStepInfo *stepinfo); | |||
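| // Example (editor's sketch): bracket one training step with start/end | |||
| // timestamps so the profiler can attribute work on `stream` to the step. | |||
| static inline void aclExampleProfileStep(aclrtStream stream) { | |||
|   aclprofStepInfo *info = aclprofCreateStepInfo(); | |||
|   if (info == NULL) { | |||
|     return; | |||
|   } | |||
|   if (aclprofGetStepTimestamp(info, ACL_STEP_START, stream) == ACL_SUCCESS) { | |||
|     // ... launch the work that belongs to this step ... | |||
|     (void)aclprofGetStepTimestamp(info, ACL_STEP_END, stream); | |||
|   } | |||
|   aclprofDestroyStepInfo(info); | |||
| } | |||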
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -44,6 +44,12 @@ typedef enum aclrtEventStatus { | |||
| ACL_EVENT_STATUS_RESERVED = 2, | |||
| } aclrtEventStatus; | |||
| typedef enum aclrtEventWaitStatus { | |||
| ACL_EVENT_WAIT_STATUS_COMPLETE = 0, | |||
| ACL_EVENT_WAIT_STATUS_NOT_READY = 1, | |||
| ACL_EVENT_WAIT_STATUS_RESERVED = 0xffff, | |||
| } aclrtEventWaitStatus; | |||
| typedef enum aclrtCallbackBlockType { | |||
| ACL_CALLBACK_NO_BLOCK, | |||
| ACL_CALLBACK_BLOCK, | |||
| @@ -499,6 +505,18 @@ ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream strea | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief Queries an event's wait-status | |||
| * | |||
| * @param event [IN] event to query | |||
| * @param status [OUT] event wait-status | |||
| * | |||
| * @retval ACL_SUCCESS The function is successfully executed. | |||
| * @retval OtherValues Failure | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError aclrtQueryEventWaitStatus(aclrtEvent event, aclrtEventWaitStatus *status); | |||
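| // Example (editor's sketch): poll the wait-status instead of blocking; loops | |||
| // until streams waiting on the event have been released. | |||
| static inline aclError aclExamplePollEventWait(aclrtEvent event) { | |||
|   aclrtEventWaitStatus status = ACL_EVENT_WAIT_STATUS_NOT_READY; | |||
|   aclError ret = ACL_SUCCESS; | |||
|   while (ret == ACL_SUCCESS && status == ACL_EVENT_WAIT_STATUS_NOT_READY) { | |||
|     ret = aclrtQueryEventWaitStatus(event, &status); | |||
|   } | |||
|   return ret; | |||
| } | |||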
| /** | |||
| * @ingroup AscendCL | |||
| * @brief Block Host Running, wait event to be complete | |||
| @@ -158,6 +158,20 @@ enum acldvppJpegFormat { | |||
| ACL_JPEG_CSS_UNKNOWN = 1000 | |||
| }; | |||
| enum acldvppChannelDescParamType { ACL_DVPP_CSC_MATRIX_UINT32 = 0 }; | |||
| enum aclvdecChannelDescParamType { ACL_VDEC_CSC_MATRIX_UINT32 = 0 }; | |||
| // Csc Matrix can be used both for acldvppChannelDescParamType and aclvdecChannelDescParamType | |||
| enum acldvppCscMatrix { | |||
| ACL_DVPP_CSC_MATRIX_BT601_WIDE = 0, | |||
| ACL_DVPP_CSC_MATRIX_BT601_NARROW, | |||
| ACL_DVPP_CSC_MATRIX_BT709_WIDE, | |||
| ACL_DVPP_CSC_MATRIX_BT709_NARROW, | |||
| ACL_DVPP_CSC_MATRIX_BT2020_WIDE, | |||
| ACL_DVPP_CSC_MATRIX_BT2020_NARROW | |||
| }; | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief alloc device memory for dvpp. | |||
| @@ -1910,9 +1924,9 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc | |||
| * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync( | |||
| acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, | |||
| acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[], | |||
| acldvppResizeConfig *resizeConfig, aclrtStream stream); | |||
| acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, | |||
| acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[], | |||
| acldvppResizeConfig *resizeConfig, aclrtStream stream); | |||
| /** | |||
| * @ingroup AscendCL | |||
| @@ -2557,10 +2571,93 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); | |||
| * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync( | |||
| acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, | |||
| acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[], | |||
| acldvppResizeConfig *resizeConfig, aclrtStream stream); | |||
| acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, | |||
| acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[], | |||
| acldvppResizeConfig *resizeConfig, aclrtStream stream); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief set param for dvpp channel desc | |||
| * | |||
| * @par Function | |||
| * set the attribute in dvpp channelDesc for the specified param type | |||
| * | |||
| * @param channelDesc [OUT] the channel description | |||
| * @param paramType [IN] specified param type | |||
| * @param length [IN] mem length of param | |||
| * @param param [IN] pointer to param | |||
| * | |||
| * @retval ACL_SUCCESS The function is successfully executed. | |||
| * @retval OtherValues Failure | |||
| * | |||
| * @see acldvppGetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescParam(acldvppChannelDesc *channelDesc, | |||
| acldvppChannelDescParamType paramType, size_t length, | |||
| const void *param); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief get param of dvpp channel desc | |||
| * | |||
| * @par Function | |||
| * get the attribute value in dvpp channelDesc for the specified param type | |||
| * | |||
| * @param channelDesc [IN] the channel description | |||
| * @param paramType [IN] specified param type | |||
| * @param length [IN] mem length allocated for output param | |||
| * @param paramRetSize [OUT] mem length of output param | |||
| * @param param [OUT] pointer to output param | |||
| * | |||
| * @retval ACL_SUCCESS The function is successfully executed. | |||
| * @retval OtherValues Failure | |||
| * | |||
| * @see acldvppSetChannelDescParam | acldvppCreateChannelDesc | acldvppDestroyChannelDesc | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError acldvppGetChannelDescParam(const acldvppChannelDesc *channelDesc, | |||
| acldvppChannelDescParamType paramType, size_t length, | |||
| size_t *paramRetSize, void *param); | |||
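| // Example (editor's sketch): select a BT.601 wide-range CSC matrix on a dvpp | |||
| // channel, then read it back; the matrix is carried as a uint32_t, matching | |||
| // ACL_DVPP_CSC_MATRIX_UINT32. | |||
| static inline aclError aclExampleSetCscMatrix(acldvppChannelDesc *desc) { | |||
|   uint32_t matrix = ACL_DVPP_CSC_MATRIX_BT601_WIDE; | |||
|   aclError ret = acldvppSetChannelDescParam(desc, ACL_DVPP_CSC_MATRIX_UINT32, | |||
|                                             sizeof(matrix), &matrix); | |||
|   if (ret == ACL_SUCCESS) { | |||
|     uint32_t readBack = 0; | |||
|     size_t retSize = 0; | |||
|     ret = acldvppGetChannelDescParam(desc, ACL_DVPP_CSC_MATRIX_UINT32, | |||
|                                      sizeof(readBack), &retSize, &readBack); | |||
|   } | |||
|   return ret; | |||
| } | |||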
| /** | |||
| * @ingroup AscendCL | |||
| * @brief set param for vdec channel desc | |||
| * | |||
| * @par Function | |||
| * set the attribute in vdec channelDesc for the specified param type | |||
| * | |||
| * @param channelDesc [OUT] the vdec channel description | |||
| * @param paramType [IN] specified param type | |||
| * @param length [IN] mem length of param | |||
| * @param param [IN] pointer to param | |||
| * | |||
| * @retval ACL_SUCCESS The function is successfully executed. | |||
| * @retval OtherValues Failure | |||
| * | |||
| * @see aclvdecGetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescParam(aclvdecChannelDesc *channelDesc, | |||
| aclvdecChannelDescParamType paramType, size_t length, | |||
| const void *param); | |||
| /** | |||
| * @ingroup AscendCL | |||
| * @brief get param of vdec channel desc | |||
| * | |||
| * @par Function | |||
| * get the attribute value in vdec channelDesc for the specified param type | |||
| * | |||
| * @param channelDesc [IN] the vdec channel description | |||
| * @param paramType [IN] specified param type | |||
| * @param length [IN] mem length allocated for output param | |||
| * @param paramRetSize [OUT] mem length of output param | |||
| * @param param [OUT] pointer to output param | |||
| * | |||
| * @retval ACL_SUCCESS The function is successfully executed. | |||
| * @retval OtherValues Failure | |||
| * | |||
| * @see aclvdecSetChannelDescParam | aclvdecCreateChannelDesc | aclvdecDestroyChannelDesc | |||
| */ | |||
| ACL_FUNC_VISIBILITY aclError aclvdecGetChannelDescParam(const aclvdecChannelDesc *channelDesc, | |||
| aclvdecChannelDescParamType paramType, size_t length, | |||
| size_t *paramRetSize, void *param); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -1,18 +1,18 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef INC_EXTERNAL_GE_IR_BUILD_H_ | |||
| #define INC_EXTERNAL_GE_IR_BUILD_H_ | |||
| @@ -144,6 +144,33 @@ extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank); | |||
| */ | |||
| extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream); | |||
| /** | |||
| * @brief Send operator. | |||
| * | |||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||
| * @param count An integer(u64) identifying the number of the data to be sent. | |||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
| * @param destRank An integer identifying the destination rank. | |||
| * @param comm A pointer identifying the communication resource. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult HcclSend(void *sendBuf, uint64_t count, HcclDataType dataType, uint32_t destRank, HcclComm comm, | |||
| aclrtStream stream); | |||
| /** | |||
| * @brief Receive operator. | |||
| * | |||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||
| * @param count An integer(u64) identifying the number of the data to be received. | |||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
| * @param srcRank An integer identifying the source rank. | |||
| * @param comm A pointer identifying the communication resource. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult HcclRecv(void *recvBuf, uint64_t count, HcclDataType dataType, uint32_t srcRank, HcclComm comm, | |||
| aclrtStream stream); | |||
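| // Example (editor's sketch, assuming HCCL_DATA_TYPE_FP32 from hccl_types.h): | |||
| // point-to-point exchange where rank 0 sends a float32 buffer to rank 1. | |||
| static inline HcclResult HcclExampleSendRecv(HcclComm comm, uint32_t myRank, | |||
|                                              void *buf, uint64_t count, | |||
|                                              aclrtStream stream) { | |||
|   if (myRank == 0) { | |||
|     return HcclSend(buf, count, HCCL_DATA_TYPE_FP32, 1, comm, stream); | |||
|   } | |||
|   return HcclRecv(buf, count, HCCL_DATA_TYPE_FP32, 0, comm, stream); | |||
| } | |||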
| /** | |||
| * @brief Destroy HCCL comm | |||
| * | |||
| @@ -50,14 +50,30 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| public: | |||
| GeExecutor(); | |||
| ~GeExecutor() = default; | |||
| ge::Status Initialize(); | |||
| ge::Status Finalize(); | |||
| ge::Status UnloadModel(uint32_t modelId); | |||
| Status Initialize(); | |||
| Status Finalize(); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Initialize global execute environment. | |||
| /// @param [in] options: environment variables. | |||
| /// @return init result | |||
| /// | |||
| static Status Initialize(const std::map<std::string, std::string> &options); | |||
| /// | |||
| /// @ingroup ge | |||
| /// @brief Finalize global execute environment. | |||
| /// @return execute result | |||
| /// | |||
| static Status FinalizeEx(); | |||
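| /// Example (editor's sketch) of the global execute environment lifecycle: | |||
| ///   std::map<std::string, std::string> options;  // e.g. tuned init options | |||
| ///   if (GeExecutor::Initialize(options) == SUCCESS) { | |||
| ///     GeExecutor executor; | |||
| ///     // ... load and execute models through `executor` ... | |||
| ///   } | |||
| ///   (void)GeExecutor::FinalizeEx();  // tear down the global environment | |||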
| Status UnloadModel(uint32_t modelId); | |||
| // Get input and output descriptor | |||
| ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | |||
| std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false); | |||
| Status GetModelDescInfo(uint32_t model_id, std::vector<TensorDesc> &input_desc, std::vector<TensorDesc> &output_desc, | |||
| bool new_model_desc = false); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -68,7 +84,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [in] batch_size: batch size entered by user in dynamic multi-batch scenario | |||
| /// @return execute result | |||
| /// | |||
| ge::Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size); | |||
| Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -80,8 +96,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [in] image_width: image width entered by user in dynamic multi-resolution scenario | |||
| /// @return execute result | |||
| /// | |||
| ge::Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height, | |||
| uint64_t image_width); | |||
| Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height, | |||
| uint64_t image_width); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -93,8 +109,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [in] dynamic_dims: array of dynamic dimensions | |||
| /// @return execute result | |||
| /// | |||
| ge::Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
| const std::vector<uint64_t> &dynamic_dims); | |||
| Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
| const std::vector<uint64_t> &dynamic_dims); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -104,8 +120,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [out] cur_dynamic_dims: current dynamic dims | |||
| /// @return execute result | |||
| /// | |||
| ge::Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &dynamic_dims, | |||
| std::vector<uint64_t> &cur_dynamic_dims); | |||
| Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &dynamic_dims, | |||
| std::vector<uint64_t> &cur_dynamic_dims); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -115,8 +131,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [out] dynamic_type | |||
| /// @return execute result | |||
| /// | |||
| ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||
| int32_t &dynamic_type); | |||
| Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -125,7 +140,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [out] batch_info | |||
| /// @return execute result | |||
| /// | |||
| ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
| Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -134,9 +149,9 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [out] user_designate_shape_order | |||
| /// @return execute result | |||
| /// | |||
| ge::Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order); | |||
| Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order); | |||
| ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
| Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -148,22 +163,22 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [in] aippParms: kAippDynamicPara by user in dynamic aipp | |||
| /// @return execute result | |||
| /// | |||
| ge::Status SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
| const std::vector<kAippDynamicBatchPara> &aippBatchPara, | |||
| const kAippDynamicPara &aippParms); | |||
| Status SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
| const std::vector<kAippDynamicBatchPara> &aipp_batch_para, | |||
| const kAippDynamicPara &aippParms); | |||
| ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | |||
| Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | |||
| ge::Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name, | |||
| std::string &attr_value); | |||
| Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name, | |||
| std::string &attr_value); | |||
| ge::Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info); | |||
| Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info); | |||
| ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | |||
| Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | |||
| ge::Status CommandHandle(const ge::Command &command); | |||
| Status CommandHandle(const Command &command); | |||
| ge::Status SetDump(const DumpConfig &dump_config); | |||
| Status SetDump(const DumpConfig &dump_config); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -173,7 +188,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @return SUCCESS | |||
| /// @return FAILED | |||
| /// | |||
| ge::Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size); | |||
| Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -182,7 +197,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [out] ModelData &model_data: Offline model memory data | |||
| /// @return SUCCESS handle successfully / others handle failed | |||
| /// | |||
| ge::Status LoadDataFromFile(const std::string &path, ge::ModelData &model_data); | |||
| Status LoadDataFromFile(const std::string &path, ModelData &model_data); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -195,8 +210,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [out] uint32_t &model_id: Corresponding identification after model loading | |||
| /// @return SUCCESS handle successfully / others handle failed | |||
| /// | |||
| ge::Status LoadModelFromData(uint32_t &model_id, const ge::ModelData &model_data, void *dev_ptr, size_t mem_size, | |||
| void *weight_ptr, size_t weight_size); | |||
| Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size, | |||
| void *weight_ptr, size_t weight_size); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -207,9 +222,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [in] output_queue_ids: input queue ids create from user. | |||
| /// @return: 0 for success / others for fail | |||
| /// | |||
| ge::Status LoadModelWithQ(uint32_t &model_id, const ge::ModelData &model_data, | |||
| const std::vector<uint32_t> &input_queue_ids, | |||
| const std::vector<uint32_t> &output_queue_ids); | |||
| Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids, | |||
| const std::vector<uint32_t> &output_queue_ids); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -221,8 +235,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [out] domi::OutputData *output_data: Model output data | |||
| /// @return SUCCESS handle successfully / others handle failed | |||
| /// | |||
| ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data, | |||
| ge::RunModelData &output_data, bool async_mode = false); | |||
| Status ExecModel(uint32_t model_id, void *stream, const RunModelData &input_data, RunModelData &output_data, | |||
| bool async_mode = false); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -236,9 +250,9 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [out] std::vector<GeTensorDesc> &output_desc: description of model output data | |||
| /// @return SUCCESS handle successfully / others handle failed | |||
| /// | |||
| ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, | |||
| const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data, | |||
| std::vector<GeTensorDesc> &output_desc, bool async_mode = false); | |||
| Status ExecModel(uint32_t model_id, void *stream, const RunModelData &run_input_data, | |||
| const std::vector<GeTensorDesc> &input_desc, RunModelData &run_output_data, | |||
| std::vector<GeTensorDesc> &output_desc, bool async_mode = false); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -248,7 +262,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [out] size_t &weight_size Weight memory space size | |||
| /// @return SUCCESS handle successfully / others handle failed | |||
| /// | |||
| ge::Status GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size); | |||
| Status GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size); | |||
| /// | |||
| /// @ingroup ge | |||
| @@ -259,39 +273,39 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
| /// @param [out] size_t &weight_size Weight memory space size | |||
| /// @return SUCCESS handle successfully / others handle failed | |||
| /// | |||
| ge::Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size); | |||
| Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size); | |||
| static ge::Status LoadSingleOp(const std::string &modelName, const ge::ModelData &modelData, void *stream, | |||
| SingleOp **single_op); | |||
| static Status LoadSingleOp(const std::string &modelName, const ModelData &modelData, void *stream, | |||
| SingleOp **single_op); | |||
| static ge::Status LoadSingleOpV2(const std::string &modelName, const ge::ModelData &modelData, void *stream, | |||
| SingleOp **single_op, const uint64_t model_id); | |||
| static Status LoadSingleOpV2(const std::string &modelName, const ModelData &modelData, void *stream, | |||
| SingleOp **single_op, const uint64_t model_id); | |||
| static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | |||
| std::vector<DataBuffer> &outputs); | |||
| static Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | |||
| std::vector<DataBuffer> &outputs); | |||
| static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||
| DynamicSingleOp **single_op); | |||
| static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &modelData, void *stream, | |||
| DynamicSingleOp **single_op); | |||
| static ge::Status LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||
| DynamicSingleOp **single_op, const uint64_t model_id); | |||
| static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &modelData, void *stream, | |||
| DynamicSingleOp **single_op, const uint64_t model_id); | |||
| static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc, | |||
| const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc, | |||
| std::vector<DataBuffer> &outputs); | |||
| static Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc, | |||
| const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc, | |||
| std::vector<DataBuffer> &outputs); | |||
| static ge::Status ReleaseSingleOpResource(void *stream); | |||
| static Status ReleaseSingleOpResource(void *stream); | |||
| static ge::Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); | |||
| static Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); | |||
| ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); | |||
| ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | |||
| ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | |||
| std::vector<InputOutputDims> &output_dims); | |||
| ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); | |||
| Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); | |||
| Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | |||
| Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | |||
| std::vector<InputOutputDims> &output_dims); | |||
| Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); | |||
| private: | |||
| static bool isInit_; | |||
| static std::atomic_bool is_inited_; | |||
| }; | |||
| } // namespace ge | |||
| @@ -50,10 +50,18 @@ enum TaskInfoType { | |||
| class TaskInfo { | |||
| public: | |||
| virtual ~TaskInfo() {} | |||
| uint32_t stream_id() const { return stream_id_; } | |||
| TaskInfoType type() const { return type_; } | |||
| std::string op_name() const { return op_name_; } | |||
| bool dump_flag() const { return dump_flag_; } | |||
| uint32_t stream_id() const { | |||
| return stream_id_; | |||
| } | |||
| TaskInfoType type() const { | |||
| return type_; | |||
| } | |||
| std::string op_name() const { | |||
| return op_name_; | |||
| } | |||
| bool dump_flag() const { | |||
| return dump_flag_; | |||
| } | |||
| protected: | |||
| TaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, bool dump_flag) | |||
| @@ -84,15 +92,33 @@ class CceTaskInfo : public TaskInfo { | |||
| is_flowtable_(is_flowtable) {} | |||
| ~CceTaskInfo() override {} | |||
| cce::ccOpContext cc_context() const { return ctx_; } | |||
| std::string stub_func() const { return stub_func_; } | |||
| uint32_t block_dim() const { return block_dim_; } | |||
| const std::vector<uint8_t> &args() const { return args_; } | |||
| uint32_t args_size() const { return args_size_; } | |||
| const std::vector<uint8_t> &sm_desc() const { return sm_desc_; } | |||
| const std::vector<uint8_t> &flow_table() const { return flow_table_; } | |||
| const std::vector<uint8_t> &args_offset() const { return args_offset_; } | |||
| bool is_flowtable() const { return is_flowtable_; } | |||
| cce::ccOpContext cc_context() const { | |||
| return ctx_; | |||
| } | |||
| std::string stub_func() const { | |||
| return stub_func_; | |||
| } | |||
| uint32_t block_dim() const { | |||
| return block_dim_; | |||
| } | |||
| const std::vector<uint8_t> &args() const { | |||
| return args_; | |||
| } | |||
| uint32_t args_size() const { | |||
| return args_size_; | |||
| } | |||
| const std::vector<uint8_t> &sm_desc() const { | |||
| return sm_desc_; | |||
| } | |||
| const std::vector<uint8_t> &flow_table() const { | |||
| return flow_table_; | |||
| } | |||
| const std::vector<uint8_t> &args_offset() const { | |||
| return args_offset_; | |||
| } | |||
| bool is_flowtable() const { | |||
| return is_flowtable_; | |||
| } | |||
| private: | |||
| cce::ccOpContext ctx_; | |||
| @@ -126,17 +152,39 @@ class TbeTaskInfo : public TaskInfo { | |||
| workspace_addrs_(workspace_addrs) {} | |||
| ~TbeTaskInfo() override {} | |||
| const std::string &stub_func() const { return stub_func_; } | |||
| uint32_t block_dim() const { return block_dim_; } | |||
| const std::vector<uint8_t> &args() const { return args_; } | |||
| uint32_t args_size() const { return args_size_; } | |||
| const std::vector<uint8_t> &sm_desc() const { return sm_desc_; } | |||
| void *binary() const { return binary_; } | |||
| uint32_t binary_size() const { return binary_size_; } | |||
| const std::vector<uint8_t> &meta_data() const { return meta_data_; } | |||
| const std::vector<void *> &input_data_addrs() const { return input_data_addrs_; } | |||
| const std::vector<void *> &output_data_addrs() const { return output_data_addrs_; } | |||
| const std::vector<void *> &workspace_addrs() const { return workspace_addrs_; } | |||
| const std::string &stub_func() const { | |||
| return stub_func_; | |||
| } | |||
| uint32_t block_dim() const { | |||
| return block_dim_; | |||
| } | |||
| const std::vector<uint8_t> &args() const { | |||
| return args_; | |||
| } | |||
| uint32_t args_size() const { | |||
| return args_size_; | |||
| } | |||
| const std::vector<uint8_t> &sm_desc() const { | |||
| return sm_desc_; | |||
| } | |||
| void *binary() const { | |||
| return binary_; | |||
| } | |||
| uint32_t binary_size() const { | |||
| return binary_size_; | |||
| } | |||
| const std::vector<uint8_t> &meta_data() const { | |||
| return meta_data_; | |||
| } | |||
| const std::vector<void *> &input_data_addrs() const { | |||
| return input_data_addrs_; | |||
| } | |||
| const std::vector<void *> &output_data_addrs() const { | |||
| return output_data_addrs_; | |||
| } | |||
| const std::vector<void *> &workspace_addrs() const { | |||
| return workspace_addrs_; | |||
| } | |||
| void SetBinary(void *binary, uint32_t binary_size) { | |||
| binary_ = binary; | |||
| @@ -171,12 +219,24 @@ class AicpuTaskInfo : public TaskInfo { | |||
| output_data_addrs_(output_data_addrs) {} | |||
| ~AicpuTaskInfo() override {} | |||
| const std::string &so_name() const { return so_name_; } | |||
| const std::string &kernel_name() const { return kernel_name_; } | |||
| const std::string &node_def() const { return node_def_; } | |||
| const std::vector<void *> &input_data_addrs() const { return input_data_addrs_; } | |||
| const std::vector<void *> &output_data_addrs() const { return output_data_addrs_; } | |||
| const std::string &ext_info() const { return ext_info_; } | |||
| const std::string &so_name() const { | |||
| return so_name_; | |||
| } | |||
| const std::string &kernel_name() const { | |||
| return kernel_name_; | |||
| } | |||
| const std::string &node_def() const { | |||
| return node_def_; | |||
| } | |||
| const std::vector<void *> &input_data_addrs() const { | |||
| return input_data_addrs_; | |||
| } | |||
| const std::vector<void *> &output_data_addrs() const { | |||
| return output_data_addrs_; | |||
| } | |||
| const std::string &ext_info() const { | |||
| return ext_info_; | |||
| } | |||
| private: | |||
| std::string so_name_; | |||
| @@ -192,7 +252,9 @@ class LabelSetTaskInfo : public TaskInfo { | |||
| LabelSetTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) | |||
| : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SET, false), label_id_(label_id) {} | |||
| ~LabelSetTaskInfo() override {} | |||
| uint32_t label_id() const { return label_id_; } | |||
| uint32_t label_id() const { | |||
| return label_id_; | |||
| } | |||
| private: | |||
| uint32_t label_id_; | |||
| @@ -203,7 +265,9 @@ class LabelGotoTaskInfo : public TaskInfo { | |||
| LabelGotoTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) | |||
| : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_GOTO, false), label_id_(label_id) {} | |||
| ~LabelGotoTaskInfo() override {} | |||
| uint32_t label_id() const { return label_id_; } | |||
| uint32_t label_id() const { | |||
| return label_id_; | |||
| } | |||
| private: | |||
| uint32_t label_id_; | |||
| @@ -218,9 +282,15 @@ class LabelSwitchTaskInfo : public TaskInfo { | |||
| label_list_(label_list), | |||
| cond_(cond) {} | |||
| ~LabelSwitchTaskInfo() override {} | |||
| uint32_t label_size() const { return label_size_; } | |||
| const std::vector<uint32_t> &label_list() const { return label_list_; } | |||
| void *cond() const { return cond_; } | |||
| uint32_t label_size() const { | |||
| return label_size_; | |||
| } | |||
| const std::vector<uint32_t> &label_list() const { | |||
| return label_list_; | |||
| } | |||
| void *cond() const { | |||
| return cond_; | |||
| } | |||
| private: | |||
| uint32_t label_size_; | |||
| @@ -230,7 +300,9 @@ class LabelSwitchTaskInfo : public TaskInfo { | |||
| class EventTaskInfo : public TaskInfo { | |||
| public: | |||
| uint32_t event_id() const { return event_id_; } | |||
| uint32_t event_id() const { | |||
| return event_id_; | |||
| } | |||
| protected: | |||
| EventTaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, uint32_t event_id) | |||
| @@ -271,14 +343,13 @@ class FusionEndTaskInfo : public TaskInfo { | |||
| class HcclTaskInfo : public TaskInfo { | |||
| public: | |||
| HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr, | |||
| void *output_data_addr, void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, | |||
| void *output_data_addr, int64_t workspace_size, int64_t hccl_stream_num, | |||
| const std::vector<uint8_t> &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, | |||
| int64_t op_type, int64_t data_type, const std::string &group, bool dump_flag) | |||
| : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag), | |||
| hccl_type_(hccl_type), | |||
| input_data_addr_(input_data_addr), | |||
| output_data_addr_(output_data_addr), | |||
| workspace_addr_(workspace_addr), | |||
| workspace_size_(workspace_size), | |||
| hccl_stream_num_(hccl_stream_num), | |||
| private_def_(private_def), | |||
| @@ -290,25 +361,47 @@ class HcclTaskInfo : public TaskInfo { | |||
| group_(group) {} | |||
| ~HcclTaskInfo() override {} | |||
| const std::string &hccl_type() const { return hccl_type_; } | |||
| void *input_data_addr() const { return input_data_addr_; } | |||
| void *output_data_addr() const { return output_data_addr_; } | |||
| void *workspace_addr() const { return workspace_addr_; } | |||
| int64_t workspace_size() const { return workspace_size_; } | |||
| int64_t hccl_stream_num() const { return hccl_stream_num_; } | |||
| const std::vector<uint8_t> &private_def() const { return private_def_; } | |||
| void *ops_kernel_store() const { return ops_kernel_store_; } | |||
| int32_t count() const { return count_; } | |||
| int64_t root_id() const { return root_id_; } | |||
| int64_t op_type() const { return op_type_; } | |||
| int64_t data_type() const { return data_type_; } | |||
| const std::string &group() const { return group_; } | |||
| const std::string &hccl_type() const { | |||
| return hccl_type_; | |||
| } | |||
| void *input_data_addr() const { | |||
| return input_data_addr_; | |||
| } | |||
| void *output_data_addr() const { | |||
| return output_data_addr_; | |||
| } | |||
| int64_t workspace_size() const { | |||
| return workspace_size_; | |||
| } | |||
| int64_t hccl_stream_num() const { | |||
| return hccl_stream_num_; | |||
| } | |||
| const std::vector<uint8_t> &private_def() const { | |||
| return private_def_; | |||
| } | |||
| void *ops_kernel_store() const { | |||
| return ops_kernel_store_; | |||
| } | |||
| int32_t count() const { | |||
| return count_; | |||
| } | |||
| int64_t root_id() const { | |||
| return root_id_; | |||
| } | |||
| int64_t op_type() const { | |||
| return op_type_; | |||
| } | |||
| int64_t data_type() const { | |||
| return data_type_; | |||
| } | |||
| const std::string &group() const { | |||
| return group_; | |||
| } | |||
| private: | |||
| std::string hccl_type_; | |||
| void *input_data_addr_; | |||
| void *output_data_addr_; | |||
| void *workspace_addr_; | |||
| int64_t workspace_size_; | |||
| int64_t hccl_stream_num_; | |||
| std::vector<uint8_t> private_def_; | |||
| @@ -329,9 +422,15 @@ class ProfilerTraceTaskInfo : public TaskInfo { | |||
| flat_(flat) {} | |||
| ~ProfilerTraceTaskInfo() override {} | |||
| uint64_t log_id() const { return log_id_; } | |||
| bool notify() const { return notify_; } | |||
| uint32_t flat() const { return flat_; } | |||
| uint64_t log_id() const { | |||
| return log_id_; | |||
| } | |||
| bool notify() const { | |||
| return notify_; | |||
| } | |||
| uint32_t flat() const { | |||
| return flat_; | |||
| } | |||
| private: | |||
| uint64_t log_id_; | |||
| @@ -351,11 +450,21 @@ class MemcpyAsyncTaskInfo : public TaskInfo { | |||
| kind_(kind) {} | |||
| ~MemcpyAsyncTaskInfo() override {} | |||
| void *dst() const { return dst_; } | |||
| uint64_t dst_max() const { return dst_max_; } | |||
| void *src() const { return src_; } | |||
| uint64_t count() const { return count_; } | |||
| uint32_t kind() const { return kind_; } | |||
| void *dst() const { | |||
| return dst_; | |||
| } | |||
| uint64_t dst_max() const { | |||
| return dst_max_; | |||
| } | |||
| void *src() const { | |||
| return src_; | |||
| } | |||
| uint64_t count() const { | |||
| return count_; | |||
| } | |||
| uint32_t kind() const { | |||
| return kind_; | |||
| } | |||
| private: | |||
| void *dst_; | |||
| @@ -377,11 +486,21 @@ class StreamSwitchTaskInfo : public TaskInfo { | |||
| data_type_(data_type) {} | |||
| ~StreamSwitchTaskInfo() override {} | |||
| int64_t true_stream_id() const { return true_stream_id_; } | |||
| void *input_addr() const { return input_addr_; } | |||
| void *value_addr() const { return value_addr_; } | |||
| int64_t cond() const { return cond_; } | |||
| int64_t data_type() const { return data_type_; } | |||
| int64_t true_stream_id() const { | |||
| return true_stream_id_; | |||
| } | |||
| void *input_addr() const { | |||
| return input_addr_; | |||
| } | |||
| void *value_addr() const { | |||
| return value_addr_; | |||
| } | |||
| int64_t cond() const { | |||
| return cond_; | |||
| } | |||
| int64_t data_type() const { | |||
| return data_type_; | |||
| } | |||
| private: | |||
| int64_t true_stream_id_; | |||
| @@ -397,7 +516,9 @@ class StreamActiveTaskInfo : public TaskInfo { | |||
| : TaskInfo(op_name, stream_id, TaskInfoType::STREAM_ACTIVE, false), active_stream_id_(active_stream_id) {} | |||
| ~StreamActiveTaskInfo() override {} | |||
| uint32_t active_stream_id() const { return active_stream_id_; } | |||
| uint32_t active_stream_id() const { | |||
| return active_stream_id_; | |||
| } | |||
| private: | |||
| uint32_t active_stream_id_; | |||
| @@ -35,7 +35,7 @@ namespace ge { | |||
| * @li values:A `Tensor`. Must have the same type as `sorted_x`. \n | |||
| *@par Attributes: | |||
| *@li out_type:An optional `DType` from: `int32, int64`. | |||
| *out_type:An optional `DType` from: `int32, int64`. | |||
| Defaults to `int32`. \n | |||
| *@par Outputs: | |||
| @@ -504,7 +504,7 @@ REG_OP(Constant) | |||
| *x: A tensor. \n | |||
| *@par Outputs: | |||
| *y: A tensor. \n | |||
| *y: A copy of input tensor. \n | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the TensorFlow operator Snapshot. | |||
| @@ -684,7 +684,9 @@ REG_OP(ExpandDims) | |||
| *@par Inputs: | |||
| *@li x: Original tensor. | |||
| *@li axis: List of ints. \n | |||
| *@par Attributes: | |||
| *@li axes: List of ints indicating the dimensions to be inserted. \n | |||
| *@par Outputs: | |||
| *y: Reshape tensor with same data as input. \n | |||
| @@ -755,10 +757,10 @@ REG_OP(Squeeze) | |||
| *@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n | |||
| *@par Inputs: | |||
| *x: A tensor. \n | |||
| *x: A Tensor of type float32, float16, int8, int16, uint16, uint8, int32, int64, uint32, uint64, bool, double. \n | |||
| *@par Outputs: | |||
| *y: A tensor. The rank of input tensor. \n | |||
| *y: A tensor. The rank of input tensor. Type is int32. \n | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the TensorFlow operator Rank. | |||
| @@ -848,7 +850,6 @@ REG_OP(PlaceHolder) | |||
| *x: A tensor. \n | |||
| *@par Attributes: | |||
| *@li dtype: data type of tensor. | |||
| *@li shape: tensor shape. \n | |||
| *@par Outputs: | |||
| @@ -867,13 +868,13 @@ REG_OP(PlaceholderWithDefault) | |||
| *@brief Reads and returns the value of the input variable tensor. \n | |||
| *@par Inputs: | |||
| *x: A tensor. \n | |||
| *x: A tensor must have numeric type. \n | |||
| *@par Attributes: | |||
| *dtype: An optional int32 or int64. The output data type. Defaults to int32. \n | |||
| *@par Outputs: | |||
| *y: A tensor. \n | |||
| *y: A tensor must have numeric type. \n | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the TensorFlow operator ReadVariableOp. | |||
| @@ -1134,10 +1135,10 @@ This is an M-length vector. | |||
| This is an R-length vector | |||
| *@par Attributes: | |||
| *@li normalize: boolean (if true, edit distances are normalized by length of truth). \n | |||
| *normalize: boolean (if true, edit distances are normalized by length of truth). \n | |||
| *@par Outputs: | |||
| *@li output: A dense float tensor with rank R - 1. \n | |||
| *output: A dense float tensor with rank R - 1. \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with TensorFlow EditDistance operator. | |||
| @@ -1154,18 +1155,17 @@ REG_OP(EditDistance) | |||
| .OP_END_FACTORY_REG(EditDistance) | |||
| /** | |||
| * @brief sort_v2. | |||
| * @brief sort the input tensor without returning the indices. | |||
| * @par Inputs: | |||
| * @li x: An ND tensor of type float16. | |||
| * x: An ND tensor of type float16. | |||
| * @par Attributes: | |||
| * @li axis: An optional int. The dimension to sort along. This value defaults to -1. | |||
| * @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False. | |||
| * @par Outputs: | |||
| * @li y: An ND tensor of type float16. | |||
| * y: An ND tensor of type float16. | |||
| * @attention Constraints: | |||
| * @li Axis should select the last dim. | |||
| @@ -1206,7 +1206,7 @@ REG_OP(Expand) | |||
| *@Returns a tensor containing the indices of all non-zero elements of input. \n | |||
| *@par Inputs: | |||
| *@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64. | |||
| *x: A Tensor. Must be one of the following types: float16, float32, int32, int64. | |||
| *@par Attributes: | |||
| * transpose: the output tensor will be transposed if true. \n | |||
| @@ -1230,15 +1230,15 @@ REG_OP(NonZero) | |||
| * @par Inputs: | |||
| * One inputs, including: | |||
| * @li x: A Tensor. Must be one of the following types: | |||
| * x: A Tensor. Must be one of the following types: | |||
| * float16, float32, int32, int8 ,uint8. \n | |||
| * @par Attributes: | |||
| * @li shape: A required listInt to specify the shape that the input tensor expanded to. \n | |||
| * shape: A required listInt to specify the shape that the input tensor expanded to. \n | |||
| * @par Outputs: | |||
| * @li y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n | |||
| * y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the ONNX operator Expand. | |||
| @@ -1249,6 +1249,38 @@ REG_OP(ExpandD) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
| .REQUIRED_ATTR(shape, ListInt) | |||
| .OP_END_FACTORY_REG(ExpandD) | |||
| /** | |||
| *@brief Finds unique elements in a 1D tensor. \n | |||
| *@par Inputs: | |||
| *x: 1D tensor. Must be one of the following types: | |||
| * float16, float32, double, int64, int32, int16, uint16, int8 ,uint8. \n | |||
| *@par Attributes: | |||
| *@li return_inverse: Whether to also return the indices for where elements in the original | |||
| * input ended up in the returned unique list. | |||
| *@li return_counts: Whether to also return the counts for each unique element. | |||
| *@par Outputs: | |||
| *@li y1: The output list of unique scalar elements. Has the same type as "x". | |||
| *@li y2: Representing the indices for where elements in the original input map to in the output. | |||
| *@li y3: Representing the number of occurrences for each unique value or tensor. \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the torch operator _unique2. | |||
| */ | |||
| REG_OP(UniqueWithCountsAndSorting) | |||
| .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | |||
| DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||
| .OUTPUT(y1, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | |||
| DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||
| .OUTPUT(y2, TensorType({ DT_INT32, DT_INT64 })) | |||
| .OUTPUT(y3, TensorType({ DT_INT32, DT_INT64 })) | |||
| .ATTR(return_inverse, Bool, false) | |||
| .ATTR(return_counts, Bool, false) | |||
| .OP_END_FACTORY_REG(UniqueWithCountsAndSorting) | |||
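| // Worked example (editor's illustration): for input x = [1, 3, 2, 3] with | |||
| // return_inverse = true and return_counts = true, the op yields | |||
| //   y1 = [1, 2, 3]     (sorted unique values) | |||
| //   y2 = [0, 2, 1, 2]  (index in y1 for each element of x) | |||
| //   y3 = [1, 1, 2]     (occurrence count for each value in y1) | |||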
| } // namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ | |||
| @@ -96,7 +96,7 @@ REG_OP(RefMerge) | |||
| * Otherwise, the data is forwarded to "output_false" . \n | |||
| *@par Inputs: | |||
| *@li data: The tensor to be forwarded. \n | |||
| *@li data: The tensor to be forwarded. | |||
| * Must be one of the following types: float16, float32, float64, | |||
| * int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool. | |||
| *@li pred: A boolean scalar. The output port that will receive data . \n | |||
| @@ -74,7 +74,7 @@ REG_OP(CTCLoss) | |||
| *@li sequence_length: A vector containing sequence lengths, size `(batch_size)`. \n | |||
| *@par Attributes: | |||
| *@li merge_repeated: If True, merge repeated classes in output. \n | |||
| * merge_repeated: If True, merge repeated classes in output. \n | |||
| *@par Outputs: | |||
| *@li decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`, | |||
| @@ -108,6 +108,8 @@ REG_OP(CTCGreedyDecoder) | |||
| *@par Attributes: | |||
| *@li merge_repeated: If True, merge repeated classes in output. \n | |||
| *@li beam_width:A scalar >= 0 (beam search beam width). | |||
| *@li top_paths:A scalar >= 0, <= beam_width (controls output size). | |||
| *@par Outputs: | |||
| *@li decoded_indices: A list (length: top_paths) of indices matrices. Matrix j, | |||
| @@ -162,7 +164,7 @@ REG_OP(CTCBeamSearchDecoder) | |||
| * Compatible with Pytorch CTCLoss operator. | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| *The length of Label should be in [4, 1000]. | |||
| */ | |||
| REG_OP(CTCLossV2) | |||
| .INPUT(log_probs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
| @@ -203,7 +205,7 @@ REG_OP(CTCLossV2) | |||
| * Compatible with Pytorch CTCLoss operator. | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| *The limit of Label's length is 1K. | |||
| */ | |||
| REG_OP(CTCLossV2Grad) | |||
| .INPUT(grad_out, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
| @@ -1201,6 +1201,8 @@ REG_OP(TensorArraySize) | |||
| *@brief A queue implementation that dequeues elements in a random order. \n | |||
| *@par Attributes: | |||
| *@li component_types: A list of fully-defined TensorType objects with | |||
| the same length as shapes, or None. | |||
| *@li shapes: (Optional.) A list of fully-defined TensorShape objects with | |||
| the same length as dtypes, or None. | |||
| *@li capacity: An integer. The upper bound on the number of elements that may | |||
| @@ -1281,6 +1283,7 @@ The length of this attr must be either 0 or the same as the length of | |||
| elements are not constrained, and only one element may be dequeued at a time. | |||
| *@li container: An optional string. Defaults to "". If non-empty, this queue | |||
| is placed in the given container. Otherwise, a default container is used. | |||
| *@li capacity:An integer. The upper bound on the number of elements that may be stored in this queue. | |||
| *@li shared_name: An optional string. Defaults to "". If non-empty, this | |||
| queue will be shared under the given name across multiple sessions. \n | |||
| @@ -1435,7 +1438,7 @@ REG_OP(OrderedMapClear) | |||
| *@par Inputs: | |||
| *Including: | |||
| * @li resource: A Tensor of type DT_RESOURCE. | |||
| * resource: A Tensor of type DT_RESOURCE. | |||
| *@par Outputs: | |||
| *handle: A Tensor of type DT_STRING ref. \n | |||
| @@ -1526,7 +1529,7 @@ REG_OP(OrderedMapPeek) | |||
| *@par Inputs: | |||
| *Including: | |||
| * @li indices: A Tensor of type DT_INT32. \n | |||
| * indices: A Tensor of type DT_INT32. \n | |||
| *@par Attributes: | |||
| *@li capacity: An optional int that is >= 0. Defaults to "0". | |||
| @@ -2331,6 +2334,40 @@ REG_OP(CacheAllIndexToLocal) | |||
| .REQUIRED_ATTR(dtype, Type) | |||
| .OP_END_FACTORY_REG(CacheAllIndexToLocal) | |||
| /** | |||
| *@brief LRUCacheV2, aicore LRUCache. | |||
| *@par Inputs: | |||
| *index_list: exchange index list | |||
| *data: host data | |||
| *cache: gm cache | |||
| *tag: cache's tag | |||
| *is_last_call: if it is the last call, write all cached content back to data | |||
| *@par Outputs: | |||
| *data: output data | |||
| *cache: gm cache | |||
| *tag: cache's tag | |||
| *index_offset_list: index_offset_list | |||
| *not_in_cache_index_list: output not in cache's index_list | |||
| *not_in_cache_number: scalar | |||
| *@par Attributes: | |||
| *pre_route_count: types of all outputs | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(LRUCacheV2) | |||
| .INPUT(index_list, TensorType::BasicType()) | |||
| .INPUT(data, TensorType::BasicType()) | |||
| .INPUT(cache, TensorType::BasicType()) | |||
| .INPUT(tag, TensorType::BasicType()) | |||
| .INPUT(is_last_call, TensorType::BasicType()) | |||
| .OUTPUT(data, TensorType::BasicType()) | |||
| .OUTPUT(cache, TensorType::BasicType()) | |||
| .OUTPUT(tag, TensorType::BasicType()) | |||
| .OUTPUT(index_offset_list, TensorType::BasicType()) | |||
| .OUTPUT(not_in_cache_index_list, TensorType::BasicType()) | |||
| .OUTPUT(not_in_cache_number, TensorType::BasicType()) | |||
| .REQUIRED_ATTR(pre_route_count, Int) | |||
| .OP_END_FACTORY_REG(LRUCacheV2) | |||
| /** | |||
| *@brief DynamicGetNext, dynamic get next data | |||
| *@par Inputs: | |||
| @@ -624,9 +624,9 @@ REG_OP(Log1p) | |||
| *@attention Constraints: | |||
| *@li x2: The input data does not support 0 | |||
| *@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the | |||
| *@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the | |||
| *requirement of double thousandths in the mini form | |||
| *@li Due to different architectures, the calculation results of this operator | |||
| *@li Due to different architectures, the calculation results of this operator | |||
| *on NPU and CPU may be inconsistent | |||
| *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 | |||
| @@ -2066,9 +2066,9 @@ REG_OP(FloorDiv) | |||
| *@attention Constraints: | |||
| *@li x2: The input data does not support 0 | |||
| *@li When NUM exceeds 2048, the accuracy of the operator cannot guarantee the | |||
| *requirement of double thousandths in the mini form | |||
| *@li Due to different architectures, the calculation results of this operator | |||
| *on NPU and CPU may be inconsistent | |||
| *@li If shape is expressed as (D1,D2,...,Dn), then D1*D2*...*Dn<=1000000, n<=8 | |||
| @@ -2200,9 +2200,9 @@ REG_OP(Tan) | |||
| *@attention Constraints: | |||
| *@li x2: The input data does not support 0 | |||
| *@li When NUM exceeds 2048, the accuracy of the operator cannot guarantee the | |||
| *requirement of double thousandths in the mini form | |||
| *@li Due to different architectures, the calculation results of this operator | |||
| *on NPU and CPU may be inconsistent | |||
| *@li If shape is expressed as (D1,D2,...,Dn), then D1*D2*...*Dn<=1000000, n<=8 | |||
| @@ -2467,11 +2467,11 @@ REG_OP(Eltwise) | |||
| *@par Inputs: | |||
| *One input, including: | |||
| * @li input_x: A tensor. Must be one of the following types: | |||
| * input_x: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| *@par Outputs: | |||
| *y: A Tensor with the same type and shape of input_x's. \n | |||
| *output_y: A Tensor with the same type and shape as input_x. \n | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the Pytorch operator Erfinv. \n | |||
| @@ -3154,13 +3154,13 @@ REG_OP(FusedMulAddNL2loss) | |||
| *@brief Tests whether the input exceeds a threshold. \n | |||
| *@par Inputs: | |||
| *@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n | |||
| * x: A Tensor with any format. Must be one of the following types: float16, float32. \n | |||
| *@par Attributes: | |||
| *@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n | |||
| * threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n | |||
| *@par Outputs: | |||
| *@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32. | |||
| * y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32. | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the Caffe operator Threshold. | |||
| */ | |||
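| /** | |||
| * A minimal sketch of the thresholding rule described above, assuming the | |||
| * Caffe-style behavior y = (x > threshold) ? 1 : 0 applied element-wise. | |||
| * @code | |||
| * float ThresholdElem(float x, float threshold) { | |||
| *     return (x > threshold) ? 1.0f : 0.0f; | |||
| * } | |||
| * @endcode | |||
| */ | |||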
| @@ -3175,7 +3175,7 @@ REG_OP(FusedMulAddNL2loss) | |||
| *@brief Returns the index number corresponding to the maximum value entered. \n | |||
| *@par Inputs: | |||
| *@li x: A tensor. Must be one of the following types: float16, float32. \n | |||
| *x: A tensor. Must be one of the following types: float16, float32. \n | |||
| *@par Attributes: | |||
| *@li axis: An optional int. Specifies the axis along which the input tensor is cut. If this parameter is not provided, the topk is computed for each batch. Defaults to 10000 | |||
| @@ -3203,12 +3203,11 @@ REG_OP(ArgMaxWithK) | |||
| *@brief Multiply tensor with scale. \n | |||
| *@par Inputs: | |||
| *Five inputs, including: | |||
| * @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. | |||
| * @li x2: A scale. Must be float. \n | |||
| *One input, including: | |||
| *x: A Tensor. Must be one of the following types: int32, int16, float16, float32. | |||
| *@par Outputs: | |||
| *@li y: A Tensor. Has the same type and shape as "x1". \n | |||
| *y: A Tensor. Has the same type and shape as "x1". \n | |||
| *@par Third-party framework compatibility: | |||
| * Compatible with the Pytorch operator muls. | |||
| @@ -3223,12 +3222,11 @@ REG_OP(Muls) | |||
| *@brief Fill tensor with scale. \n | |||
| *@par Inputs: | |||
| *Five inputs, including: | |||
| * @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32. | |||
| * @li x2: A scale. Must be float. \n | |||
| *One input, including: | |||
| *x1: A Tensor. Must be one of the following types: int32, int16, float16, float32. | |||
| *@par Outputs: | |||
| *@li y: A Tensor. Has the same type and shape as "x1". \n | |||
| *y: A Tensor. Has the same type and shape as "x1". \n | |||
| *@par Third-party framework compatibility: | |||
| * Compatible with the Pytorch operator fills. | |||
| @@ -3378,7 +3376,7 @@ REG_OP(TensorMove) | |||
| *@par Inputs: | |||
| *One input, including: | |||
| * @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n | |||
| *x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n | |||
| *@par Outputs: | |||
| *output_x: A Tensor. Has the same type as "x". \n | |||
| @@ -3397,7 +3395,7 @@ REG_OP(TensorRedirect) | |||
| * multiply the result by the scalar value and add it to tensor input_data | |||
| * @par Inputs: | |||
| * Three inputs, including: | |||
| * Four inputs, including: | |||
| * @li input_data: A mutable input Tensor. Must be one of the following types: | |||
| * float16, float32. | |||
| * @li x1: A mutable input Tensor of the same type as input_data. | |||
| @@ -3406,7 +3404,7 @@ REG_OP(TensorRedirect) | |||
| * float16, float32, int32. \n | |||
| * @par Outputs: | |||
| * @li y: A mutable Tensor. Has the same type as "x1". \n | |||
| * y: A mutable Tensor. Has the same type as "x1". \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator Addcdiv. | |||
| @@ -3420,12 +3418,12 @@ REG_OP(Addcdiv) | |||
| .OP_END_FACTORY_REG(Addcdiv) | |||
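| /** | |||
| * A minimal scalar sketch of the Addcdiv semantics described above, assuming | |||
| * the PyTorch-style rule y = input_data + value * (x1 / x2). | |||
| * @code | |||
| * float AddcdivElem(float input_data, float x1, float x2, float value) { | |||
| *     return input_data + value * (x1 / x2); | |||
| * } | |||
| * @endcode | |||
| */ | |||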
| /** | |||
| * @brief Performs the element-wise multiplication of tensor x2 by tensor x3, | |||
| * multiplies the result by the scalar value and adds it to tensor input_data | |||
| * @par Inputs: | |||
| * Three inputs, including: | |||
| * Four inputs, including: | |||
| * @li input_data: A mutable input Tensor. Must be one of the following types: | |||
| * float16, float32, int8, int32, uint8. | |||
| * @li x1: A mutable input Tensor of the same type as input_data. | |||
| @@ -3433,7 +3431,7 @@ REG_OP(Addcdiv) | |||
| * @li value: A tensor which includes only one element of the same type as x1. \n | |||
| * @par Outputs: | |||
| * @li y: A mutable output Tensor. Has the same type as "x1". \n | |||
| * y: A mutable output Tensor. Has the same type as "x1". \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator Addcmul. | |||
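| * A minimal scalar sketch, assuming the PyTorch-style rule | |||
| * y = input_data + value * (x1 * x2): | |||
| * @code | |||
| * float AddcmulElem(float input_data, float x1, float x2, float value) { | |||
| *     return input_data + value * (x1 * x2); | |||
| * } | |||
| * @endcode | |||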
| @@ -3455,7 +3453,7 @@ REG_OP(Addcmul) | |||
| * @li alpha: A scalar tensor of type float16, float32. \n | |||
| * @par Outputs: | |||
| * @li y: An ND tensor with the same shape and type as "x1". \n | |||
| * y: An ND tensor with the same shape and type as "x1". \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator Axpy. | |||
| @@ -3467,25 +3465,6 @@ REG_OP(AxpyV2) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
| .OP_END_FACTORY_REG(AxpyV2) | |||
| /** | |||
| * @brief Computes the result of x1 - x2. | |||
| * @par Inputs: | |||
| * @li x1: An ND tensor of type float16, float, int32. | |||
| * @li x2: An ND tensor of type float16, float, int32. \n | |||
| * @par Outputs: | |||
| * @li y: An ND tensor tensor with the same type as "x1". \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator Sub. | |||
| */ | |||
| REG_OP(PtSub) | |||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
| .OP_END_FACTORY_REG(PtSub) | |||
| /** | |||
| * @brief Add the partial values of two tensors in format NC1HWC0. | |||
| @@ -3502,7 +3481,7 @@ REG_OP(PtSub) | |||
| * the difference between C1 and offset in "x1" and "x2". \n | |||
| * @par Outputs: | |||
| * @li y: A Tensor of the same type as "x1", and the same shape as "x1", | |||
| * y: A Tensor of the same type as "x1", and the same shape as "x1", | |||
| * except for the C1 value. Record the result after adding. \n | |||
| */ | |||
| REG_OP(StrideAdd) | |||
| @@ -3523,7 +3502,7 @@ REG_OP(StrideAdd) | |||
| * @li input_y: A Tensor. The second tensor. \n | |||
| * @par Outputs: | |||
| * @li output_z: A Tensor. Bool type, compare result of the two inputs. \n | |||
| *output_z: A Tensor. Bool type, compare result of the two inputs. \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch equal operator. \n | |||
| @@ -3535,21 +3514,21 @@ REG_OP(TensorEqual) | |||
| .OP_END_FACTORY_REG(TensorEqual) | |||
| /** | |||
| * @brief Element-wise max of each of the input tensors (with Numpy-style broadcasting support). | |||
| * All inputs and outputs must have the same data type. This operator supports multidirectional | |||
| * (i.e., Numpy-style) broadcasting | |||
| * | |||
| * @par inputs | |||
| * | |||
| * @par Inputs: | |||
| * one input including: | |||
| * @li x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64 | |||
| * | |||
| * @par output | |||
| * x: A dynamic input Tensor. Must be one of the following types: float32, float16, double, int32, int64 | |||
| * | |||
| * @par Outputs: | |||
| * one output including: | |||
| * @li y:A Tensor of the same type as x | |||
| * | |||
| * y: A Tensor of the same type as x | |||
| * | |||
| */ | |||
| REG_OP(MaxN) | |||
| .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) | |||
| .OP_END_FACTORY_REG(MaxN) | |||
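| /** | |||
| * A minimal sketch of MaxN over N equally shaped operands, assuming the | |||
| * element-wise maximum semantics above (broadcasting and the non-empty | |||
| * input check are omitted for brevity). | |||
| * @code | |||
| * #include <algorithm> | |||
| * #include <vector> | |||
| * float MaxNElem(const std::vector<float> &xs) { | |||
| *     float y = xs[0]; | |||
| *     for (float v : xs) y = std::max(y, v); | |||
| *     return y; | |||
| * } | |||
| * @endcode | |||
| */ | |||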
| @@ -3634,16 +3613,16 @@ REG_OP(DataCompare) | |||
| *which Hardmax will be performed. The output tensor has the same shape and contains the Hardmax values of the | |||
| *corresponding input. | |||
| * | |||
| *@par inputs | |||
| *@par Inputs: | |||
| *one input including: | |||
| *@li x: input A Tensor.Must be one of the following types:float32,float16 | |||
| *x: An input Tensor. Must be one of the following types: float32, float16 | |||
| * | |||
| *@par Attributes: | |||
| *@li axis:A required int attribute that decides which dimension will be used to cal the hard_max | |||
| *axis: A required int attribute that decides which dimension will be used to calculate the hardmax | |||
| * | |||
| *@par output: | |||
| *@par Outputs: | |||
| *one output including: | |||
| *@li y:A Tensor of the same type as x | |||
| *y: A Tensor of the same type as x | |||
| * | |||
| */ | |||
| REG_OP(HardMax) | |||
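| /** | |||
| * A small worked example of the Hardmax rule above: along the chosen axis the | |||
| * maximum entry maps to 1 and all other entries map to 0, e.g. for axis = 1, | |||
| * input [[1, 3, 2]] yields [[0, 1, 0]]. | |||
| */ | |||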
| @@ -3661,7 +3640,7 @@ REG_OP(HardMax) | |||
| * @li input_y: A Tensor. The second tensor, must be 1d. \n | |||
| * @par Outputs: | |||
| * @li output: A Tensor. Result of the two inputs, must be 1d. \n | |||
| * output: A Tensor. Result of the two inputs, must be 1d. \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch dot operator. \n | |||
| @@ -3671,7 +3650,7 @@ REG_OP(Dot) | |||
| .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||
| .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||
| .OP_END_FACTORY_REG(Dot) | |||
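| /** | |||
| * A minimal sketch of the 1-D dot product described above, assuming | |||
| * output = sum_i input_x[i] * input_y[i]. | |||
| * @code | |||
| * #include <cstddef> | |||
| * float Dot1d(const float *x, const float *y, size_t n) { | |||
| *     float acc = 0.0f; | |||
| *     for (size_t i = 0; i < n; ++i) acc += x[i] * y[i]; | |||
| *     return acc; | |||
| * } | |||
| * @endcode | |||
| */ | |||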
| /** | |||
| *@brief Returns a new tensor with boolean elements representing \n | |||
| *if each element of input is "close" to the corresponding element of other \n | |||
| @@ -3719,7 +3698,7 @@ REG_OP(IsClose) | |||
| * | |||
| *@attention Constraints: | |||
| *@li indices: only support int32,and shape same to "updates" | |||
| *@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". | |||
| *@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". | |||
| *@li y:A Tensor, the type and shape is same to "var" \n | |||
| *@par Third-party framework compatibility | |||
| @@ -3754,7 +3733,7 @@ REG_OP(ArgMaxGrad) | |||
| *@attention Constraints: | |||
| *@li indices: only support int32,and shape same to "updates" | |||
| *@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". | |||
| *@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". | |||
| *@li y:A Tensor, the type and shape is same to "var" \n | |||
| *@par Third-party framework compatibility | |||
| @@ -3805,15 +3784,15 @@ REG_OP(AddMatMatElements) | |||
| *@par Inputs: | |||
| *Two inputs, including: | |||
| * @li input_x1: A tensor. Must be the following types: | |||
| * float32. \n | |||
| * @li input_x1: A tensor. Must be one of the following types: float32. | |||
| * @li input_x2: A tensor. Must be one of the following types: float32. \n | |||
| *@par Inputs: | |||
| *@li input_x2: A tensor. Must of the following types: | |||
| * float32. \n | |||
| * @par Attributes: | |||
| * @li dim: The type is Int and the default value is 1. | |||
| * @li eps: The type is Float and the default value is 1e-8. \n | |||
| *@par Outputs: | |||
| *@li output_y: A Tensor with the same type of input_x's. \n | |||
| * output_y: A Tensor with the same type as the inputs. \n | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the Pytorch operator CosineSimilarity. \n | |||
| @@ -3826,6 +3805,45 @@ REG_OP(CosineSimilarity) | |||
| .ATTR(eps, Float, 1e-8) | |||
| .OP_END_FACTORY_REG(CosineSimilarity) | |||
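| /** | |||
| * A minimal sketch of cosine similarity along one dimension, assuming the | |||
| * PyTorch-style formula y = sum(x1*x2) / max(||x1|| * ||x2||, eps). | |||
| * @code | |||
| * #include <algorithm> | |||
| * #include <cmath> | |||
| * #include <cstddef> | |||
| * float CosineSim(const float *x1, const float *x2, size_t n, float eps) { | |||
| *     float dot = 0.0f, n1 = 0.0f, n2 = 0.0f; | |||
| *     for (size_t i = 0; i < n; ++i) { | |||
| *         dot += x1[i] * x2[i]; | |||
| *         n1 += x1[i] * x1[i]; | |||
| *         n2 += x2[i] * x2[i]; | |||
| *     } | |||
| *     return dot / std::max(std::sqrt(n1) * std::sqrt(n2), eps); | |||
| * } | |||
| * @endcode | |||
| */ | |||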
| /** | |||
| *@brief Computes the Adam optimizer update. \n | |||
| *@par Inputs: | |||
| *Eleven inputs, including: | |||
| * @li var: A Tensor. Support float16/float32.\n | |||
| * @li m: A Tensor. Datatype and shape are same as exp_avg.\n | |||
| * @li v: A Tensor. Datatype and shape are same as exp_avg.\n | |||
| * @li lr: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
| * @li beta1: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
| * @li beta2: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
| * @li epsilon: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
| * @li grad: A Tensor. Datatype and shape are same as exp_avg.\n | |||
| * @li max_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
| * @li global_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
| * @li weight_decay: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
| *@par Outputs: | |||
| *Three outputs, including: | |||
| * @li var: A Tensor. Datatype and shape are same as exp_avg.\n | |||
| * @li m: A Tensor. Datatype and shape are same as exp_avg.\n | |||
| * @li v: A Tensor. Datatype and shape are same as exp_avg.\n | |||
| */ | |||
| REG_OP(ApplyAdamV2) | |||
| .INPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .INPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .INPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .INPUT(lr, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .INPUT(beta1, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .INPUT(beta2, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .INPUT(epsilon, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .INPUT(grad, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .INPUT(max_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .INPUT(global_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .INPUT(weight_decay, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .OUTPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .OUTPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .OUTPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| .OP_END_FACTORY_REG(ApplyAdamV2) | |||
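| /** | |||
| * A rough scalar sketch of a standard Adam-style update consistent with the | |||
| * inputs above; the exact kernel formula, including how max_grad_norm and | |||
| * global_grad_norm enter, is an assumption here. | |||
| * @code | |||
| * #include <cmath> | |||
| * void ApplyAdamSketch(float &var, float &m, float &v, float lr, float beta1, | |||
| *                      float beta2, float epsilon, float grad, | |||
| *                      float weight_decay) { | |||
| *     m = beta1 * m + (1.0f - beta1) * grad;         // first-moment estimate | |||
| *     v = beta2 * v + (1.0f - beta2) * grad * grad;  // second-moment estimate | |||
| *     var -= lr * (m / (std::sqrt(v) + epsilon) + weight_decay * var); | |||
| * } | |||
| * @endcode | |||
| */ | |||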
| } // namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ | |||
| @@ -163,9 +163,6 @@ REG_OP(Case) | |||
| * if it is not a scalar, non-empty means True and empty means False. | |||
| *@li body: A subgraph takes 'input' and returns another list of tensors . \n | |||
| *@par Attributes: | |||
| *parallel_iterations: An optional int, default as 10 . \n | |||
| *@par Outputs: | |||
| *output: The output tensors returned by "body". Has the same type as "input" . \n | |||
| @@ -28,7 +28,7 @@ namespace ge { | |||
| *@brief Decode the frame(s) of a GIF-encoded image to a uint8 tensor . \n | |||
| *@par Inputs: | |||
| *@li contents:A Tensor of type string. 0-D. The GIF-encoded image. \n | |||
| *contents:A Tensor of type string. 0-D. The GIF-encoded image. \n | |||
| *@par Outputs: | |||
| *image:A Tensor of type uint8. \n | |||
| @@ -128,8 +128,8 @@ crops from the input image tensor and resizes them using bilinear sampling or | |||
| nearest neighbor sampling to a common output size specified by crop_size . \n | |||
| *@par Inputs: | |||
| *Input images must be a 4-D tensor. Inputs include: | |||
| *@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, | |||
| *Input x must be a 4-D tensor. Inputs include: | |||
| *@li x:A Tensor. Must be one of the following types:uint8, uint16, int8, | |||
| int16, int32, int64, float16, float, double. A 4-D tensor of shape | |||
| [batch, image_height, image_width, depth]. The format must be NHWC. | |||
| *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. | |||
| @@ -266,8 +266,9 @@ depth] containing the original image size. Both image_height and image_width | |||
| need to be positive . \n | |||
| *@par Attributes: | |||
| method: A string specifying the interpolation method. Only 'bilinear' is | |||
| supported for now . \n | |||
| *@li method: A string specifying the interpolation method. Only 'bilinear' is | |||
| supported for now . | |||
| *@li T: the type of the output \n | |||
| *@par Outputs: | |||
| *y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format | |||
| @@ -585,9 +586,11 @@ REG_OP(ResizeNearestNeighborV2GradD) | |||
| channels], The image tensor that was resized . \n | |||
| *@par Attributes: | |||
| *align_corners: An optional bool. Defaults to False. If true, the centers of | |||
| *@li align_corners: An optional bool. Defaults to False. If true, the centers of | |||
| the 4 corner pixels of the input and grad tensors are aligned. Defaults to | |||
| false . \n | |||
| false . | |||
| *@li half_pixel_centers: An optional bool. If true, source coordinates are | |||
| computed with half-pixel centers. Defaults to false . \n | |||
| *@par Outputs: | |||
| *y: A Tensor. Has the same type as original_image . \n | |||
| @@ -617,9 +620,10 @@ REG_OP(ResizeBilinearV2Grad) | |||
| size for the images . \n | |||
| *@par Attributes: | |||
| *align_corners: If true, the centers of the 4 corner pixels of the input and | |||
| * @li align_corners: If true, the centers of the 4 corner pixels of the input and | |||
| output tensors are aligned, preserving the values at the corner pixels. | |||
| Defaults to false . \n | |||
| Defaults to false . | |||
| * @li half_pixel_centers: An optional bool. Defaults to False . \n | |||
| *@par Outputs: | |||
| *y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||
| @@ -684,6 +688,9 @@ be non-negative. In the case of 0, the cropped area does not need to overlap | |||
| any of the bounding boxes supplied . | |||
| *@li aspect_ratio_range: The cropped area of the image must have an aspect | |||
| ratio = width / height within this range. | |||
| *@li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The | |||
| cropped area of the image must contain a fraction of the supplied image | |||
| within this range. | |||
| *@li max_attempts: Number of attempts at generating a cropped region of the | |||
| image of the specified constraints. After max_attempts failures, return the | |||
| entire image. | |||
| @@ -740,6 +747,9 @@ generator is seeded by the given seed. Otherwise, it is seeded by a random seed. | |||
| *@li seed2: A second seed to avoid seed collision. | |||
| *@li aspect_ratio_range: The cropped area of the image must have an aspect | |||
| ratio = width / height within this range. | |||
| *@li area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The | |||
| cropped area of the image must contain a fraction of the supplied image | |||
| within this range. | |||
| *@li max_attempts: Number of attempts at generating a cropped region of the | |||
| image of the specified constraints. After max_attempts failures, return the | |||
| entire image. | |||
| @@ -787,9 +797,10 @@ REG_OP(SampleDistortedBoundingBoxExt2) | |||
| The new size for the images . \n | |||
| *@par Attributes: | |||
| *align_corners: If true, the centers of the 4 corner pixels of the input and | |||
| *@li align_corners: If true, the centers of the 4 corner pixels of the input and | |||
| output tensors are aligned, preserving the values at the corner pixels. | |||
| Defaults to false . \n | |||
| *@li half_pixel_centers: An optional bool. Defaults to False . \n | |||
| *@par Outputs: | |||
| *y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||
| @@ -999,10 +1010,6 @@ deciding whether boxes overlap too. | |||
| *@li score_threshold: A 0-D float tensor representing the threshold for | |||
| deciding when to remove boxes based on score . \n | |||
| *@par Attributes: | |||
| *pad_to_max_output_size: If true, the output selected_indices is padded | |||
| to be of length max_output_size. Defaults to false . \n | |||
| *@par Outputs: | |||
| *selected_indices: A 1-D integer tensor of shape [M] representing the | |||
| selected indices from the boxes tensor, where M <= max_output_size . \n | |||
| @@ -1094,8 +1101,8 @@ REG_OP(EncodePng) | |||
| *contents: 0-D. The PNG-encoded image . | |||
| *@par Attributes: | |||
| *channels: graph channels \n | |||
| *dtype: type of image | |||
| *@li channels: number of color channels of the decoded image \n | |||
| *@li dtype: data type of the decoded image | |||
| *@par Outputs: | |||
| *image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] | |||
| @@ -1116,10 +1123,10 @@ REG_OP(DecodePng) | |||
| *@brief Bmp-decode an image. \n | |||
| *@par Inputs: | |||
| *@li contents: A Tensor of type string. 0-D. The BMP-encoded image. \n | |||
| *contents: A Tensor of type string. 0-D. The BMP-encoded image. \n | |||
| *@par Attributes: | |||
| *@li channels: Decode the desired number of color channels of the image. \n | |||
| *channels: The desired number of color channels of the decoded image. \n | |||
| *@par Outputs: | |||
| *image: A Tensor dtype of uint8. | |||
| @@ -1253,6 +1260,7 @@ REG_OP(KeepRatioResizeBilinear) | |||
| No default value. | |||
| *@li align_corners: An optional bool. If "true", the centers of the corner | |||
| pixels of the input and output tensors are aligned. Defaults to "false" . \n | |||
| *@li half_pixel_centers: An optional bool. Defaults to False . \n | |||
| *@par Outputs: | |||
| *y: A Tensor with the same type and format as input "images" . \n | |||
| @@ -1381,6 +1389,7 @@ REG_OP(NonMaxSuppressionV5) | |||
| *@li scale: A `Tensor` of type `float32`. | |||
| *@li translation: A `Tensor` of type `float32` . \n | |||
| *@par Attributes: | |||
| *@li kernel_type: type is string, default lanczos3 | |||
| *@li antialias: type is bool, default true \n | |||
| @@ -1411,6 +1420,7 @@ REG_OP(ScaleAndTranslate) | |||
| *@li scale: A `Tensor` of type `float32`. | |||
| *@li translation: A `Tensor` of type `float32` . \n | |||
| *@par Attributes: | |||
| *@li kernel_type: type is string, default lanczos3 | |||
| *@li antialias: type is bool, default true | |||
| @@ -1460,9 +1470,10 @@ if they fall beyond [0, 1]. If false, do not do clipping and output the box | |||
| coordinates as it is. If not specified, defaults to true . \n | |||
| *@par Outputs: | |||
| *nmsed_boxes:type is float | |||
| *nmsed_scores:type is float | |||
| *nmsed_classes:type is float \n | |||
| *@li nmsed_boxes: type is float | |||
| *@li nmsed_scores: type is float | |||
| *@li nmsed_classes: type is float | |||
| *@li valid_detections: type is int32 \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with tensorflow CombinedNonMaxSuppression operator. | |||
| @@ -1508,6 +1519,9 @@ REG_OP(IMGWarp) | |||
| *@par Outputs: | |||
| *map_img: A Tensor after resize. \n | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(Remap) | |||
| .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32})) | |||
| @@ -1524,7 +1538,7 @@ and 4 mean input[(h_top, w_left), (h_top, w_right), (h_bottom, w_left), (h_bott | |||
| *@li warp_index: the resize offset A 4-D float tensor of shape `[n, 2, h, w]`, 2 means (x, y) for resize point. | |||
| *@par Outputs: | |||
| *remap_img: A Tensor after ResizeBilinear, A 4-D tensor of shape `[n, c, h, w]`. \n | |||
| *warp_img: A Tensor after ResizeBilinear, A 4-D tensor of shape `[n, c, h, w]`. \n | |||
| */ | |||
| REG_OP(IMGWarpResize) | |||
| .INPUT(img, TensorType({DT_FLOAT32})) | |||
| @@ -1558,6 +1572,39 @@ REG_OP(SpatialTransformerD) | |||
| .ATTR(use_default_theta, ListBool, {}) | |||
| .OP_END_FACTORY_REG(SpatialTransformerD) | |||
| /** | |||
| *@brief Function spatial transformer . \n | |||
| *@par Inputs: | |||
| *@li x: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64. | |||
| *@li theta: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64, | |||
| auxiliary coefficients . \n | |||
| *@par Attributes: | |||
| *@li output_size: A tuple, the output size. | |||
| *@li default_theta: A tuple, the default theta values. | |||
| *@li align_corners: An optional bool. Defaults to false. | |||
| *@li use_default_theta: A list indicating which theta values use the defaults. | |||
| *@par Outputs: | |||
| *y: A Tensor dtype of float16, float32, double, uint8, int8, uint16, int16, int32, uint32, uint64, int64, | |||
| should be same shape and type as x. | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(SpatialTransformer) | |||
| .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8,DT_UINT16, | |||
| DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64})) | |||
| .OPTIONAL_INPUT(theta, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8, | |||
| DT_UINT16,DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16,DT_DOUBLE,DT_UINT8,DT_INT8,DT_UINT16, | |||
| DT_INT16,DT_INT32,DT_UINT32,DT_UINT64,DT_INT64})) | |||
| .ATTR(output_size, ListInt, {-1, -1}) | |||
| .ATTR(default_theta, ListFloat, {}) | |||
| .ATTR(align_corners, Bool, false) | |||
| .ATTR(use_default_theta, ListInt, {}) | |||
| .OP_END_FACTORY_REG(SpatialTransformer) | |||
| /** | |||
| * @brief Resize the input tensor. \n | |||
| currently, only resizing image tensors using nearest neighbor and linear interpolation is supported. | |||
| @@ -1623,7 +1670,7 @@ REG_OP(Resize) | |||
| *@brief Function parse image from string to int. \n | |||
| *@par Inputs: | |||
| *@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n | |||
| * contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n | |||
| *@par Attributes: | |||
| *@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. | |||
| @@ -1668,7 +1715,7 @@ REG_OP(DenseImageWarp) | |||
| *@par Inputs: | |||
| *One input, including: | |||
| * @li x: A tensor. Must be one of the following types: | |||
| * x: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| *@par Attributes: | |||
| @@ -1713,7 +1760,7 @@ REG_OP(ResizeD) | |||
| *@par Inputs: | |||
| *One input, including: | |||
| * @li grads: A tensor. Must be one of the following types: | |||
| * grads: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| *@par Attributes: | |||
| @@ -1762,8 +1809,8 @@ REG_OP(ResizeGradD) | |||
| *@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n | |||
| *@par Outputs: | |||
| *grad_image: Returns 4-D with the same shape and dtype as `image`. | |||
| *grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n | |||
| *@li grad_image: Returns 4-D with the same shape and dtype as `image`. | |||
| *@li grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n | |||
| */ | |||
| REG_OP(DenseImageWarpGrad) | |||
| .INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| @@ -1817,12 +1864,12 @@ REG_OP(GridSampler2D) | |||
| *@li assist: Assist matrix, a 4-D tensor of type float16. | |||
| *@par Attributes: | |||
| *@li align_corners: An optional bool. If "true", the centers of the corner | |||
| *align_corners: An optional bool. If "true", the centers of the corner | |||
| pixels of the input and output tensors are aligned. Defaults to "false" . | |||
| *@par Outputs: | |||
| *diff: Returns 4-D Tensor with the same shape and dtype as `grid`. | |||
| *position: Returns 4-D Tensor with the same shape as `grid`. | |||
| *@li diff: Returns 4-D Tensor with the same shape and dtype as `grid`. | |||
| *@li position: Returns 4-D Tensor with the same shape as `grid`. | |||
| */ | |||
| REG_OP(GridUnnormal) | |||
| .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| @@ -1840,10 +1887,13 @@ REG_OP(GridUnnormal) | |||
| *@li position: 4-D Tensor with shape `[batch, output_height, output_width, 2]`. | |||
| *@par Attributes: | |||
| *@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . | |||
| *padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . | |||
| *@par Outputs: | |||
| *y: Returns 4-D Tensor with the same dtype as `x`. | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(ImageUnfold) | |||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| @@ -1936,5 +1986,204 @@ REG_OP(GridSampler3DGrad) | |||
| .ATTR(align_corners, Bool, false) | |||
| .OP_END_FACTORY_REG(GridSampler3DGrad) | |||
| /** | |||
| *@brief Upsample the 3-D data with the nearest neighbor interpolation algorithm. \n | |||
| *@par Inputs: | |||
| *One input, including: | |||
| *x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types: | |||
| * float16, float32, float64. \n | |||
| *@par Attributes: | |||
| *@li output_size: An optional listInt. Defaults to none. | |||
| contains 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size' | |||
| should match the number of spatial dimensions of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n | |||
| *@li scales: An optional listFloat. Defaults to none. | |||
| The scale array along each spatial dimension, contains 3 elements: scale_depth, scale_height, scale_width. | |||
| The number of elements of 'scales' should match the number of spatial dimensions of input 'x'. One of 'scales' and | |||
| 'output_size' MUST be specified and it is an error if both are specified. \n | |||
| *@par Outputs: | |||
| *y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n | |||
| */ | |||
| REG_OP(UpsampleNearest3d) | |||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .ATTR(output_size, ListInt, {}) | |||
| .ATTR(scales, ListFloat, {}) | |||
| .OP_END_FACTORY_REG(UpsampleNearest3d) | |||
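| /** | |||
| * A minimal sketch of how each output spatial dimension is derived, assuming | |||
| * the PyTorch-style rule out = floor(in * scale) when 'scales' is given, or | |||
| * the corresponding entry of 'output_size' when that attribute is given. | |||
| * @code | |||
| * #include <cmath> | |||
| * #include <cstdint> | |||
| * int64_t UpsampledDim(int64_t in_dim, float scale) { | |||
| *     return static_cast<int64_t>(std::floor(in_dim * scale)); | |||
| * }  // e.g. in_dim = 4, scale = 2.0f -> 8 | |||
| * @endcode | |||
| */ | |||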
| /** | |||
| *@brief Upsample the 3-D data with the trilinear interpolation algorithm. \n | |||
| *@par Inputs: | |||
| *One input, including: | |||
| *x: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types: | |||
| * float16, float32, float64. \n | |||
| *@par Attributes: | |||
| *@li output_size: An optional listInt. Defaults to none. | |||
| contains 3 elements: output_depth, output_height, output_width. The number of elements of 'output_size' should | |||
| match the number of spatial dimensions of input 'x'. Only one of 'scales' and 'output_size' can be specified. \n | |||
| *@li scales: An optional listFloat. Defaults to none. | |||
| The scale array along each spatial dimension, contains 3 elements: scale_depth, scale_height, scale_width. | |||
| The number of elements of 'scales' should match the number of spatial dimensions of input 'x'. | |||
| One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n | |||
| *@li align_corners: An optional bool. Defaults to false. | |||
| If true, the input and output tensors are aligned by the center points of their corner pixels, preserving the | |||
| values at the corner pixels. If false, the input and output tensors are aligned by the corner points of their | |||
| corner pixels, and the interpolation uses edge value padding for out-of-boundary values. \n | |||
| *@par Outputs: | |||
| *y: A 5-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n | |||
| */ | |||
| REG_OP(UpsampleTrilinear3d) | |||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .ATTR(output_size, ListInt, {}) | |||
| .ATTR(scales, ListFloat, {}) | |||
| .ATTR(align_corners, Bool, false) | |||
| .OP_END_FACTORY_REG(UpsampleTrilinear3d) | |||
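| /** | |||
| * A sketch of the source-coordinate mapping selected by the align_corners | |||
| * attribute above, following the common resize convention (an assumption, | |||
| * not taken from this header). | |||
| * @code | |||
| * #include <cstdint> | |||
| * float SrcCoord(int64_t dst, int64_t in_size, int64_t out_size, | |||
| *                bool align_corners) { | |||
| *     if (align_corners) { | |||
| *         return out_size > 1 | |||
| *             ? dst * static_cast<float>(in_size - 1) / (out_size - 1) : 0.0f; | |||
| *     } | |||
| *     float scale = static_cast<float>(in_size) / out_size; | |||
| *     return (dst + 0.5f) * scale - 0.5f;  // callers may clamp to [0, in-1] | |||
| * } | |||
| * @endcode | |||
| */ | |||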
| /** | |||
| *@brief Upsample the 3-D gradient data with the nearest neighbor interpolation algorithm. \n | |||
| *@par Inputs: | |||
| *One input, including: | |||
| *grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types: | |||
| * float16, float32, float64. \n | |||
| *@par Attributes: | |||
| *@li input_size: A required listInt. | |||
| Contains 5 elements: [min_batch, channels, depth, height, width]. Must: | |||
| input_size[0] == grad_output_tensor_size[0] | |||
| input_size[1] == grad_output_tensor_size[1]. \n | |||
| *@li output_size: An optional listInt. Defaults to none. | |||
| Contains 3 elements: depth, height, width. The number of elements of 'output_size' should | |||
| match the number of spatial dimensions of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must: | |||
| grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0] | |||
| grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1] | |||
| grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n | |||
| *@li scales: An optional listFloat. Defaults to none. | |||
| The scale array along each spatial dimension, contains 3 elements: scale_depth, scale_height, scale_width. | |||
| The number of elements of 'scales' should match the number of spatial dimensions of input 'grad_output'. | |||
| One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n | |||
| *@par Outputs: | |||
| *y: A 5-D tensor. Has the same type as input grad_output; shape depends on the attribute input_size. \n | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(UpsampleNearest3dGrad) | |||
| .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .REQUIRED_ATTR(input_size, ListInt) | |||
| .ATTR(output_size, ListInt, {}) | |||
| .ATTR(scales, ListFloat, {}) | |||
| .OP_END_FACTORY_REG(UpsampleNearest3dGrad) | |||
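| /** | |||
| * A hypothetical helper that checks the size constraints listed above; the | |||
| * name and signature are illustrative only (vectors are assumed to hold at | |||
| * least 5, 5, and 3 elements respectively). | |||
| * @code | |||
| * #include <cmath> | |||
| * #include <cstdint> | |||
| * #include <vector> | |||
| * bool SizesConsistent(const std::vector<int64_t> &input_size, | |||
| *                      const std::vector<int64_t> &grad_size, | |||
| *                      const std::vector<float> &scales) { | |||
| *     if (input_size[0] != grad_size[0] || input_size[1] != grad_size[1]) { | |||
| *         return false; | |||
| *     } | |||
| *     for (int i = 0; i < 3; ++i) { | |||
| *         int64_t expect = static_cast<int64_t>( | |||
| *             std::floor(input_size[i + 2] * scales[i])); | |||
| *         if (grad_size[i + 2] != expect) return false; | |||
| *     } | |||
| *     return true; | |||
| * } | |||
| * @endcode | |||
| */ | |||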
| /** | |||
| *@brief Upsample the 3-D gradient data trilinear interpolation algorithm. \n | |||
| *@par Inputs: | |||
| *One input, including: | |||
| *grad_output: A 5-D input tensor [N, C, D, H, W]. Must be one of the following types: | |||
| * float16, float32, float64. \n | |||
| *@par Attributes: | |||
| *@li input_size: A required listInt. | |||
| Contains 5 elements: [min_batch, channels, depth, height, width]. Must: | |||
| input_size[0] == grad_output_tensor_size[0] | |||
| input_size[1] == grad_output_tensor_size[1]. \n | |||
| *@li output_size: An optional listInt. Defaults to none. | |||
| Contains 3 elements: depth, height, width. The number of elements of 'output_size' should | |||
| match the number of spatial dimensions of input 'grad_output'. Only one of 'scales' and 'output_size' can be specified. Must: | |||
| grad_output_tensor_size[2] == floor(input_size[2] * scales[0]) == output_size[0] | |||
| grad_output_tensor_size[3] == floor(input_size[3] * scales[1]) == output_size[1] | |||
| grad_output_tensor_size[4] == floor(input_size[4] * scales[2]) == output_size[2]. \n | |||
| *@li scales: An optional listFloat. Defaults to none. | |||
| The scale array along each spatial dimension, contains 3 elements: scale_depth, scale_height, scale_width. | |||
| The number of elements of 'scales' should match the number of spatial dimensions of input 'grad_output'. | |||
| One of 'scales' and 'output_size' MUST be specified and it is an error if both are specified. \n | |||
| *@par Outputs: | |||
| *y: A Tensor whose shape depends on input_size and output_size/scales. Must be one of the following | |||
| types: float16, float32, float64. \n | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(UpsampleTrilinear3dGrad) | |||
| .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .REQUIRED_ATTR(input_size, ListInt) | |||
| .ATTR(output_size, ListInt, {}) | |||
| .ATTR(scales, ListFloat, {}) | |||
| .ATTR(align_corners, Bool, false) | |||
| .OP_END_FACTORY_REG(UpsampleTrilinear3dGrad) | |||
| /** | |||
| *@brief Upsample the 1-D data with the nearest neighbor interpolation algorithm. \n | |||
| *@par Inputs: | |||
| *x: A 3-D input tensor [N, C, W]. Must be one of the following types: | |||
| * float16, float32, float64. \n | |||
| *@par Attributes: | |||
| *@li output_size: A required listInt containing output_width. | |||
| *@li scales: An optional listFloat containing scale_width. Defaults to empty. \n | |||
| *@par Outputs: | |||
| *y: A 3-D tensor. Has the same type as input x, shape depends on x and output_size/scales. \n | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n | |||
| */ | |||
| REG_OP(UpsampleNearest1d) | |||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .REQUIRED_ATTR(output_size, ListInt) | |||
| .ATTR(scales, ListFloat, {}) | |||
| .OP_END_FACTORY_REG(UpsampleNearest1d) | |||
| /** | |||
| *@brief Upsample the 1-D gradient data with the nearest neighbor interpolation algorithm. \n | |||
| *@par Inputs: | |||
| *grad_output: A 3-D input tensor [N, C, W]. Must be one of the following types: | |||
| * float16, float32, float64. \n | |||
| *@par Attributes: | |||
| *@li output_size: A required listInt containing output_width. | |||
| *@li scales: An optional listFloat containing scale_width. Defaults to empty. | |||
| *@li input_size: A required listInt containing the input size. \n | |||
| *@par Outputs: | |||
| *y: A 3-D tensor. Has the same type as input grad_output; shape depends on the attribute input_size. \n | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. \n | |||
| */ | |||
| REG_OP(UpsampleNearest1dGrad) | |||
| .INPUT(grad_output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .REQUIRED_ATTR(input_size, ListInt) | |||
| .REQUIRED_ATTR(output_size, ListInt) | |||
| .ATTR(scales, ListFloat, {}) | |||
| .OP_END_FACTORY_REG(UpsampleNearest1dGrad) | |||
| } // namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ | |||
| @@ -347,6 +347,9 @@ REG_OP(SelfAdjointEig) | |||
| .OP_END_FACTORY_REG(SelfAdjointEig) | |||
| /** | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| *@brief Computes the sign and the log of the absolute value of the determinant | |||
| of one or more square matrices . \n | |||
| @@ -382,9 +385,10 @@ REG_OP(Slogdet) | |||
| *x:Tensor of shape [..., M, N]. Let P be the minimum of M and N . \n | |||
| *@par Attributes: | |||
| *compute_uv:If True then left and right singular vectors will be computed and | |||
| *@li compute_uv:If True then left and right singular vectors will be computed and | |||
| returned in u and v, respectively. Otherwise, only the singular values will | |||
| be computed, which can be significantly faster . \n | |||
| be computed, which can be significantly faster . | |||
| *@li full_matrices: If true, compute full-sized u and v; otherwise compute only the leading P singular vectors. \n | |||
| *@par Outputs: | |||
| *@li sigma:Singular values. Shape is [..., P]. The values are sorted in | |||
| @@ -427,6 +431,9 @@ denotes the lower triangular factor `L` with unit diagonal. | |||
| *@li p: upper triangular part denotes the upper triangular factor `U`.Permutation | |||
| of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n | |||
| *@par Attributes: | |||
| *output_idx_type: An optional DType from: int32, int64. | |||
| *@par Third-party framework compatibility | |||
| * Compatible with TensorFlow Lu operator. | |||
| */ | |||
| @@ -467,6 +474,12 @@ left-hand side . \n | |||
| *@par Outputs: | |||
| y: Tensor of shape `[..., M, K]` containing the solutions \n | |||
| *@par Attributes: | |||
| *partial_pivoting: Whether to perform partial pivoting. `True` by default. | |||
| Partial pivoting makes the procedure more stable, but slower. Partial | |||
| pivoting is unnecessary in some cases, including diagonally dominant and | |||
| symmetric positive definite matrices. | |||
| *@par Third-party framework compatibility | |||
| * Compatible with TensorFlow TridiagonalSolve operator. | |||
| */ | |||
| @@ -35,10 +35,10 @@ namespace ge { | |||
| *@li max_num_elements: The maximum number of elements. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li handle: An empty tensor list . \n | |||
| *handle: An empty tensor list . \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow EmptyTensorList operator. | |||
| @@ -59,10 +59,10 @@ and the other elements of the given list in `input_handle`. \n | |||
| *@li tensor: The tensor to put on the list. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li output_handle:A list with the elements of old list followed by tensor. \n | |||
| *output_handle:A list with the elements of old list followed by tensor. \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListPushBack operator. | |||
| @@ -86,7 +86,7 @@ list with all but that element. \n | |||
| *@li element_shape: A shape compatible with that of elements in the list. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li output_handle:A list with the elements of the old list followed by tensor. | |||
| @@ -110,10 +110,10 @@ REG_OP(TensorListPopBack) | |||
| *@brief The number of tensors in the input tensor list. \n | |||
| *@par Inputs: | |||
| *@li input_handle: The input list. \n | |||
| *input_handle: The input list. \n | |||
| *@par Outputs: | |||
| *@li length:The number of tensors in the list. \n | |||
| *length:The number of tensors in the list. \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListLength operator. | |||
| @@ -127,13 +127,13 @@ REG_OP(TensorListLength) | |||
| *@brief The shape of elements in the input tensor list. \n | |||
| *@par Inputs: | |||
| *@li input_handle: The input list. \n | |||
| *input_handle: The input list. \n | |||
| *@par Attributes: | |||
| *@li shape_type: The type of shape in the list. \n | |||
| *shape_type: The type of shape in the list. \n | |||
| *@par Outputs: | |||
| *@li element_shape:A shape compatible with that of elements in the list. \n | |||
| *element_shape:A shape compatible with that of elements in the list. \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListElementShape operator. | |||
| @@ -156,7 +156,7 @@ REG_OP(TensorListElementShape) | |||
| *@li shape_type: The type of shape in the list. \n | |||
| *@par Outputs: | |||
| *@li handle: An output tensor list . \n | |||
| *handle: An output tensor list . \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListReserve operator. | |||
| @@ -178,10 +178,10 @@ REG_OP(TensorListReserve) | |||
| *@li element_shape: A shape compatible with that of elements in the list. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li item: An output tensor value of index position . \n | |||
| *item: An output tensor value of index position . \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListGetItem operator. | |||
| @@ -206,10 +206,10 @@ REG_OP(TensorListGetItem) | |||
| *@li item: The element to be assigned to that position. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li output_handle: An output tensor list . \n | |||
| *output_handle: An output tensor list . \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListSetItem operator. | |||
| @@ -233,10 +233,10 @@ REG_OP(TensorListSetItem) | |||
| *@li tensor: The tensor push into tensor list. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li output_handles: The output tensor lists. \n | |||
| *output_handles: The output tensor lists. \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListPushBackBatch operator. | |||
| @@ -263,7 +263,7 @@ REG_OP(TensorListPushBackBatch) | |||
| *@li num_elements: The number of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li tensor: The tensor of list. \n | |||
| *tensor: The tensor of list. \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListStack operator. | |||
| @@ -293,7 +293,7 @@ the leading dim of input_handle.element_shape or the element_shape input arg | |||
| is not already set. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li tensor: The concated result. | |||
| @@ -324,10 +324,10 @@ REG_OP(TensorListConcatV2) | |||
| *@li lengths: Vector of sizes of the 0th dimension of tensors in the list. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li output_handle: The list. \n | |||
| *output_handle: The list. \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListSplit operator. | |||
| @@ -351,10 +351,10 @@ REG_OP(TensorListSplit) | |||
| *@li element_shape: The shape of elements in the list. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li output_handle: An output tensor list . \n | |||
| *output_handle: An output tensor list . \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListFromTensor operator. | |||
| @@ -377,7 +377,7 @@ REG_OP(TensorListFromTensor) | |||
| *@li size: size of the output list. \n | |||
| *@par Outputs: | |||
| *@li output_handle: The output tensor list. \n | |||
| *output_handle: The output tensor list. \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListResize operator. | |||
| @@ -397,10 +397,10 @@ REG_OP(TensorListResize) | |||
| *@li element_shape: The shape of elements in the list. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li values: The tensor. \n | |||
| *values: The tensor. \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListGather operator. | |||
| @@ -429,10 +429,10 @@ the largest index in indices. If -1, the list is just large enough to include | |||
| the largest index in indices. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li output_handle: The TensorList. \n | |||
| *output_handle: The TensorList. \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListScatterV2 operator. | |||
| @@ -458,10 +458,10 @@ REG_OP(TensorListScatterV2) | |||
| *@li indices: The indices used to index into the list. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li output_handle: The TensorList. \n | |||
| *output_handle: The TensorList. \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListScatterIntoExistingList operator. | |||
| @@ -485,10 +485,10 @@ REG_OP(TensorListScatterIntoExistingList) | |||
| *@li input_b: The input tensor list B. \n | |||
| *@par Attributes: | |||
| *@li element_dtype: The type of elements in the list. \n | |||
| *element_dtype: The type of elements in the list. \n | |||
| *@par Outputs: | |||
| *@li output: The output list. \n | |||
| *output: The output list. \n | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow TensorListConcatLists operator. | |||
| @@ -77,8 +77,8 @@ REG_OP(LookupTableInsert) | |||
| *handle: A Tensor of type resource. Handle to the table . \n | |||
| *@par Attributes: | |||
| *@li Tkeys: A DType. | |||
| *@li Tvalues: A DType . \n | |||
| *@li Tkeys: A DType of keys. | |||
| *@li Tvalues: A DType of values. | |||
| *@par Outputs: | |||
| *@li keys: A Tensor of type Tkeys. | |||
| @@ -227,10 +227,10 @@ REG_OP(Bucketize) | |||
| *@par Inputs: | |||
| *One input, including: | |||
| * @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n | |||
| *input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n | |||
| *@par Outputs: | |||
| *y: A tensor with the same type and shape of input_x \n | |||
| *output_y: A tensor with the same type and shape as input_x \n | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the Pytorch operator Trunc. \n | |||
| @@ -298,7 +298,7 @@ REG_OP(SparseSegmentMean) | |||
| *@par Inputs: | |||
| *The input grad must be of type float or double. Inputs include: | |||
| *@li grad: A Tensor. Must be one of the following types: float, double. | |||
| *@li x: A Tensor. Must be one of the following types: float, double. | |||
| gradient propagated to the SparseSegmentMean op. | |||
| *@li indices: A Tensor. Must be one of the following types: int32, int64. | |||
| indices passed to the corresponding SparseSegmentMean op. | |||
| @@ -365,6 +365,7 @@ REG_OP(InitData) | |||
| component of an element of this dataset. | |||
| *@li output_shapes: A nested structure of TensorShape objects corresponding | |||
| to each component of an element of this dataset. | |||
| *@li output_num: the number of outputs. | |||
| *@li channel_name: A string. Default "" . \n | |||
| *@par Outputs: | |||
| @@ -538,11 +539,11 @@ REG_OP(NextAfter) | |||
| *@par Inputs: | |||
| *One input, including: | |||
| * @li input_x: A tensor. Must be one of the following types: | |||
| * input_x: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| *@par Attributes: | |||
| *@li p: An optional float.Defaults to 2. \n | |||
| *p: An optional float. Defaults to 2. \n | |||
| *@par Outputs: | |||
| *y: A Tensor with the same type and shape as input_x. \n | |||
| @@ -560,10 +561,10 @@ REG_OP(Pdist) | |||
| *@brief Compute element-wise finiteness, return a boolean tensor. | |||
| *@par Inputs: | |||
| *x:A Tensor. | |||
| *x:A Tensor of type float16, float32, double. | |||
| *@par Outputs: | |||
| *y:A Tensor. Has the same shape as x. | |||
| *y:A Tensor. Has the same shape as x. Returns which elements of x are finite. | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow IsFinite operator. | |||
| @@ -577,10 +578,10 @@ REG_OP(IsFinite) | |||
| *@brief Compute element-wise infiniteness, return a boolean tensor. | |||
| *@par Inputs: | |||
| *x:A Tensor. | |||
| *x:A Tensor of type float16, float32, double. | |||
| *@par Outputs: | |||
| *y:A Tensor. Has the same shape as x. | |||
| *y:A Tensor. Has the same shape as x. Returns which elements of x are infinite. | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow IsInf operator. | |||
| @@ -594,7 +595,11 @@ REG_OP(IsInf) | |||
| *@brief Computes the complex absolute value of a tensor. | |||
| *@par Inputs: | |||
| *x:A Tensor. | |||
| *x: A Tensor of complex numbers. This operation returns a tensor of type | |||
| float or double that is the absolute value of each element in x . | |||
| * @par Attributes: | |||
| * Tout: The data type of the output. | |||
| *@par Outputs: | |||
| *y:A tensor of type `float` or `double` that is the absolute value of each element in `x`. | |||
| @@ -612,10 +617,10 @@ REG_OP(ComplexAbs) | |||
| *@brief Returns which elements of x are NaN. | |||
| *@par Inputs: | |||
| *x:A Tensor. | |||
| *x:A Tensor of type float16, float32, double. | |||
| *@par Outputs: | |||
| *y:A Tensor. Has the same shape as x. | |||
| *y:A Tensor. Has the same shape as x. Returns which elements of x are NaN. | |||
| *@par Third-party framework compatibility. | |||
| *Compatible with tensorflow IsNan operator. | |||
| @@ -629,7 +634,10 @@ REG_OP(IsNan) | |||
| *@brief Returns the real part of a complex number. | |||
| *@par Inputs: | |||
| *input:A Tensor. | |||
| *input:A Tensor. Must have numeric type. | |||
| *@par Attributes: | |||
| *Tout: Type of outputs. \n | |||
| *@par Outputs: | |||
| *output:A Tensor. Has the same shape as input. | |||
| @@ -670,7 +678,8 @@ REG_OP(Conj) | |||
| *@li weight: A Tensor dtype of float32 . \n | |||
| *@par Attributes: | |||
| *reduction: An optional attribute. Defaults to "mean" . \n | |||
| *@li reduction: An optional attribute. Defaults to "mean" . | |||
| *@li ignore_index: An optional attribute. Defaults to -100 . \n | |||
| *@par Outputs: | |||
| *@li y: A Tensor dtype of float32. | |||
| @@ -700,7 +709,8 @@ REG_OP(NLLLoss) | |||
| *@li total_weight:A Tensor dtype of float32 . \n | |||
| *@par Attributes: | |||
| *reduction: An optional attribute. Defaults to "mean" . \n | |||
| *@li reduction: An optional attribute. Defaults to "mean" . | |||
| *@li ignore_index: An optional attribute. Defaults to -100 . \n | |||
| *@par Outputs: | |||
| *x_grad: A Tensor. Must be the following type: float32 . \n | |||
| @@ -720,24 +730,24 @@ REG_OP(NLLLossGrad) | |||
| .OP_END_FACTORY_REG(NLLLossGrad) | |||
| /** | |||
| *@brief The ifmr . \n | |||
| *@brief IFMR(Input Feature Map Reconstruction). \n | |||
| *@par Inputs: | |||
| *@li data:A Tensor of feature map | |||
| *@li data_min:A Tensor of min value of feature map. | |||
| *@li data_max:A Tensor of max value of feature map. | |||
| *@li cumsum:A Tensor of cumsum bin of data . \n | |||
| *@li data: A Tensor of feature map. | |||
| *@li data_min: A Tensor of min value of feature map. | |||
| *@li data_max: A Tensor of max value of feature map. | |||
| *@li cumsum: A Tensor of cumsum bin of data . \n | |||
| *@par Attributes: | |||
| *min_percentile: min init percentile. | |||
| *max_percentile: max init percentile. | |||
| *search_range: search range. | |||
| *search_step: step size of searching. | |||
| *with_offset: whether using offset . \n | |||
| *@li min_percentile: min init percentile. | |||
| *@li max_percentile: max init percentile. | |||
| *@li search_range: search range. | |||
| *@li search_step: step size of searching. | |||
| *@li with_offset: whether using offset . \n | |||
| *@par Outputs: | |||
| *scale: optimal scale. | |||
| *offset: optimal offset . \n | |||
| *@li scale: optimal scale. | |||
| *@li offset: optimal offset . \n | |||
| *@par Third-party framework compatibility | |||
*Compatible with MindSpore.
| @@ -758,16 +768,16 @@ REG_OP(IFMR) | |||
| .OP_END_FACTORY_REG(IFMR) | |||
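// For orientation, the sketch below shows the kind of host-side search IFMR
// performs: scan candidate clip ranges built from data_min/data_max and keep
// the scale/offset with the lowest reconstruction error. The function name,
// the 8-bit assumption, and the squared-error loss are illustrative
// assumptions, not the kernel contract.
#include <cmath>
#include <vector>

struct QuantParam { float scale; float offset; };

QuantParam IfmrSearch(const std::vector<float>& data, float data_min, float data_max,
                      float search_start, float search_end, float search_step,
                      bool with_offset) {
  QuantParam best{1.0f, 0.0f};
  float best_loss = INFINITY;
  for (float f = search_start; f <= search_end; f += search_step) {
    float lo = with_offset ? data_min * f : 0.0f;   // candidate clip range
    float hi = data_max * f;
    float scale = (hi - lo) / 255.0f;               // 2^8 - 1 quantization steps
    float offset = with_offset ? std::round(-lo / scale) : 0.0f;
    float loss = 0.0f;
    for (float x : data) {                          // fake-quantize, accumulate error
      float q = std::fmin(std::fmax(std::round(x / scale) + offset, 0.0f), 255.0f);
      float deq = (q - offset) * scale;
      loss += (x - deq) * (x - deq);
    }
    if (loss < best_loss) { best_loss = loss; best = {scale, offset}; }
  }
  return best;
}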
| /** | |||
| *@brief weights adaptive range quantization. \n | |||
| *@brief Weights Adaptive Range Quantization. \n | |||
| *@par Inputs: | |||
| *@li w:A Tensor of weights. \n | |||
| *@li w_min:A Tensor of weights reduce_min. \n | |||
| *@li w_max:A Tensor of weights reduce_max. \n | |||
| *@li w: A Tensor of weights. \n | |||
| *@li w_min: A Tensor of weights reduce_min. \n | |||
| *@li w_max: A Tensor of weights reduce_max. \n | |||
| *@par Attributes: | |||
| *num_bits: the bits num used for quantize. | |||
| *offset_flag: whether using offset. \n | |||
| *@li num_bits: the bits num used for quantize. | |||
| *@li offset_flag: whether using offset. \n | |||
| *@par Outputs: | |||
| *y: fake quantized weights. \n | |||
| @@ -789,22 +799,22 @@ REG_OP(WtsARQ) | |||
| .OP_END_FACTORY_REG(WtsARQ) | |||
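// A hedged sketch of the symmetric case (offset_flag = false): derive a scale
// from the reduce_min/reduce_max of the weights and num_bits, then quantize
// and dequantize. Names and the clamp bounds are assumptions for illustration.
#include <algorithm>
#include <cmath>
#include <vector>

std::vector<float> FakeQuantWeights(const std::vector<float>& w,
                                    float w_min, float w_max, int num_bits) {
  float qmax = std::pow(2.0f, num_bits - 1) - 1.0f;   // e.g. 127 for 8 bits
  float scale = std::max(std::fabs(w_min), std::fabs(w_max)) / qmax;
  std::vector<float> y(w.size());
  for (size_t i = 0; i < w.size(); ++i) {
    float q = std::round(w[i] / scale);
    q = std::min(std::max(q, -qmax - 1.0f), qmax);    // clamp to the int range
    y[i] = q * scale;                                 // dequantize back to float
  }
  return y;
}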
| /** | |||
| *@brief The acts_ulq. \n | |||
| *@brief Activations Universal Linear Quantization. \n | |||
| *@par Inputs: | |||
| *@li x:A Tensor of feature map | |||
| *@li clamp _min:A Tensor of min clamp value of feature map. | |||
| *@li clamp _max:A Tensor of max clamp value of feature map. | |||
| *@li x: A Tensor of feature map. | |||
*@li clamp_min: A Tensor of min clamp value of feature map.
*@li clamp_max: A Tensor of max clamp value of feature map.
| *@par Attributes: | |||
| *fixed_min: fix min to zero. | |||
| *num_bits: quant bits. \n | |||
| *@li fixed_min: fix min to zero. | |||
| *@li num_bits: quant bits. \n | |||
| *@par Outputs: | |||
| *y: output fake quant feature map. | |||
| *clamp_min_mask: where x > clamp_min | |||
| *clamp_min_mask: where x < clamp_max | |||
| *x_clamped_loss: clamp loss. \n | |||
| *@li y: output fake quant feature map. | |||
| *@li clamp_min_mask: where x > clamp_min. | |||
*@li clamp_max_mask: where x < clamp_max.
| *@li x_clamped_loss: clamp loss. \n | |||
| *@par Third-party framework compatibility | |||
*Compatible with MindSpore.
| @@ -826,12 +836,12 @@ REG_OP(ActsULQ) | |||
| .OP_END_FACTORY_REG(ActsULQ) | |||
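// A per-element sketch of the assumed ActsULQ forward semantics: clamp the
// feature map, fake-quantize the clamped value, and record the clamp masks
// and a squared clamp loss. The loss definition and rounding mode are
// illustrative guesses, not the kernel specification.
#include <cmath>

struct UlqOut { float y; bool min_mask; bool max_mask; float clamp_loss; };

UlqOut ActsUlqElem(float x, float clamp_min, float clamp_max, int num_bits) {
  float steps = std::pow(2.0f, num_bits) - 1.0f;      // number of quant steps
  float scale = (clamp_max - clamp_min) / steps;
  float clamped = std::fmin(std::fmax(x, clamp_min), clamp_max);
  float y = std::round((clamped - clamp_min) / scale) * scale + clamp_min;
  float diff = x - clamped;                           // non-zero only when clamped
  return {y, x > clamp_min, x < clamp_max, diff * diff};
}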
| /** | |||
| *@brief The acts_ulq_input_grad. \n | |||
| *@brief The gradient of Activations Universal Linear Quantization. \n | |||
| *@par Inputs: | |||
| *@li y_grad: A Tensor of gradient | |||
| *@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed' | |||
| *@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed' | |||
| *@li y_grad: A Tensor of gradient. | |||
*@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed.
*@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed.
| *@par Outputs: | |||
*x_grad: The gradient of inputs. \n
| @@ -851,10 +861,10 @@ REG_OP(ActsULQInputGrad) | |||
| .OP_END_FACTORY_REG(ActsULQInputGrad) | |||
| /** | |||
| *@brief The act_ulq_clamp_max_grad. \n | |||
| *@brief The gradient of Activations Universal Linear Quantization clamp max. \n | |||
| *@par Inputs: | |||
| *@li y_grad: A Tensor of gradient | |||
| *@li y_grad: A Tensor of gradient. | |||
| *@li clamp_max_mask: A Tensor of boolean mask indicating whether an additional one is needed. | |||
| *@li x_clamped_loss: A Tensor of gradient. \n | |||
| @@ -876,10 +886,10 @@ REG_OP(ActULQClampMaxGrad) | |||
| .OP_END_FACTORY_REG(ActULQClampMaxGrad) | |||
| /** | |||
| *@brief The act_ulq_clamp_min_grad. \n | |||
| *@brief The gradient of Activations Universal Linear Quantization clamp min. \n | |||
| *@par Inputs: | |||
| *@li y_grad: A Tensor of gradient | |||
| *@li y_grad: A Tensor of gradient. | |||
| *@li clamp_min_mask: A Tensor of boolean mask indicating whether an additional one is needed. | |||
| *@li x_clamped_loss: A Tensor of gradient. \n | |||
| @@ -904,7 +914,7 @@ REG_OP(ActULQClampMinGrad) | |||
| * @brief Computes Lp norm. | |||
| * @par Inputs: | |||
| * @li x: An ND tensor of type float16, float32. \n | |||
| * x: An ND tensor of type float16, float32. \n | |||
| * | |||
| * @par Attributes: | |||
| * @li p: Int, "inf" or "-inf", default value is 2. | |||
| @@ -913,7 +923,7 @@ REG_OP(ActULQClampMinGrad) | |||
| * @li epsilon: Float, default is 1e-12. \n | |||
| * @par Outputs: | |||
| * @li y: An ND tensor of type float16, float32. The shape of y is depending | |||
| * y: An ND tensor of type float16, float32. The shape of y is depending | |||
| * on axes and keepdim. \n | |||
| * @par Third-party framework compatibility | |||
| @@ -932,11 +942,13 @@ REG_OP(LpNorm) | |||
* @brief Converts two real tensors into a complex tensor.
| * @par Inputs: | |||
| * @li real: An ND tensor of type float32. double | |||
| * @li imag: An ND tensor of type float32. double \n | |||
* @li real: An ND tensor of type float32 or double, representing the real part of a complex number.
* @li imag: An ND tensor of type float32 or double, representing the imaginary part of a complex number. \n
| * | |||
| * @par Attributes: | |||
* Tout: The data type of the output.
| * @par Outputs: | |||
| * @li out: An ND tensor of type complex64, complex128 \n | |||
| * out: An ND tensor of type complex64, complex128 \n | |||
| */ | |||
| REG_OP(Complex) | |||
| .INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
| @@ -949,10 +961,13 @@ REG_OP(Complex) | |||
* @brief Returns the imaginary part of a complex tensor.
| * @par Inputs: | |||
| * @li input: An ND tensor of type complex64, complex128 \n | |||
| * | |||
| * input: An ND tensor of type complex64, complex128 \n | |||
| * @par Attributes: | |||
* Tout: The data type of the output.
| * @par Outputs: | |||
| * @li output: An ND tensor of type float32. double \n | |||
* output: An ND tensor of type float32 or double. \n
| */ | |||
| REG_OP(Imag) | |||
| .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||
| @@ -988,7 +1003,7 @@ REG_OP(Angle) | |||
| * float16, float32. \n | |||
| *@par Attributes: | |||
| * @li reduction: Specifies the reduction to apply to the output: | |||
| * reduction: Specifies the reduction to apply to the output: | |||
| * 'none' | 'mean' | 'sum'. Default: 'mean'. \n | |||
| *@par Outputs: | |||
| @@ -61,21 +61,28 @@ REG_OP(MatMul) | |||
| *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n | |||
| *@par Inputs: | |||
| *Two inputs, including: | |||
| * @li x1: A matrix Tensor. 2D. Must be one of the following types: float16, | |||
| * float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. | |||
| * @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, | |||
| * float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. | |||
| * @li bias: A 1D Tensor. Must be one of the following types: float16, | |||
| * float32, int32. Has format [ND, NHWC] . \n | |||
| *Four inputs, including: | |||
| * @li x1: A matrix Tensor. 2D. Must be one of the following types: float32, | |||
| float16, int32, int8. Has format [ND, NHWC, FRACTAL_NZ]. | |||
| * @li x2: A matrix Tensor. 2D. Must be one of the following types: float32, | |||
| float16, int32, int8. Has format [ND, NHWC, FRACTAL_NZ]. | |||
| * @li bias: A 1D Tensor. Must be one of the following types: float32, | |||
| float16, int32. Has format [ND, NHWC]. | |||
* @li offset_w: An optional 1D Tensor for quantized inference. Type is int8.
| Reserved. \n | |||
| *@par Attributes: | |||
| *@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||
| *@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||
| * @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to | |||
| [M, K]. | |||
| * @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to | |||
| [K, N]. | |||
| * @li offset_x: An optional integer for quantized MatMulV2. | |||
* The negative offset added to the input x1 for int8 type. Ensure offset_x is
| within the effective range of int8 [-128, 127]. Defaults to "0". \n | |||
| *@par Outputs: | |||
| *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | |||
| * float32, int32. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||
| *y: The result matrix Tensor. 2D. Must be one of the following types: float32, | |||
| float16, int32. Has format [ND, NHWC, FRACTAL_NZ]. \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator BatchMatmul. | |||
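// To make the transpose semantics concrete, a shape-only sketch (an
// illustration, not the MatMulV2 implementation): with transpose_x1 the stored
// [K, M] matrix is consumed as [M, K], and with transpose_x2 the stored [N, K]
// matrix is consumed as [K, N], so the result is always [M, N].
#include <utility>

std::pair<int, int> MatMulOutputShape(int r1, int c1, int r2, int c2,
                                      bool transpose_x1, bool transpose_x2) {
  int m = transpose_x1 ? c1 : r1;    // rows of the effective x1
  int k1 = transpose_x1 ? r1 : c1;   // cols of the effective x1
  int k2 = transpose_x2 ? c2 : r2;   // rows of the effective x2
  int n = transpose_x2 ? r2 : c2;    // cols of the effective x2
  return (k1 == k2) ? std::make_pair(m, n)
                    : std::make_pair(-1, -1);   // -1 signals a K mismatch
}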
| @@ -95,19 +102,27 @@ REG_OP(MatMulV2) | |||
| *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n | |||
| *@par Inputs: | |||
| *Two inputs, including: | |||
| *Five inputs, including: | |||
| * @li x1: A matrix Tensor. 2D. Must be one of the following types: int8. | |||
| * @li x2: A matrix Tensor. 2D. Must be one of the following types: int8. | |||
| * @li compress_index: A compress index matrix of type int8. | |||
| * @li bias: A 1D Tensor. Must be one of the following types: int32, float16. | |||
| * @li bias: An optional Tensor. 1D. Must be one of the following types: int32, | |||
| float16. | |||
| * @li offset_w: An optional matrix Tensor. 2D. Must be one of the following | |||
| types: int8. \n | |||
| *@par Attributes: | |||
| *@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||
| *@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||
| *@li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to | |||
| [M, K]. | |||
| *@li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to | |||
| [K, N]. | |||
| *@li offset_x: An optional integer for quantized MatMulV2Compress. | |||
*The negative offset added to the input x1 for int8 type. Ensure offset_x is
| within the effective range of int8 [-128, 127]. Defaults to "0". \n | |||
| *@par Outputs: | |||
| *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | |||
| * int32. \n | |||
| *y: The result matrix Tensor. 2D. Must be one of the following types: int32, | |||
| * float16. \n | |||
| */ | |||
| REG_OP(MatMulV2Compress) | |||
| @@ -488,13 +503,13 @@ REG_OP(ScatterElements) | |||
| *@par Inputs: | |||
| * Three inputs, including: | |||
| *@li var: An ND Tensor . \n | |||
| *@li var: An ND Tensor . | |||
| *Must be one of the following types: float16, float32, int32, int8, uint8 | |||
| *@li indices: An ND Tensor of type int32 or int64 | |||
| *@li updates: An Tensor. format:NCHW, NHWC . \n | |||
*@li updates: A Tensor. Format: NCHW, NHWC .
| *Must be one of the following types: float16, float32, int32, int8, uint8 | |||
| @@ -516,6 +531,61 @@ REG_OP(ScatterAdd) | |||
| .ATTR(use_locking, Bool, false) | |||
| .OP_END_FACTORY_REG(ScatterAdd) | |||
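// A minimal reference sketch of the scatter-add semantics, assuming a flat
// var tensor and 1-D indices/updates; the real op supports ND shapes.
#include <vector>

void ScatterAddRef(std::vector<float>& var,
                   const std::vector<int>& indices,
                   const std::vector<float>& updates) {
  for (size_t i = 0; i < indices.size(); ++i) {
    var[indices[i]] += updates[i];   // duplicate indices accumulate
  }
}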
| /** | |||
| *@brief Use a scalar to modify the tensor. \n | |||
| *@par Inputs: | |||
*One input, including:
| *@li index: An ND Tensor . \n | |||
| *Must be one of the following types: float16, float32, int32, int8, uint8 | |||
| *@par Attributes: | |||
* dim: the axis along which to index.
* value: the source element(s) to scatter . \n
| *@par Outputs: | |||
| *y: A Tensor. Has the same type and format as input "index" . \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator ScatterScalar. | |||
| */ | |||
| REG_OP(ScatterScalar) | |||
| .INPUT(index, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||
| .REQUIRED_ATTR(dim, Int) | |||
| .REQUIRED_ATTR(value, Float) | |||
| .OP_END_FACTORY_REG(ScatterScalar) | |||
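// Because the contract above is terse, here is a hedged 2-D sketch of the
// assumed ScatterScalar semantics along dim = 1: positions named by "index"
// are overwritten with the scalar "value". The in-place layout is an
// illustrative assumption.
#include <vector>

void ScatterScalarDim1(std::vector<std::vector<float>>& y,
                       const std::vector<std::vector<int>>& index, float value) {
  for (size_t i = 0; i < index.size(); ++i)
    for (size_t j = 0; j < index[i].size(); ++j)
      y[i][index[i][j]] = value;     // scatter along the second axis
}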
| /** | |||
| *@brief Use a tensor to modify the tensor . \n | |||
| *@par Inputs: | |||
| * Two inputs, including: | |||
| *@li index: An ND Tensor . \n | |||
| *Must be one of the following types: float16, float32, int32, int8, uint8 | |||
| *@li src: An ND Tensor . \n | |||
| *Must be one of the following types: float16, float32, int32, int8, uint8 | |||
| *@par Attributes: | |||
* dim: the axis along which to index . \n
| *@par Outputs: | |||
| *y: A Tensor. Has the same type and format as input "index" . \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator ScatterTensor. | |||
| */ | |||
| REG_OP(ScatterTensor) | |||
| .INPUT(index, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||
| .INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||
| .REQUIRED_ATTR(dim, Int) | |||
| .OP_END_FACTORY_REG(ScatterTensor) | |||
| /** | |||
| *@brief Divides a variable reference by sparse updates . \n | |||
| @@ -530,7 +600,7 @@ REG_OP(ScatterAdd) | |||
| *Must be one of the following types: float16, float, int32, int8, uint8 | |||
| *@par Attributes: | |||
| *@li use_locking: An optional bool. Defaults to "False". If "True", | |||
| *use_locking: An optional bool. Defaults to "False". If "True", | |||
| * the operation will be protected by a lock . \n | |||
| *@par Outputs: | |||
| @@ -752,10 +822,12 @@ REG_OP(DiagPart) | |||
| *@par Attributes: | |||
| *@li num_output: Reserved. | |||
| *@li transpose: A bool, specifying weight whether to transpose, either "true" or "false". Defaults to "false". | |||
*@li transpose: A bool, specifying whether to transpose the input w, either "true" or "false". Defaults to "false".
*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1.
* The product of the subsequent dimensions starting from the first dimension or the second dimension is "K".
| *@li offset_x: Reserved . \n | |||
| *@li offset_x: An optional integer for quantized FullyConnection. | |||
| *The negative offset added to the input image for int8 type. Ensure offset_x within the | |||
| *effective range of int8 [-128, 127]. Defaults to "0". \n | |||
| *@par Outputs: | |||
| *y: The result tensor of type float16, int32, float32 . \n | |||
| @@ -779,27 +851,34 @@ REG_OP(FullyConnection) | |||
| .OP_END_FACTORY_REG(FullyConnection) | |||
| /** | |||
| *@brief Also known as a "fully-connected-compress" layer, computes an inner product with a set of learned weights, and (optionally) adds biases . \n | |||
| *@brief Also known as a "fully-connected-compress" layer, computes an inner | |||
| product with a set of learned weights, and (optionally) adds biases . \n | |||
| *@par Inputs: | |||
| * Four inputs, including: | |||
| * Five inputs, including: | |||
| *@li x: A Tensor of type uint8, int8. | |||
| *@li w: A weight matrix of type int8, int8. | |||
| *@li w: A compress index matrix of type int8, int8. | |||
| *@li b: A Tensor of type float16, int32, int32. | |||
| *@li offset_w: A Tensor of type int8.i | |||
| *@li w: A weight matrix of type int8. | |||
| *@li compress_index: A compress index matrix of type int8. | |||
| *@li b: A Tensor of type int32. | |||
| *@li offset_w: A Tensor of type int8. | |||
| *@par Attributes: | |||
| *@li num_output: Reserved. | |||
| *@li transpose: A bool, specifying whether to transpose, either "true" or "false". Defaults to "false". | |||
| *@li axis: Reserved. | |||
| *@li offset_x: Reserved . \n | |||
*@li num_output: An int, specifying the number of outputs.
| *@li transpose: A bool, specifying whether to transpose input w, either "true" | |||
| or "false". Defaults to "false". | |||
*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K"
starts from. Defaults to "1".
* The product of the subsequent dimensions starting from the first dimension or the
| second dimension is "K". | |||
| *@li offset_x: An optional integer for quantized FullyConnectionCompress. | |||
*The negative offset added to the input image for int8 type. Ensure offset_x is
| within the effective range of int8 [-128, 127]. Defaults to "0". \n | |||
| *@par Outputs: | |||
| *y: The result tensor of type int32 . \n | |||
| *y: The result tensor of type int32. \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the Caffe operator InnerProduct . \n | |||
| * Compatible with the Caffe operator InnerProduct. \n | |||
| *@par Quantization supported or not | |||
| * Yes | |||
| @@ -925,13 +1004,13 @@ REG_OP(ScatterMin) | |||
| *@par Inputs: | |||
| * Three inputs, including: | |||
| *@li var: An ND Tensor . \n | |||
| *@li var: An ND Tensor . | |||
| *Must be one of the following types: float16, float, int32, int8, uint8 | |||
| *@li indices: An NCHW, NHWC, or ND Tensor . \n | |||
| *Must be one of the following types: int32 or int64 | |||
| *@li updates: An NCHW, NHWC, or ND Tensor . \n | |||
| *@li updates: An NCHW, NHWC, or ND Tensor . | |||
| *Must be one of the following types: float16, float, int32, int8, uint8 | |||
| @@ -958,13 +1037,13 @@ REG_OP(ScatterMax) | |||
| *@par Inputs: | |||
| * Three inputs, including: | |||
| *@li var: An ND Tensor . \n | |||
| *@li var: An ND Tensor . | |||
| *Must be one of the following types: float16, float, int32, int8, uint8 | |||
| *@li indices: An ND Tensor . \n | |||
| *Must be one of the following types: int32 or int64 | |||
| *@li updates: An ND Tensor . \n | |||
| *@li updates: An ND Tensor . | |||
| *Must be one of the following types: float16, float, int32, int8, uint8 | |||
| @@ -1112,15 +1191,47 @@ REG_OP(IndexAdd) | |||
| .ATTR(axis, Int, 0) | |||
| .OP_END_FACTORY_REG(IndexAdd) | |||
| /** | |||
* @brief Replaces the values in x1 at the positions given by "indices"
* with the values in x2.
| * @par Inputs: | |||
| * Three inputs, including: | |||
| * @li x1: A Tensor. Must be one of the following types: | |||
| * float16, float32, int32, int8, uint8. | |||
| * @li x2: A Tensor of the same type as "x1". | |||
| * @li indices: A Tensor of the indices, type should be int32. | |||
| * @par Attributes: | |||
* @li accumulate: Whether to accumulate x2 into x1 instead of replacing. Defaults to 0.
| * @par Outputs: | |||
| * @li y: A Tensor. Same as input "x1". | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator index_put. | |||
| * @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
| */ | |||
| REG_OP(IndexPut) | |||
| .INPUT(x1, TensorType({DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||
| .INPUT(x2, TensorType({DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||
| .INPUT(indices, TensorType({DT_INT64, DT_INT32})) | |||
| .OUTPUT(y, TensorType({DT_INT64, DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||
| .ATTR(accumulate, Int, 0) | |||
| .OP_END_FACTORY_REG(IndexPut) | |||
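// A minimal sketch of the assumed index_put semantics with 1-D indices along
// the first axis: accumulate = 0 replaces, any other value accumulates.
#include <vector>

void IndexPutRef(std::vector<float>& x1, const std::vector<float>& x2,
                 const std::vector<int>& indices, int accumulate) {
  for (size_t i = 0; i < indices.size(); ++i) {
    if (accumulate == 0) x1[indices[i]] = x2[i];   // replace
    else                 x1[indices[i]] += x2[i];  // self-accumulate
  }
}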
| /** | |||
| *@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n | |||
| *@par Inputs: | |||
| * Two inputs, including: | |||
| *@li x: A Tensor. Must be one of the following types: | |||
| * float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||
| * qint8, quint8, qint32, uint16, complex128, uint32, uint64. | |||
*@li diagonal: (int, optional) the diagonal to consider. \n
| *x: A Tensor. Must be one of the following types: | |||
| *float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||
| *qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n | |||
| *@par Attributes: | |||
| *diagonal: An optional attribute indicates the diagonal to consider. \n | |||
| *@par Outputs: | |||
| *y: A Tensor. Has the same type as "x" . \n | |||
| @@ -1138,11 +1249,12 @@ REG_OP(Triu) | |||
| *@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n | |||
| *@par Inputs: | |||
| * Two inputs, including: | |||
| *@li x: A Tensor. Must be one of the following types: | |||
| * float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||
| * qint8, quint8, qint32, uint16, complex128, uint32, uint64. | |||
*@li diagonal: (int, optional) the diagonal to consider. \n
| *x: A Tensor. Must be one of the following types: | |||
| *float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||
| *qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n | |||
| *@par Attributes: | |||
| *diagonal: An optional attribute indicates the diagonal to consider. \n | |||
| *@par Outputs: | |||
| *y: A Tensor. Has the same type as "x" . \n | |||
| @@ -1213,6 +1325,30 @@ REG_OP(Eye) | |||
| .ATTR(dtype, Int, 0) | |||
| .OP_END_FACTORY_REG(Eye) | |||
| /** | |||
*@brief Fills the diagonal of a tensor that has at least 2 dimensions with a value . \n
| *@par Inputs: | |||
| *x: A Tensor. Must be one of the following types: | |||
| * float32, int32, int64 . \n | |||
| *@par Outputs: | |||
| *y: A Tensor. Has the same type as "x" . \n | |||
| *@par Attributes: | |||
*@li fill_value: The value to fill in.
*@li wrap: An optional bool. Defaults to "False". If "True", use recursive fill. \n
| *@par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator FillDiagonal. | |||
| */ | |||
| REG_OP(FillDiagonal) | |||
| .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT64})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT64})) | |||
| .REQUIRED_ATTR(fill_value, Float) | |||
| .ATTR(wrap, Bool, false) | |||
| .OP_END_FACTORY_REG(FillDiagonal) | |||
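// A hedged 2-D sketch of the assumed FillDiagonal semantics, mirroring the
// PyTorch fill_diagonal_ behavior: with wrap = true the diagonal restarts
// after every (cols + 1) rows of a tall matrix.
#include <vector>

void FillDiagonalRef(std::vector<std::vector<float>>& x, float fill_value, bool wrap) {
  size_t rows = x.size(), cols = rows ? x[0].size() : 0;
  for (size_t i = 0; i < rows; ++i) {
    size_t j = wrap ? i % (cols + 1) : i;   // wrap leaves one blank row per block
    if (j < cols) x[i][j] = fill_value;
  }
}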
| } // namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ | |||
| @@ -152,6 +152,42 @@ REG_OP(Iou) | |||
| .ATTR(mode, String, "iou") | |||
| .OP_END_FACTORY_REG(Iou) | |||
| /** | |||
*@brief First calculates the smallest enclosing box of the two input boxes and the IoU,
* then the proportion of the enclosing area that is covered by neither box,
* and finally subtracts this proportion from the IoU to obtain the GIoU . \n
| *@par Inputs: | |||
| * Two inputs, including: | |||
| *@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with | |||
| * shape (N, 4). "N" indicates the number of bounding boxes, and the value | |||
| * "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. | |||
| *@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 | |||
| * with shape (M, 4). "M" indicates the number of ground truth boxes, and | |||
| * the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n | |||
| *@par Attributes: | |||
| *@li trans: An optional bool, true for 'xywh', false for 'xyxy'. | |||
*@li is_cross: An optional bool, controls whether the output shape is [M, N] or [1, N].
| *@li mode: Computation mode, a character string with the value range of [iou, iof] . \n | |||
| *@par Outputs: | |||
| * overlap: A 2D Tensor of type float16 or float32 with shape [M, N] or [1, N], | |||
| * specifying the IoU or IoF ratio . \n | |||
| *@attention Constraints: | |||
| * Only computation of float16 data is supported. To avoid overflow, the input | |||
| * length and width are scaled by 0.2 internally. | |||
| */ | |||
| REG_OP(GIoU) | |||
| .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .ATTR(trans, Bool, false) | |||
| .ATTR(is_cross, Bool, true) | |||
| .ATTR(mode, String, "iou") | |||
| .OP_END_FACTORY_REG(GIoU) | |||
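// The brief above reduces to GIoU = IoU - (area(C) - area(union)) / area(C),
// where C is the smallest box enclosing both inputs. A single-pair sketch for
// 'xyxy' boxes (trans = false), offered only as an illustration of the math.
#include <algorithm>

float GIoUOnePair(const float b[4], const float g[4]) {
  auto area = [](const float r[4]) { return (r[2] - r[0]) * (r[3] - r[1]); };
  float iw = std::min(b[2], g[2]) - std::max(b[0], g[0]);
  float ih = std::min(b[3], g[3]) - std::max(b[1], g[1]);
  float inter = std::max(iw, 0.0f) * std::max(ih, 0.0f);
  float uni = area(b) + area(g) - inter;
  float cw = std::max(b[2], g[2]) - std::min(b[0], g[0]);   // enclosing box C
  float ch = std::max(b[3], g[3]) - std::min(b[1], g[1]);
  float c = cw * ch;
  return inter / uni - (c - uni) / c;
}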
| /** | |||
| *@brief Performs the backpropagation of ROIAlign for training scenarios . \n | |||
| @@ -417,7 +453,7 @@ REG_OP(PSROIPooling) | |||
| *@brief Returns detection result . \n | |||
| *@par Inputs: | |||
| * Four inputs, including: | |||
| * Five inputs, including: | |||
*@li rois: An NCHW tensor of type float16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput.
*@li bbox_delta: An NCHWC0 tensor of type float16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI.
*@li score: An NCHWC0 tensor of type float16 or float32, specifying the probability of each class. Class 0 is the background class.
| @@ -459,7 +495,7 @@ REG_OP(FSRDetectionOutput) | |||
| *@brief Returns detection result . \n | |||
| *@par Inputs: | |||
| * Four inputs, including: | |||
| * Three inputs, including: | |||
*@li bbox_delta: An ND tensor of type float16 or float32, specifying the box loc predictions, used as the input of operator SSDDetectionOutput.
*@li score: An ND tensor of type float16 or float32, specifying the box confidences data, used as the input of operator SSDDetectionOutput.
*@li anchors: An ND tensor of type float16 or float32, output from operator PriorBoxD, used as the input of operator SSDDetectionOutput.
| @@ -474,7 +510,6 @@ REG_OP(FSRDetectionOutput) | |||
| *@li code_type: An optional int32, specify the code type. Defaults to 1(only supports 2). The corner is 1, center_size is 2, corner_size is 3 | |||
| *@li keep_top_k: An optional int32, specify the topk value after nms. Defaults to -1 | |||
| *@li confidence_threshold: An optional float32, specify the topk filter threshold. Only consider detections with confidence greater than the threshold | |||
| *@li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output". | |||
| *@par Outputs: | |||
| *@li out_boxnum: A tensor of type int32, specifying the number of output boxes. | |||
| *@li y: A tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box. | |||
| @@ -989,26 +1024,26 @@ REG_OP(SPP) | |||
| * feature map . \n | |||
| *@attention Constraints: | |||
| *@li For the feature map input: | |||
| (1) If pooled_h = pooled_w = 2, the feature map size must not exceed 50. | |||
| (2) If pooled_h = pooled_w = 3, the feature map size must not exceed 60. | |||
| (3) If pooled_h = pooled_w = 4, the feature map size must not exceed 70. | |||
| (4) If pooled_h = pooled_w = 5, the feature map size must not exceed 70. | |||
| (5) If pooled_h = pooled_w = 6, the feature map size must not exceed 80. | |||
| (6) If pooled_h = pooled_w = 7, the feature map size must not exceed 80. | |||
| (7) If pooled_h = pooled_w = 8, the feature map size must not exceed 80. | |||
| (8) If pooled_h = pooled_w = 9, the feature map size must not exceed 70. | |||
| (9) If pooled_h = pooled_w = 10, the feature map size must not exceed 70. | |||
| (10) If pooled_h = pooled_w = 11, the feature map size must not exceed 70. | |||
| (11) If pooled_h = pooled_w = 12, the feature map size must not exceed 70. | |||
| (12) If pooled_h = pooled_w = 13, the feature map size must not exceed 70. | |||
| (13) If pooled_h = pooled_w = 14, the feature map size must not exceed 70. | |||
| (14) If pooled_h = pooled_w = 15, the feature map size must not exceed 70. | |||
| (15) If pooled_h = pooled_w = 16, the feature map size must not exceed 70. | |||
| (16) If pooled_h = pooled_w = 17, the feature map size must not exceed 50. | |||
| (17) If pooled_h = pooled_w = 18, the feature map size must not exceed 40. | |||
| (18) If pooled_h = pooled_w = 19, the feature map size must not exceed 40. | |||
| (19) If pooled_h = pooled_w = 20, the feature map size must not exceed 40. | |||
| * For the feature map input: | |||
| *@li If pooled_h = pooled_w = 2, the feature map size must not exceed 50. | |||
| *@li If pooled_h = pooled_w = 3, the feature map size must not exceed 60. | |||
| *@li If pooled_h = pooled_w = 4, the feature map size must not exceed 70. | |||
| *@li If pooled_h = pooled_w = 5, the feature map size must not exceed 70. | |||
| *@li If pooled_h = pooled_w = 6, the feature map size must not exceed 80. | |||
| *@li If pooled_h = pooled_w = 7, the feature map size must not exceed 80. | |||
| *@li If pooled_h = pooled_w = 8, the feature map size must not exceed 80. | |||
| *@li If pooled_h = pooled_w = 9, the feature map size must not exceed 70. | |||
| *@li If pooled_h = pooled_w = 10, the feature map size must not exceed 70. | |||
| *@li If pooled_h = pooled_w = 11, the feature map size must not exceed 70. | |||
| *@li If pooled_h = pooled_w = 12, the feature map size must not exceed 70. | |||
| *@li If pooled_h = pooled_w = 13, the feature map size must not exceed 70. | |||
| *@li If pooled_h = pooled_w = 14, the feature map size must not exceed 70. | |||
| *@li If pooled_h = pooled_w = 15, the feature map size must not exceed 70. | |||
| *@li If pooled_h = pooled_w = 16, the feature map size must not exceed 70. | |||
| *@li If pooled_h = pooled_w = 17, the feature map size must not exceed 50. | |||
| *@li If pooled_h = pooled_w = 18, the feature map size must not exceed 40. | |||
| *@li If pooled_h = pooled_w = 19, the feature map size must not exceed 40. | |||
| *@li If pooled_h = pooled_w = 20, the feature map size must not exceed 40. | |||
| *@par Third-party framework compatibility | |||
| * It is a custom operator. It has no corresponding operator in Caffe. | |||
| */ | |||
| @@ -1222,9 +1257,7 @@ REG_OP(RpnProposalsD) | |||
| * @li box_filter: bool, mark of box_filter. Defaults to "true" | |||
| * @li core_max_num: int, max number of core. Defaults to "8" | |||
| *@par Outputs: | |||
| * @li sorted_rois: A Tensor. Must be float16. N-D with shape [N, 4]. | |||
| * @li sorted_scores: A Tensor. Must be float16. N-D with shape [N, 1]. | |||
| * @li sorted_classes: A Tensor. Must be float16. N-D with shape [N, 1]. | |||
| *sorted_box: A Tensor. Must be float16. N-D with shape [N, 1]. | |||
| */ | |||
| REG_OP(RpnProposalPostProcessing) | |||
| .INPUT(sorted_proposal, TensorType({DT_FLOAT16})) | |||
| @@ -1382,7 +1415,7 @@ REG_OP(BatchMultiClassNonMaxSuppression) | |||
| * @li shape_hw: A 1D Tensor of type int32 . \n | |||
| * @par Attributes: | |||
| * @li reversed_box: An optional bool, specifying the last two dims is "4,num" or | |||
| * reversed_box: An optional bool, specifying the last two dims is "4,num" or | |||
| * "num,4", "true" for "4,num", "false" for "num,4". Defaults to "false" . \n | |||
| * @par Outputs: | |||
| @@ -1429,9 +1462,9 @@ REG_OP(NormalizeBBox) | |||
| * @li anchors: A Tensor. Must be int32. | |||
| * | |||
| *@par Attributes: | |||
| * @li scales: optional, listfloat, . | |||
* @li scales: an optional list of floats.
* @li decode_clip: an optional float, threshold of the decode process.
| * @li reversed_boxes: optional, bool,. | |||
| * @li reversed_boxes: optional, bool. | |||
| * | |||
| *@par Outputs: | |||
| * y: A Tensor. Must have the same type as box_predictions. | |||
| @@ -1446,16 +1479,16 @@ REG_OP(DecodeBboxV2) | |||
| .OP_END_FACTORY_REG(DecodeBboxV2) | |||
| /** | |||
| *@brief Computes sort function. | |||
*@brief Sorts the input tensor and returns sorted values and their indices.
| * | |||
| *@par Inputs: | |||
| *Inputs include: | |||
| * x: A Tensor. Dtype support: flaot16, flaot, int16, int8, | |||
| * x: A Tensor. Dtype support: float16, float, int16, int8, | |||
| uint8, int32, int64. | |||
| * | |||
| *@par Attributes: | |||
| * @li axis: optional, int. | |||
| * @li descending: optional,bool. | |||
* @li axis: An optional attribute that indicates the sorting axis.
* @li descending: An optional attribute that indicates whether to sort in descending order.
| * | |||
| *@par Outputs: | |||
| * @li y1: A Tensor. Must have the same type as x. | |||
| @@ -1568,16 +1601,18 @@ deciding when to remove boxes based on score . \n | |||
| the last dim representing (batch_id,class_id,index_id) . \n | |||
| *@par Attributes: | |||
| *center_point_box:Integer indicate the format of the box data. | |||
*@li center_point_box: Integer indicating the format of the box data.
| The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2] | |||
| where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair | |||
| of box corners and the coordinates can be provided as normalized | |||
| (i.e., lying in the interval [0, 1]) or absolute.Mostly used for TF models. | |||
| 1 - the box data is supplied as [x_center, y_center, width, height]. | |||
| Mostly used for Pytorch models. \n | |||
| *@li max_boxes_size: An optional attribute integer representing the real maximum | |||
| *number of boxes to be selected by non max suppression . \n | |||
| *@par Outputs: | |||
| *@li selected_indices: A 2-D integer tensor of shape [M] representing the | |||
| *selected_indices: A 2-D integer tensor of shape [M] representing the | |||
| selected indices from the boxes tensor, where M <= max_output_size. \n | |||
| *@attention Constraints: | |||
| @@ -1603,7 +1638,7 @@ REG_OP(NonMaxSuppressionV7) | |||
| *@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n | |||
| *@par Inputs: | |||
| * Three inputs, including: | |||
| * Two inputs, including: | |||
| *@li features: A 5HD Tensor list of type float32 or float16. | |||
| *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, | |||
| * the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1". | |||
| @@ -1760,7 +1795,7 @@ REG_OP(AnchorResponseFlags) | |||
| * "N" indicates the number of ROIs. \n | |||
| *@par Attributes: | |||
| *@li performance_mode: select performance mode, "high_precision" or "high_performance". | |||
| *performance_mode: select performance mode, "high_precision" or "high_performance". | |||
| * select "high_precision" when input type is float32, the output tensor precision | |||
| * will be smaller than 0.0001, select "high_performance" when input type is float32, | |||
| * the ops will be best performance, but precision will be only smaller than 0.005. | |||
| @@ -1795,12 +1830,12 @@ REG_OP(YoloBoxesEncode) | |||
| *@li num_gts: A Tensor. Support int32. real k. shape (1, ) | |||
| *@par Attributes: | |||
| *@li output_dim: float. IOU threshold for positive bboxes. | |||
| *@li group_size: float. minimum iou for a bbox to be considered as a positive bbox | |||
| *@li spatial_scale: bool. whether to assign all bboxes with the same highest overlap with some gt to that gt. | |||
| *@li pos_iou_thr: float. IOU threshold for positive bboxes. | |||
| *@li min_pos_iou: float. minimum iou for a bbox to be considered as a positive bbox | |||
| *@li gt_max_assign_all: bool. whether to assign all bboxes with the same highest overlap with some gt to that gt. | |||
| *@par Outputs: | |||
| *@li assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ). | |||
| * assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ). | |||
| */ | |||
| REG_OP(GridAssignPositive) | |||
| .INPUT(assigned_gt_inds, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
| @@ -1816,6 +1851,40 @@ REG_OP(GridAssignPositive) | |||
| .REQUIRED_ATTR(min_pos_iou, Float) | |||
| .REQUIRED_ATTR(gt_max_assign_all, Bool) | |||
| .OP_END_FACTORY_REG(GridAssignPositive) | |||
| /** | |||
*@brief Computes the gradient of GIoU . \n
| *@par Inputs: | |||
*@li dy: gradient increment data, a 1D Tensor of type float16 or float32 with
| * shape (N,). | |||
| *@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with | |||
| * shape (4, N). "N" indicates the number of bounding boxes, and the value | |||
| * "4" refers to [x1, y1, x2, y2] or [x, y, w, h]. | |||
| *@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 | |||
| * with shape (4, M). "M" indicates the number of ground truth boxes, and | |||
| * the value "4" refers to [x1, y1, x2, y2] or [x, y, w, h] . \n | |||
| *@par Attributes: | |||
| *@li trans: An optional attr, true for 'xywh', false for 'xyxy', only support true now. | |||
| *@li is_cross: An optional attr, if false M equals N, only support false now. | |||
| *@li mode: An optional attr, a character string with the value range of ['iou', 'iof'], | |||
| * only support 'iou' now. \n | |||
| *@par Outputs: | |||
| *@li dbboxes: A 2D Tensor of type float16 or float32 with shape [4, N]. | |||
| *@li dgtboxes: A 2D Tensor of type float16 or float32 with shape [4, M]. | |||
| */ | |||
| REG_OP(GIoUGrad) | |||
| .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OUTPUT(dbboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OUTPUT(dgtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .ATTR(trans, Bool, false) | |||
| .ATTR(is_cross, Bool, true) | |||
| .ATTR(mode, String, "iou") | |||
| .OP_END_FACTORY_REG(GIoUGrad) | |||
| } // namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | |||
| @@ -54,15 +54,16 @@ REG_OP(LogSoftmaxGrad) | |||
| *@par Inputs: | |||
| *Two inputs, including: | |||
| * @li features: A Tensor. Must be one of the following types: half, float32, double. | |||
| * A "batch_size * num_classes" matrix. | |||
| *A "batch_size * num_classes" matrix. | |||
| * @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'. | |||
| * batch_size vector with values in [0, num_classes). | |||
| * This is the label for the given minibatch entry. | |||
| *batch_size vector with values in [0, num_classes). | |||
| *This is the label for the given minibatch entry. \n | |||
| *@par Outputs: | |||
| *loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". | |||
| *backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n | |||
| *@li loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". | |||
| *@li backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). | |||
| Has the same type as "features" . \n | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the TensorFlow operator SparseSoftmaxCrossEntropyWithLogits. | |||
| @@ -84,8 +85,8 @@ REG_OP(SparseSoftmaxCrossEntropyWithLogits) | |||
| * @li labels: A Tensor of the same type as "features". A "batch_size * num_classes" matrix . \n | |||
| *@par Outputs: | |||
| *loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". | |||
| *backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n | |||
| * @li loss: A Tensor for per example loss (a "batch_size" vector). Has the same type as "features". | |||
| * @li backprop: A Tensor for the backpropagated gradients (a batch_size * num_classes matrix). Has the same type as "features" . \n | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the TensorFlow operator SoftmaxCrossEntropyWithLogits. | |||
| @@ -127,12 +128,13 @@ REG_OP(SoftmaxGrad) | |||
| *@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n | |||
| *@par Inputs: | |||
| * Two inputs, including: | |||
| * Three inputs, including: | |||
| *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. | |||
| *@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n | |||
*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value.
*@li dout: A multi-dimensional Tensor of type float16 or float32, specifying the gradient transferred from the upper layer. \n
| *@par Outputs: | |||
| *loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n | |||
*gradient: The gradient of the sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n
| *@par Third-party framework compatibility | |||
| * Compatible with the scenario where "reduction" is set to "none"of PyTorch operator SigmoidCrossEntropyWithLogitsGrad. | |||
| @@ -148,13 +150,12 @@ REG_OP(SigmoidCrossEntropyWithLogitsGrad) | |||
| *@brief Performs the backpropagation of SigmoidCrossEntropyWithLogits for training scenarios . \n | |||
| *@par Inputs: | |||
| * Three inputs, including: | |||
| * Two inputs, including: | |||
| *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. | |||
| *@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. | |||
| *@li dout: A multi-dimensional Tensor of float16 or float32, specifying the gradient transferred from the upper layer . \n | |||
| *@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. \n | |||
| *@par Outputs: | |||
| *gradient: Return gradient. Has the same dimensions and type as "predict" . \n | |||
| *loss: Return loss. Has the same dimensions and type as "predict" . \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the scenario where "reduction" is set to "none"of PyTorch operator SigmoidCrossEntropyWithLogits. | |||
| @@ -572,7 +573,7 @@ REG_OP(LayerNorm) | |||
| *@par Inputs: | |||
| *One input, including: | |||
| * @li x: A Tensor. Must be one of the following types: float16, float32 . \n | |||
| * x: A Tensor. Must be one of the following types: float16, float32 . \n | |||
| *@par Attributes: | |||
| * @li p: Specify L_p norm, the type is float. | |||
| @@ -581,7 +582,7 @@ REG_OP(LayerNorm) | |||
| *@par Outputs: | |||
| *One outputs, including: | |||
| * @li y: shape and dtype of output, should be same shape and type as input. | |||
| * y: shape and dtype of output, should be same shape and type as input. | |||
| */ | |||
| REG_OP(Renorm) | |||
| .INPUT(x, TensorType::BasicType()) | |||
| @@ -811,7 +812,7 @@ REG_OP(LayerNormBetaGammaBackpropV2) | |||
| * shape of "keep_prob" should be (1,) or [1,]. | |||
| * Has the same type as "x" . \n | |||
| *@par Output: | |||
| *@par Outputs: | |||
| *y: A mutable Tensor. Has the same type as "x". | |||
| */ | |||
| REG_OP(DropOutDoMask) | |||
| @@ -839,7 +840,7 @@ REG_OP(DropOutDoMask) | |||
| * shape of "keep_prob" should be (1,) or [1,]. | |||
| * Has the same type as "x" . \n | |||
| *@par Output: | |||
| *@par Outputs: | |||
| *y: A mutable Tensor. Has the same type as "x". | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| @@ -1010,7 +1011,7 @@ REG_OP(LRNGrad) | |||
| *@li grads: A Tensor. Has the same type as acts. | |||
| *@par Attributes: | |||
| *@li blank_label: An optional attribute. Defaults to 0. | |||
| *blank_label: An optional attribute. Defaults to 0. | |||
| *@par Third-party framework compatibility | |||
| * Compatible with TensorFlow RNNTLoss operator. | |||
| @@ -1198,13 +1199,11 @@ REG_OP(INInferV2D) | |||
| * @li epsilon: An attribute of type Float. \n | |||
| * @par Outputs: | |||
| *Three outputs, including: | |||
| * Three outputs, including: | |||
| * @li y: A Tensor. Has the same type as "x". \n | |||
| * @li mean: A Tensor. Has the same type as "x". \n | |||
| * @li variance: A Tensor. Has the same type as "x". \n | |||
| * @par Third-party framework compatibility | |||
| * Can be used by onnx InstanceNormalization | |||
| */ | |||
| REG_OP(InstanceNorm) | |||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| @@ -1218,24 +1217,22 @@ REG_OP(InstanceNorm) | |||
| .OP_END_FACTORY_REG(InstanceNorm) | |||
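// For reference, a minimal sketch of instance normalization over a single
// (batch, channel) slice: statistics are computed over the spatial elements
// only, then gamma and beta are applied. Per-slice statistics are an
// assumption consistent with the usual definition.
#include <cmath>
#include <vector>

std::vector<float> InstanceNormSlice(const std::vector<float>& x,
                                     float gamma, float beta, float epsilon) {
  float mean = 0.0f, var = 0.0f;
  for (float v : x) mean += v;
  mean /= x.size();
  for (float v : x) var += (v - mean) * (v - mean);
  var /= x.size();
  float inv_std = 1.0f / std::sqrt(var + epsilon);
  std::vector<float> y(x.size());
  for (size_t i = 0; i < x.size(); ++i)
    y[i] = gamma * (x[i] - mean) * inv_std + beta;   // normalize, scale, shift
  return y;
}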
| /** | |||
| *@brief InstanceNormGrad operator interface implementation. | |||
| * @brief InstanceNormGrad operator interface implementation. | |||
| *@par Inputs: | |||
| *Five inputs, including: | |||
| * @par Inputs: | |||
| * Five inputs, including: | |||
| * @li dy: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li x: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li variance: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li mean: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n | |||
| *@par Outputs: | |||
| *Three outputs, including: | |||
| * @par Outputs: | |||
| * Three outputs, including: | |||
| * @li pd_x: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(InstanceNormGrad) | |||
| .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| @@ -1248,58 +1245,6 @@ REG_OP(InstanceNormGrad) | |||
| .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .OP_END_FACTORY_REG(InstanceNormGrad) | |||
| /** | |||
| *@brief InstanceNormXBackprop operator interface implementation. | |||
| *@par Inputs: | |||
| *Five inputs, including: | |||
| * @li dy: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li x: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li variance: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li mean: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n | |||
| *@par Outputs: | |||
| *Two outputs, including: | |||
| * @li pd_x: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li res_for_gamma: A Tensor. Must be one of the following types: float32. | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(InstanceNormXBackprop) | |||
| .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .OUTPUT(res_for_gamma, TensorType({DT_FLOAT})) | |||
| .OP_END_FACTORY_REG(InstanceNormXBackprop) | |||
| /** | |||
| *@brief InstanceNormBetaGammaBackprop operator interface implementation. | |||
| *@par Inputs: | |||
| *Two inputs, including: | |||
| * @li dy: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li res_for_gamma: A Tensor. Must be one of the following types: float32.\n | |||
| *@par Outputs: | |||
| *Two outputs, including: | |||
| * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. | |||
| * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(InstanceNormBetaGammaBackprop) | |||
| .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .INPUT(res_for_gamma, TensorType({DT_FLOAT})) | |||
| .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| .OP_END_FACTORY_REG(InstanceNormBetaGammaBackprop) | |||
| /** | |||
| * @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n | |||
| @@ -1340,10 +1285,10 @@ REG_OP(KlDivLossGrad) | |||
| * @li label: A Tensor. Has the same type as "grads". Required. \n | |||
| * @par Attributes: | |||
| * @li reduction: An optional attribute of type String. Defaults to "mean". \n | |||
| * reduction: An optional attribute of type String. Defaults to "mean". \n | |||
| * @par Outputs: | |||
| * @li y: A Tensor. Has the same type as "x". \n | |||
| * y: A Tensor. Has the same type as "x". \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator L1LossGrad. | |||
| @@ -1368,7 +1313,7 @@ REG_OP(L1LossGrad) | |||
* @li reduction: An optional string. Defaults to "mean". \n
| * @par Outputs: | |||
| * @li y: An ND tensor tensor with the same shape and type as "predict". \n | |||
* y: An ND tensor with the same shape and type as "predict". \n
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator LpLoss. | |||
| @@ -1390,10 +1335,10 @@ REG_OP(LpLoss) | |||
| * @li dout: An ND tensor of type float16, float32. \n | |||
| * @par Attributes: | |||
| * @li reduction: An optional string.Defaults to "mean". \n | |||
* reduction: An optional string. Defaults to "mean". \n
| * @par Outputs: | |||
| * @li y: An ND tensor tensor with the same shape and type as "predict". \n | |||
* y: An ND tensor with the same shape and type as "predict". \n
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator MseLossGrad. | |||
| @@ -1414,10 +1359,10 @@ REG_OP(MseLossGrad) | |||
| * @li label: An ND Tensor of dtype float16 or float32.\n | |||
| * | |||
| * @par Attributes: | |||
| * @li reduction:An optional str from sum, none, mean, Defaults to "mean".\n | |||
* reduction: An optional string from "sum", "none", "mean". Defaults to "mean".\n
| * | |||
| * @par Outputs: | |||
| * @li y: when reduction=sum/mean, y is scale. when reduction=none, y has | |||
* y: when reduction is sum or mean, y is a scalar; when reduction is none, y has
| * same type and shape as "predict".\n | |||
| */ | |||
| REG_OP(MseLoss) | |||
| @@ -1445,7 +1390,7 @@ REG_OP(MseLoss) | |||
| * Must be one of the following: "none", "mean", "sum". \n | |||
| * @par Outputs: | |||
| * @li gradient: A Tensor. Has the same type as "predict". \n | |||
| * gradient: A Tensor. Has the same type as "predict". \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator SmoothL1LossBackward. | |||
| @@ -1480,7 +1425,7 @@ REG_OP(SmoothL1LossGradV2) | |||
| * the output,'sum': the output will be summed. Default: 'mean'. \n | |||
| * @par Outputs: | |||
| * @li loss: Indicates the loss between the predictive value and target value. | |||
| * loss: Indicates the loss between the predictive value and target value. | |||
| * Has the same dimensions as "predict". \n | |||
| * @par Third-party framework compatibility | |||
| @@ -1498,12 +1443,12 @@ REG_OP(SmoothL1LossV2) | |||
| * @brief Computes Centralization. result = x - mean(x, axes) | |||
| * @par Inputs: | |||
| * @li x: An ND tensor of type float16, float32. | |||
| * x: An ND tensor of type float16, float32. | |||
| * @par Attributes: | |||
| * @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. | |||
| * axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. | |||
| * Must be in the range [-rank(x), rank(x)). | |||
| * @par Outputs: | |||
| * @li y: A Tensor. Has the same type as "x". \n | |||
| * y: A Tensor. Has the same type as "x". \n | |||
| * @par Third-party framework compatibility | |||
| * custom operator \n | |||
| @@ -1521,7 +1466,7 @@ REG_OP(Centralization) | |||
| *@par Inputs: | |||
| *One inputs, including: | |||
| * @li x: A tensor . Must be one of the following types: | |||
| * x: A tensor . Must be one of the following types: | |||
| * float16, float32, int32, uint32, int8, uint8. \n | |||
| *@par Attributes: | |||
| @@ -1546,14 +1491,14 @@ REG_OP(Roll) | |||
| logistic loss between input_x and input_y (containing 1 or -1). \n | |||
| *@par Inputs: | |||
| *One inputs, including: | |||
*Two inputs, including:
| * @li input_x: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| * @li input_y: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| *@par Attributes: | |||
| *@li lambd: An optional string.Defaults to "mean". \n | |||
*reduction: An optional string. Defaults to "mean". \n
| *@par Outputs: | |||
| *output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n | |||
| @@ -1580,10 +1525,10 @@ REG_OP(SoftMarginLoss) | |||
| * @li pos_weight: An optional ND tensor of type float16, float32. \n | |||
| * @par Attributes: | |||
| * @li reduction: An optional string.Defaults to "mean". \n | |||
* reduction: An optional string. Defaults to "mean". \n
| * @par Outputs: | |||
| * @li gradient: An ND tensor tensor with the same shape and type as "predict". \n | |||
* gradient: An ND tensor with the same shape and type as "predict". \n
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad. | |||
| @@ -1603,24 +1548,14 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2) | |||
| * @par Inputs: | |||
| * Two inputs, including: | |||
| * @li input_x: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| * | |||
| * @par Inputs: | |||
| * @li target: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| * @li input_x: A tensor. Must be one of the following types: float16, float32. | |||
| * @li target: A tensor. Must be one of the following types: float16, float32. \n | |||
| * @par Attributes: | |||
| * four Attributes, including: | |||
| * @li log_input: An optional bool. Defaults to "True" \n | |||
| * | |||
| * @par Attributes: | |||
| * @li full: An optional bool. Defaults to "False" \n | |||
| * | |||
| * @par Attributes: | |||
| * @li eps: An optional float. Defaults to "1e-8" \n | |||
| * | |||
| * @par Attributes: | |||
| * @li log_input: An optional bool. Defaults to "True" | |||
| * @li full: An optional bool. Defaults to "False" | |||
| * @li eps: An optional float. Defaults to "1e-8" | |||
| * @li reduction: An optional string. Defaults to "mean" \n | |||
| * @par Outputs: | |||
| @@ -1641,14 +1576,14 @@ REG_OP(PoissonNllLoss) | |||
| /** | |||
| *@brief rnn_gen_mask | |||
| * @par Inputs: | |||
| * @li seq_length: A ND Tensor of type int32. Recoed the current length of each batch.\n | |||
* seq_length: An ND Tensor of type int32. Records the current length of each batch.\n
| * | |||
| * @par Attributes: | |||
| * @li num_step: A required int.\n | |||
| * @li hidden_size: A required int. \n | |||
| * | |||
| * | |||
| * @par Output: | |||
* @par Outputs:
| * y: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n | |||
| * | |||
| */ | |||
| @@ -1666,18 +1601,16 @@ REG_OP(RnnGenMask) | |||
| * @par Inputs: | |||
| * Two inputs, including: | |||
| * @li x: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| * | |||
| * @par Inputs: | |||
| * float16, float32. | |||
| * @li target: A tensor. Must be the following types: | |||
| * int32. \n | |||
| * @par Attributes: | |||
| * @li reduction: An optional string. Defaults to "mean" \n | |||
| * reduction: An optional string. Defaults to "mean" \n | |||
| * @par Outputs: | |||
| * y: A Tensor has same element type as input x. \n | |||
| * is_target: A Tensor has same element type as input target. \n | |||
| * @li y: A Tensor has same element type as input x. \n | |||
| * @li is_target: A Tensor has same element type as input target. \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator MultiLabelMarginLoss. \n | |||
| @@ -106,16 +106,16 @@ REG_OP(FusedBatchNormV2) | |||
| .OP_END_FACTORY_REG(FusedBatchNormV2) | |||
| /** | |||
| * @brief: Large amount of data sort.First operator of TopK. | |||
| * @brief Large amount of data sort.First operator of TopK. | |||
| * @par Inputs: | |||
* Two inputs, including:
| * @li input_data: A Tensor. Data to be sorted. Support float16 | |||
| * @li input_index: A Tensor. Range(0, 2048). Datatype and format is same as input_data. | |||
| * @par Attributes: | |||
| * @li k_num: Int.Number to be sorted. | |||
* k_num: Int. Number to be sorted.
| * @par Outputs: | |||
| * 1 output, including: | |||
| * @li output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel. | |||
| * One output, including: | |||
| * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel. | |||
| */ | |||
| REG_OP(SegmentSort) | |||
| .INPUT(input_data, TensorType({DT_FLOAT16})) | |||
| @@ -127,13 +127,13 @@ REG_OP(SegmentSort) | |||
| /** | |||
| * @brief Large amount of data sort. Second operator of TopK. | |||
| * @par Inputs: | |||
| * two input, including: | |||
| * @li input_proposal: A Tensor. Proposal sorted for each channel. Support float16 | |||
| * One input, including: | |||
| * input_proposal: A Tensor. Proposal sorted for each channel. Support float16 | |||
| * @par Attributes: | |||
| * @li k_num: Int.Number to be sorted. | |||
| * k_num: Int. Number to be sorted. | |||
| * @par Outputs: | |||
| * 1 output, including: | |||
| * @li output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel. | |||
| * One output, including: | |||
| * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel. | |||
| */ | |||
| REG_OP(MultiMerge) | |||
| .INPUT(input_proposal, TensorType({DT_FLOAT16})) | |||
| @@ -142,14 +142,14 @@ REG_OP(MultiMerge) | |||
| .OP_END_FACTORY_REG(MultiMerge) | |||
| /** | |||
| * @brief: Large amount of data sort.Third operator of TopK. | |||
| * @brief Large amount of data sort. Third operator of TopK. | |||
| * @par Inputs: | |||
| * two input, including: | |||
| * @li input_proposal: A Tensor. Proposal sorted for each channel. Support float16 | |||
| * One input, including: | |||
| * input_proposal: A Tensor. Proposal sorted for each channel. Support float16 | |||
| * @par Attributes: | |||
| * @li k_num: Int.Number to be sorted. | |||
| * k_num: Int. Number to be sorted. | |||
| * @par Outputs: | |||
| * 2 output, including: | |||
| * Two outputs, including: | |||
| * @li output_data: A Tensor. Datatype and format is same as input_data. Data sorted. | |||
| * @li output_index: A Tensor. int32. Data index. | |||
| */ | |||
| @@ -29,7 +29,7 @@ namespace ge { | |||
| /** | |||
| *@brief Performs pooling on the input. | |||
| *@par Inputs: | |||
| *@li x: An NCHW tensor of type float16, float32, int8. | |||
| * x: An NCHW tensor of type float16, float32, int8. | |||
| *@par Attributes: | |||
| *@li mode: An optional int32, specifying the pooling algorithm, either "0" (max pooling) or "1" (avg pooling). Defaults to "0". | |||
| *@li global_pooling: An optional bool. Defaults to "false". | |||
| @@ -50,6 +50,7 @@ namespace ge { | |||
| *dilation[2]: An optional int32, specifying the left dilation. Defaults to "1". | |||
| *dilation[3]: An optional int32, specifying the right dilation. Defaults to "1". | |||
| *@li ceil_mode: An optional int32, either "0" (ceil mode) or "1" (floor mode). Defaults to "0". | |||
| *@li data_format: An optional string, specifying the data format of the input and output data. Defaults to "NCHW". | |||
| *@par Outputs: | |||
| *y: An NCHW tensor of type float16, float32, int32. | |||
| *@attention Constraints: | |||
| @@ -204,7 +205,7 @@ REG_OP(AvgPool3D) | |||
| *y: The average pooled output tensor . \n | |||
| *@attention Constraints: | |||
| *@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||
| *"ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator AvgPool3D. | |||
| @@ -281,10 +282,10 @@ REG_OP(AvgPool3DGrad) | |||
| * @li data_format: A string, format of input data . \n | |||
| * @par Outputs: | |||
| * @output: The average pooled output tensor . \n | |||
| * output: The average pooled output tensor . \n | |||
| * @attention Constraints: | |||
| * @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||
| * "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator AvgPool3DGradD. | |||
| @@ -430,6 +431,47 @@ REG_OP(MaxPool3D) | |||
| .ATTR(data_format, String, "NDHWC") | |||
| .OP_END_FACTORY_REG(MaxPool3D) | |||
| /** | |||
| * @brief Performs 3D max pooling and outputs both the max values and their indices. | |||
| * | |||
| * @par Inputs: | |||
| * One input: | |||
| * x: A 6D tensor. Supported type: float16. Format as NDC1HWC0. | |||
| * @par Attributes: | |||
| * @li ksize: A required list of int32 values, | |||
| * specifying the size of the window for each dimension of the input tensor. | |||
| * No default value. | |||
| * @li strides: A required list of int32 values, | |||
| * specifying the stride of the sliding window for each dimension of | |||
| * the input tensor. No default value. | |||
| * @li pads: A required 3*2 list of int32 values, | |||
| * specifying the padding on each of the three spatial dimensions of the input, | |||
| * implemented with 0. | |||
| * @li dilation: Dilation of the kernel. Default value is {1, 1, 1, 1, 1}. | |||
| * @li ceil_mode: Whether to use ceil mode. Default value is false. | |||
| * @li data_format: The format of the (torch) input. Default value is "NCDHW". | |||
| * @li argmax_type: Determines the type of the output argmax. "bitmask" is | |||
| * the default value, in which case argmax returns an img2col bitmask. | |||
| * "index_int32" and "index_int64" return torch-style output indices. | |||
| * @par Outputs: | |||
| * @li y: A 6D tensor. The maxpool3d output (max values), format as NDoC1HoWoC0. | |||
| * @li argmax: A 5D uint16 tensor. The indices output, | |||
| * format as NC1HWC0; it actually represents N, Do, C1*ksize, Ho*Wo//16, 16. | |||
| */ | |||
| REG_OP(MaxPool3DWithArgmax) | |||
| .INPUT(x, TensorType::RealNumberType()) | |||
| .OUTPUT(y, TensorType::RealNumberType()) | |||
| .OUTPUT(argmax, TensorType::IndexNumberType()) | |||
| .REQUIRED_ATTR(ksize, ListInt) | |||
| .REQUIRED_ATTR(strides, ListInt) | |||
| .REQUIRED_ATTR(pads, ListInt) | |||
| .ATTR(dilation, ListInt, {1, 1, 1, 1, 1}) | |||
| .ATTR(ceil_mode, Bool, false) | |||
| .ATTR(data_format, String, "NCDHW") | |||
| .ATTR(argmax_type, String, "bitmask") | |||
| .OP_END_FACTORY_REG(MaxPool3DWithArgmax) | |||
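| For illustration, a minimal sketch of wiring this operator into a GE graph; the "all_ops.h" header and the set_input_/set_attr_ setter naming are assumptions inferred from the REG_OP definition above, not guarantees of this header. | |||
| #include "graph/graph.h" | |||
| #include "all_ops.h"  // assumed aggregate header exposing the generated ge::op classes | |||
| ge::Graph BuildMaxPool3DWithArgmax(ge::op::Data &x) { | |||
|   ge::op::MaxPool3DWithArgmax pool("pool3d"); | |||
|   pool.set_input_x(x);                     // 6D NDC1HWC0 float16 input | |||
|   pool.set_attr_ksize({1, 1, 2, 2, 2});    // window size per NCDHW dimension | |||
|   pool.set_attr_strides({1, 1, 2, 2, 2});  // stride per NCDHW dimension | |||
|   pool.set_attr_pads({0, 0, 0, 0, 0, 0});  // 3*2 pad values, zero padding | |||
|   pool.set_attr_argmax_type("bitmask");    // default: img2col bitmask argmax | |||
|   ge::Graph graph("maxpool3d_with_argmax"); | |||
|   graph.SetInputs({x}).SetOutputs({pool}); // y and argmax are both produced | |||
|   return graph; | |||
| } | |||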
| /** | |||
| *@brief Applies a 2D adaptive max pooling over an input signal composed of several input planes. \n | |||
| * The output is of size H x W, for any input size. | |||
| @@ -522,8 +564,7 @@ REG_OP(MaxPool3DGradGrad) | |||
| * y: A mutable tensor. Has the same shape and type as "x1" . \n | |||
| * @attention Constraints: | |||
| * @li Computing gradients of global pooling is not supported, which means | |||
| * "ksize < x1". | |||
| * @li ksize is limited by buffer with full tiling. | |||
| * @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||
| * @par Third-party framework compatibility | |||
| @@ -568,7 +609,7 @@ REG_OP(MaxPoolGrad) | |||
| * @li Other dimensions of ksize and strides is 1 . \n | |||
| * @par Outputs: | |||
| * @li y: Has the same type and format as input "x1" . \n | |||
| * y: Has the same type and format as input "x1" . \n | |||
| * @par Third-party framework compatibility | |||
| * @li Compatible with the TensorFlow operator MaxPoolGradGrad. | |||
| @@ -588,7 +629,7 @@ REG_OP(MaxPoolGradGrad) | |||
| *@brief Performs max_pool_ext2 on the input . \n | |||
| *@par Inputs: | |||
| * Two inputs: | |||
| * Three inputs: | |||
| *@li x: An NC1HWC0 Tensor of type float16. | |||
| *@li strides: A required type of int32 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. | |||
| *@li ksize: A required type of int32 values, specifying the size of the window for each dimension of the input tensor. No default value. | |||
| @@ -635,7 +676,8 @@ REG_OP(MaxPoolV2) | |||
| *@li strides: A required list of int8, int16, int32, or int64 values, | |||
| * specifying the stride of the sliding window for each dimension of | |||
| * the input tensor. No default value. | |||
| *@li padding: A required string. No default value . \n | |||
| *@li padding: A required string. No default value . | |||
| *@li Targmax: An optional int with default value 7 . \n | |||
| *@par Outputs: | |||
| *@li y: A Tensor. Has the same type and format as input "x". | |||
| @@ -645,7 +687,7 @@ REG_OP(MaxPoolV2) | |||
| * ksize[1] * ksize[2] <= 255. | |||
| *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, | |||
| * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. | |||
| *@li "padding" is either "SAME" or "VALID" . \n | |||
| *@li "padding" is either "SAME" or "VALID" . | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator MaxPoolWithArgmax. | |||
| @@ -710,14 +752,15 @@ REG_OP(MaxPoolGradWithArgmax) | |||
| *@brief Performs transform mask to argmax . \n | |||
| *@par Inputs: | |||
| * Two input: | |||
| *x: An NC1HWC0 Tensor of type float16. | |||
| *mask: An NC1HWC0 Tensor of type uint16 . \n | |||
| * Two inputs: | |||
| *@li x: An NC1HWC0 Tensor of type float16. | |||
| *@li mask: An NC1HWC0 Tensor of type uint16 . \n | |||
| *@par Attributes: | |||
| *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. | |||
| *@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. | |||
| *@li padding: A required string. No default value . \n | |||
| *@li padding: A required string. No default value . | |||
| *@li originshape: A required list of int8, int16, int32, or int64 values. No default value. \n | |||
| *@par Outputs: | |||
| *argmax: An NC1HWC0 Tensor of type int32 . \n | |||
| @@ -754,7 +797,7 @@ REG_OP(Mask2Argmax) | |||
| * @li strides: A required list, specifying the stride of the sliding window. | |||
| * @li padding: A required string, window sliding mode. Either SAME or VALID. | |||
| * @par Outputs: | |||
| * @li y:Result tensor. Supported type: float, double, int32, | |||
| * y: Result tensor. Supported type: float, double, int32, | |||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64 | |||
| * @attention Constraints: | |||
| @@ -767,7 +810,7 @@ REG_OP(Mask2Argmax) | |||
| * (shape_max_pool[2] * shape_max_pool[3] + 31) // 16, 16), else failed . \n | |||
| * @par Third-party framework compatibility | |||
| * @li Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax. | |||
| * Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax. | |||
| */ | |||
| REG_OP(MaxPoolGradGradWithArgmax) | |||
| .INPUT(x, TensorType::RealNumberType()) | |||
| @@ -931,11 +974,11 @@ REG_OP(AvgPoolV2GradD) | |||
| .OP_END_FACTORY_REG(AvgPoolV2GradD) | |||
| /** | |||
| *@brief :upsample the layer | |||
| *@brief Upsamples the layer, similar to the nearest-neighbor scaling algorithm. | |||
| *@par Inputs: | |||
| * one input, including: | |||
| *@li x: A tensor of type float16 or float32. | |||
| * x: A tensor of type float16 or float32. | |||
| *@par Attributes: | |||
| *@li scale: An optional float32, scale factor of x. Defaults to "1.0". | |||
| *@li stride_h: An optional int32, broadcast the axis of h. Defaults to "2". | |||
| @@ -1419,7 +1462,7 @@ REG_OP(MaxPoolV3) | |||
| * the floor function will be used. Default False \n | |||
| * @par Outputs: | |||
| * y: A mutable tensor. Has the same shape and type as "x1" . \n | |||
| * out_grad: A mutable tensor. Has the same shape and type as "x1" . \n | |||
| * @attention Constraints: | |||
| * @li Computing gradients of global pooling is not supported, which means | |||
| @@ -1447,8 +1490,8 @@ REG_OP(MaxPoolV3Grad) | |||
| *@brief Performs Dilation2D on the input . \n | |||
| *@par Inputs: | |||
| *x: A tensor of shape is 4d, format is support NHWC. | |||
| *filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. \n | |||
| *@li x: A 4d tensor; format NHWC is supported. | |||
| *@li filter: A 3d tensor of the same type as x; its c dimension is the same as x's. \n | |||
| *@par Attributes: | |||
| *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. | |||
| @@ -1480,9 +1523,9 @@ REG_OP(Dilation2D) | |||
| *@brief Performs Dilation2DBackpropFilter on the input. \n | |||
| *@par Inputs: | |||
| *x: A tensor of shape is 4d, format is support NHWC. | |||
| *filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. | |||
| *out_backprop: Has the same type and format as input x and the c dimension is same with x. \n | |||
| *@li x: A 4d tensor; format NHWC is supported. | |||
| *@li filter: A 3d tensor of the same type as x; its c dimension is the same as x's. | |||
| *@li out_backprop: Has the same type and format as input x; its c dimension is the same as x's. \n | |||
| *@par Attributes: | |||
| *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1. | |||
| @@ -1519,9 +1562,9 @@ REG_OP(Dilation2DBackpropFilter) | |||
| *@brief Performs Dilation2DBackpropInput on the input. \n | |||
| *@par Inputs: | |||
| *x: A tensor of shape is 4d, format is support NHWC. | |||
| *filter: A tensor of shape is 3d, the type is same with x, and the c dimension is same with x. | |||
| *out_backprop: Has the same type and format as input x and the c dimension is same with x. \n | |||
| *@li x: A 4d tensor; format NHWC is supported. | |||
| *@li filter: A 3d tensor of the same type as x; its c dimension is the same as x's. | |||
| *@li out_backprop: Has the same type and format as input x; its c dimension is the same as x's. \n | |||
| *@par Attributes: | |||
| *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimension are 1. | |||
| @@ -289,7 +289,8 @@ REG_OP(SparseApplyAdagradV2D) | |||
| * Should be from a Variable(). | |||
| *@li lr: A scalar. Has the same type as "var". | |||
| *@li grad: A tensor for the gradient. Has the same type as "var". | |||
| * | |||
| *@li momentum: Momentum. Must be a scalar. | |||
| *@par Attributes: | |||
| *@li use_nesterov: An optional bool. Defaults to "False". | |||
| * If "True", the tensor passed to compute grad will be | |||
| @@ -701,7 +702,7 @@ REG_OP(ApplyPowerSignD) | |||
| /** | |||
| *@brief Updates "var" as FOBOS algorithm with fixed learning rate. | |||
| * prox_v = var - alpha * delta | |||
| * var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} | |||
| * var = sign(prox_v)/(1+alpha * l2) * max{|prox_v|-alpha * l1,0} | |||
| * | |||
| *@attention Constraints: | |||
| * the input tensors must have the same shape. | |||
| @@ -2128,10 +2129,12 @@ REG_OP(FusedMulApplyMomentumExtern) | |||
| * otherwise the behavior is undefined, but may exhibit less contention. | |||
| * | |||
| *@par Outputs: | |||
| * var: A mutable tensor. Has the same type as input "var". | |||
| * @li var: A mutable tensor. Has the same type as input "var". | |||
| * @li accum: A mutable tensor. Has the same type as input "accum". | |||
| * | |||
| *@attention Constraints: | |||
| * The input tensors must have the same shape. | |||
| * | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator ResourceApplyKerasMomentum. | |||
| @@ -28,8 +28,8 @@ namespace ge { | |||
| *@brief Computes the gelu of "x" . \n | |||
| *@par Inputs: | |||
| *Two inputs, including: | |||
| * @li x: A Tensor. Must be one of the following types: float16, float32 | |||
| *One input, including: | |||
| *x: A Tensor. Must be one of the following types: float16, float32 | |||
| *@par Outputs: | |||
| *y: A Tensor. Has the same type as "x". | |||
| @@ -66,8 +66,8 @@ REG_OP(GeluGrad) | |||
| *@brief Computes the fast_gelu of "x" . \n | |||
| *@par Inputs: | |||
| *Two inputs, including: | |||
| * @li x: A Tensor. Must be one of the following types: float16, float32 | |||
| *One input, including: | |||
| *x: A Tensor. Must be one of the following types: float16, float32 | |||
| *@par Outputs: | |||
| *y: A Tensor. Has the same type as "x". | |||
| @@ -83,7 +83,7 @@ REG_OP(FastGelu) | |||
| *@brief Computes the gradient for the fast_gelu of "x" . \n | |||
| *@par Inputs: | |||
| *Three inputs, including: | |||
| *Two inputs, including: | |||
| * @li dy: A Tensor. Must be one of the following types: float16, float32 | |||
| * @li x: A Tensor of the same type as "dy" . \n | |||
| @@ -169,7 +169,7 @@ REG_OP(Relu) | |||
| * x: A Tensor of type RealNumberType . \n | |||
| * @par Outputs: | |||
| * y: A Tensor of type RealNumberType . \n | |||
| * y: A Tensor with the same type as x . \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator Relu6. | |||
| @@ -209,8 +209,12 @@ REG_OP(Relu6D) | |||
| * backprops = gradients * (features > 0) * (features < 6) . \n | |||
| * @par Inputs: | |||
| * @li features: A Tensor of type RealNumberType. | |||
| * @li gradients: A Tensor of type RealNumberType . \n | |||
| * @li gradients: A Tensor of type RealNumberType. The backpropagated | |||
| * gradients to the corresponding Relu6 operation. | |||
| * @li features: A Tensor with the same type as gradients. The features passed | |||
| * as input to the corresponding Relu6 operation, or its output; | |||
| * using either one produces the same result. \n | |||
| * @par Outputs: | |||
| * backprops: A Tensor of type RealNumberType . \n | |||
| @@ -228,7 +232,7 @@ REG_OP(Relu6Grad) | |||
| *Applies the element-wise function: | |||
| * Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha . | |||
| *@par Inputs: | |||
| *One inputs, including: | |||
| *Two inputs, including: | |||
| * @li grads: A tensor. Must be one of the following types: | |||
| * float16, float32. | |||
| * @li activations: A tensor. Must be one of the following types: | |||
| @@ -238,7 +242,7 @@ REG_OP(Relu6Grad) | |||
| *y: A Tensor with the same type and shape as "grads". | |||
| * | |||
| *@par Attributes: | |||
| *@li alpha: scalar parameter, default value = 1.0 | |||
| *alpha: A scalar parameter. Default value = 1.0 | |||
| */ | |||
| REG_OP(EluGradV2) | |||
| .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
| @@ -539,13 +543,9 @@ REG_OP(Elu) | |||
| *x: A float16, float32, for the input data type . \n | |||
| *@par Attributes: | |||
| *alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n | |||
| *@par Attributes: | |||
| *alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n | |||
| *@par Attributes: | |||
| *alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n | |||
| *@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . | |||
| *@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . | |||
| *@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n | |||
| *@par Outputs: | |||
| *y: A float16, float32, for the normalized result . \n | |||
| @@ -706,8 +706,8 @@ REG_OP(Mish) | |||
| * @li x: A Tensor. Must be one of the following types: float16, float32 | |||
| * @li tanhx: A Tensor. shape, datatype and format is same as x | |||
| * @par Outputs: | |||
| * 1 output, including: | |||
| * @li x_grad: A Tensor. shape, datatype and format is same as x | |||
| * One output, including: | |||
| * x_grad: A Tensor. Shape, datatype and format are the same as x | |||
| */ | |||
| REG_OP(MishGrad) | |||
| @@ -721,20 +721,20 @@ REG_OP(MishGrad) | |||
| * @brief pytorch hardtanh_backward operator. | |||
| * | |||
| * @par Inputs: | |||
| * 2 inputs, including: | |||
| * Two inputs, including: | |||
| * @li result, minimum tensor of the linear region range, | |||
| * datatype: float16/float32, format:ND/5HD. | |||
| * @li grad, maximum tensor of the linear region range, | |||
| * datatype:float16/float32, format:ND/5HD. \n | |||
| * @par Attributes: | |||
| * 2 attributes, including: | |||
| * Two attributes, including: | |||
| * @li min_val, minimum value of the linear region range, datatype:float. | |||
| * @li max_val, maximum value of the linear region range, datatype:float. \n | |||
| * @par Outputs: | |||
| * 1 output, including: | |||
| * @li y, hardtanh_backward output tensor, datatype and format is same as | |||
| * One output, including: | |||
| * y, hardtanh_backward output tensor, datatype and format is same as | |||
| * input result. \n | |||
| * @attention Constraints: | |||
| @@ -756,7 +756,7 @@ REG_OP(HardtanhGrad) | |||
| * @par Inputs: | |||
| * One input, including: | |||
| * @li x: A mutable Tensor. Must be one of the following types: | |||
| * x: A mutable Tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| * @par Attributes: | |||
| @@ -765,7 +765,7 @@ REG_OP(HardtanhGrad) | |||
| * @li threshold: An optional float. Defaults to "20.0" \n | |||
| * @par Outputs: | |||
| * @li y: A mutable Tensor. Has the same type as "x" \n | |||
| * y: A mutable Tensor. Has the same type as "x" \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator Softplus. | |||
| @@ -792,7 +792,7 @@ REG_OP(SoftplusV2) | |||
| * @li threshold: An optional float. Defaults to "20.0" \n | |||
| * @par Outputs: | |||
| * @li output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n | |||
| * output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator SoftplusGrad. | |||
| @@ -809,13 +809,16 @@ REG_OP(SoftplusV2Grad) | |||
| * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) | |||
| * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. | |||
| * | |||
| * @par inputs | |||
| * @par Inputs: | |||
| * One input, including: | |||
| * @li x: input A Tensor. Must be one of the following types: float32, float16 | |||
| * x: A Tensor. Must be one of the following types: float32, float16 | |||
| * | |||
| * @par output | |||
| * @par Attributes: | |||
| * alpha: An optional float. Defaults to 1.0. \n | |||
| * @par Outputs: | |||
| * One output, including: | |||
| * @li y:A Tensor of the same type as x | |||
| * y: A Tensor of the same type as x | |||
| * | |||
| */ | |||
| REG_OP(ThresholdedRelu) | |||
| @@ -829,14 +832,14 @@ REG_OP(ThresholdedRelu) | |||
| * @par Inputs: | |||
| * One input, including: | |||
| * @li input_x: A tensor. Must be one of the following types: | |||
| * input_x: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| * @par Attributes: | |||
| * @li lambd: An optional float. Defaults to 0.5. \n | |||
| * lambd: An optional float. Defaults to 0.5. \n | |||
| * @par Outputs: | |||
| * y: A Tensor with the same dtype and shape of input_x's. \n | |||
| * output_y: A Tensor with the same dtype and shape as "input_x". \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator Hardshrink. \n | |||
| @@ -863,7 +866,7 @@ REG_OP(HardShrink) | |||
| *backprops: A Tensor with the same type and shape of features's. \n | |||
| * | |||
| *@par Attributes: | |||
| *@li lambd: An optional float.Defaults to 0.5. \n | |||
| *lambd: An optional float. Defaults to 0.5. \n | |||
| * | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the Pytorch operator Hardshrink_backward. \n | |||
| @@ -880,7 +883,7 @@ REG_OP(HardShrink) | |||
| * @par Inputs: | |||
| * One input, including: | |||
| * @li input_x: A tensor. Must be one of the following types: | |||
| * input_x: A tensor. Must be one of the following types: | |||
| * float16, float32, int32. \n | |||
| * @par Attributes: | |||
| @@ -905,11 +908,11 @@ REG_OP(HardSigmoid) | |||
| * @par Inputs: | |||
| * One input, including: | |||
| * @li input_x: A tensor. Must be one of the following types: | |||
| * input_x: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| * @par Attributes: | |||
| * @li lambd: An optional float. Defaults to 0.5. \n | |||
| * lambd: An optional float. Defaults to 0.5. \n | |||
| * @par Outputs: | |||
| * y: A Tensor with the same dtype and shape of input_x's. \n | |||
| @@ -933,7 +936,7 @@ REG_OP(SoftShrink) | |||
| * @li input_x: A tensor of the same dtype as "input_grad". \n | |||
| * @par Attributes: | |||
| * @li lambd: An optional float. Defaults to 0.5. \n | |||
| * lambd: An optional float. Defaults to 0.5. \n | |||
| * @par Outputs: | |||
| * y: A Tensor of the same dtype and shape as "input_grad". \n | |||
| @@ -976,12 +979,12 @@ REG_OP(LogSigmoidGrad) | |||
| *@par Inputs: | |||
| *One input, including: | |||
| * @li x: A tensor. Must be one of the following types: | |||
| * x: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| *@par Outputs: | |||
| *One output, including: | |||
| * @li y: A tensor with the same type and shape of x's. \n | |||
| * y: A tensor with the same type and shape as x. \n | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the Pytorch operator LogSigmoid. \n | |||
| @@ -1003,7 +1006,7 @@ REG_OP(LogSigmoid) | |||
| *@par Outputs: | |||
| *One output, including: | |||
| * @li y: A tensor with the same type and shape of x's. \n | |||
| * y: A tensor with the same type and shape as x. \n | |||
| * @par Attributes: | |||
| * @li alpha: An optional float. Defaults to 0.16666666. \n | |||
| @@ -33,8 +33,8 @@ namespace ge { | |||
| *@li value: A 0D scalar. Specifies the value to fill the returned tensor. | |||
| * Must be one of the following types: | |||
| * float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||
| * qint8, quint8, qint32, uint16, complex128, uint32, uint64. | |||
| * float16, float32, double, int32, uint8, int16, int8, complex64, int64, bool, | |||
| * qint8, quint8, qint32, qint16, quint16, uint16, complex128, uint32, uint64. | |||
| * | |||
| *@par Outputs: | |||
| * y: A tensor. Has the same type as "value". | |||
| @@ -46,8 +46,14 @@ namespace ge { | |||
| */ | |||
| REG_OP(Fill) | |||
| .INPUT(dims, TensorType::IndexNumberType()) | |||
| .INPUT(value, TensorType::BasicType()) | |||
| .OUTPUT(y, TensorType::BasicType()) | |||
| .INPUT(value, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, | |||
| DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, | |||
| DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_UINT16, | |||
| DT_COMPLEX128, DT_FLOAT16, DT_UINT32, DT_UINT64})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, | |||
| DT_INT8, DT_COMPLEX64, DT_INT64, DT_BOOL, DT_QINT8, | |||
| DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_UINT16, | |||
| DT_COMPLEX128, DT_FLOAT16, DT_UINT32, DT_UINT64})) | |||
| .OP_END_FACTORY_REG(Fill) | |||
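| As a hedged sketch of the widened registration above, a Fill node can be built from two const producers; the wrapper class and setter names are assumed from the REG_OP definition, not confirmed by this diff. | |||
| #include "all_ops.h"  // assumed aggregate header exposing the generated ge::op classes | |||
| // dims: e.g. an int32 const tensor {2, 3}; value: a 0-D scalar of any listed type. | |||
| ge::Operator MakeFillNode(ge::Operator &dims, ge::Operator &value) { | |||
|   ge::op::Fill fill("fill"); | |||
|   fill.set_input_dims(dims);    // shape of the tensor to produce | |||
|   fill.set_input_value(value);  // scalar to broadcast; y has value's type | |||
|   return fill; | |||
| } | |||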
| /** | |||
| @@ -213,11 +219,11 @@ REG_OP(PadV2) | |||
| *@brief Pads a tensor . \n | |||
| *@par Inputs: | |||
| *x: A Tensor. Must be one of the following types: float16, float32, int32 . \n | |||
| *constant_values: A Tensor. Must have the same type as input. | |||
| *@li x: A Tensor. Must be one of the following types: float16, float32, int32 . \n | |||
| *@li constant_values: A Tensor. Must have the same type as input. | |||
| *@par Attributes: | |||
| *paddings: An optional "vector<vector<int>>". Defaults to "{}". | |||
| *paddings: A required "vector<vector<int>>" attribute. | |||
| * For each dimension D of input, paddings[D, 0] indicates how many | |||
| * values to add before the contents of tensor in that dimension, | |||
| * and paddings[D, 1] indicates how many values to add after the | |||
| @@ -461,7 +467,7 @@ REG_OP(FillV2) | |||
| * @li dims: A required listInt, specifying the shape to fill with the value. | |||
| * @par Outputs: | |||
| * @li y: A Tensor. Has the shape specify by attr shape, and full of the value specify by attr value. | |||
| * y: A Tensor. Has the shape specified by attr "dims", and is filled with the value specified by attr "value". | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the ONNX operator ConstantOfShape. | |||
| @@ -54,27 +54,26 @@ REG_OP(StringToNumber) | |||
| /** | |||
| *@brief Convert serialized tensorflow.TensorProto prototype to Tensor. | |||
| *@brief Parse an Example prototype. | |||
| *@par Input: | |||
| *serialized: A Tensor of type string. | |||
| *dense_defaults: DYNAMIC INPUT Tensor type as string, float, int64. \n | |||
| *@par Inputs: | |||
| *@li serialized: A Tensor of type string. | |||
| *@li dense_defaults: DYNAMIC INPUT Tensor type as string, float, int64. \n | |||
| *@par Attributes: | |||
| *num_sparse: type int num of inputs sparse_indices , sparse_values, sparse_shapes | |||
| *out_type: output type | |||
| *sparse_keys: ListString | |||
| *sparse_types: types of sparse_values | |||
| *dense_keys: ListString | |||
| *dense_shapes: output of dense_defaults shape | |||
| *dense_types: output of dense_defaults type \n | |||
| *@li num_sparse: An int. Number of the inputs sparse_indices, sparse_values and sparse_shapes. | |||
| *@li sparse_keys: ListString | |||
| *@li sparse_types: types of sparse_values | |||
| *@li dense_keys: ListString | |||
| *@li Tdense: Types of the dense_defaults outputs. | |||
| *@li dense_shapes: Shapes of the dense_defaults outputs. \n | |||
| *@par Outputs: | |||
| *sparse_indices: A Tensor of type string. | |||
| *sparse_values: Has the same type as sparse_types. | |||
| *sparse_shapes: A Tensor of type int64 | |||
| *dense_values: Has the same type as dense_defaults. | |||
| *@li sparse_indices: A Tensor of type string. | |||
| *@li sparse_values: Has the same type as sparse_types. | |||
| *@li sparse_shapes: A Tensor of type int64 | |||
| *@li dense_values: Has the same type as dense_defaults. | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| **/ | |||
| */ | |||
| REG_OP(ParseSingleExample) | |||
| .INPUT(serialized, TensorType({DT_STRING})) | |||
| .DYNAMIC_INPUT(dense_defaults, TensorType({DT_STRING,DT_FLOAT,DT_INT64})) | |||
| @@ -92,16 +91,16 @@ REG_OP(ParseSingleExample) | |||
| /** | |||
| *@brief Decodes raw file into tensor . \n | |||
| *@par Input: | |||
| *@par Inputs: | |||
| *bytes: A Tensor of type string. | |||
| *@par Attributes: | |||
| *little_endian: bool ture | |||
| *out_type: output type | |||
| *@li little_endian: bool true | |||
| *@li out_type: output type | |||
| *@par Outputs: | |||
| *output: A Tensor | |||
| **/ | |||
| */ | |||
| REG_OP(DecodeRaw) | |||
| .INPUT(bytes, TensorType({DT_STRING})) | |||
| .OUTPUT(output, TensorType({DT_BOOL,DT_FLOAT16,DT_DOUBLE,DT_FLOAT, | |||
| @@ -147,18 +146,20 @@ REG_OP(ParseTensor) | |||
| *@par Inputs: | |||
| *Inputs include: | |||
| *records: Each string is a record/row in the csv and all records should have the | |||
| *@li records: Each string is a record/row in the csv and all records should have the | |||
| *same format. \n | |||
| *record_defaults: One tensor per column of the input record, with either a | |||
| *@li record_defaults: One tensor per column of the input record, with either a | |||
| *scalar default value for that column or an empty vector if the column is | |||
| *required. \n | |||
| *@par Attributes: | |||
| *OUT_TYPE: The numeric type to interpret each string in string_tensor as . \n | |||
| *field_delim: char delimiter to separate fields in a record. \n | |||
| *use_quote_delim: If false, treats double quotation marks as regular characters | |||
| *@li OUT_TYPE: The numeric type to interpret each string in string_tensor as . \n | |||
| *@li field_delim: char delimiter to separate fields in a record. \n | |||
| *@li use_quote_delim: If false, treats double quotation marks as regular characters | |||
| *inside of the string fields (ignoring RFC 4180, Section 2, Bullet 5). \n | |||
| *na_value: Additional string to recognize as NA/NaN. \n | |||
| *@li na_value: Additional string to recognize as NA/NaN. \n | |||
| *@li select_cols: Optional sorted list of column indices to select. If specified, | |||
| only this subset of columns will be parsed and returned. | |||
| *@par Outputs: | |||
| *output: A Tensor. Has the same type as x . \n | |||
| @@ -186,25 +187,25 @@ REG_OP(DecodeCSV) | |||
| /** | |||
| *@brief Convert serialized tensorflow.TensorProto prototype to Tensor. | |||
| *@brief Parse an Example prototype. | |||
| *@par Input: | |||
| *serialized: A Tensor of type string. \n | |||
| *name:A Tensor of type string. \n | |||
| *sparse_keys: Dynamic input tensor of string. \n | |||
| *dense_keys: Dynamic input tensor of string \n | |||
| *dense_defaults: Dynamic input tensor type as string, float, int64. \n | |||
| *@par Inputs: | |||
| *@li serialized: A Tensor of type string. \n | |||
| *@li name: A Tensor of type string. \n | |||
| *@li sparse_keys: Dynamic input tensor of string. \n | |||
| *@li dense_keys: Dynamic input tensor of string \n | |||
| *@li dense_defaults: Dynamic input tensor type as string, float, int64. \n | |||
| *@par Attributes: | |||
| *Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes \n | |||
| *Ndense: Number of dense_keys \n | |||
| *sparse_types: types of sparse_values \n | |||
| *Tdense: Type of dense_defaults dense_defaults and dense_values \n | |||
| *dense_shapes: output of dense_defaults shape \n | |||
| *@li Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes \n | |||
| *@li Ndense: Number of dense_keys \n | |||
| *@li sparse_types: types of sparse_values \n | |||
| *@li Tdense: Type of dense_defaults dense_defaults and dense_values \n | |||
| *@li dense_shapes: output of dense_defaults shape \n | |||
| *@par Outputs: | |||
| *sparse_indices: A Tensor of type string. \n | |||
| *sparse_values: Has the same type as sparse_types. \n | |||
| *sparse_shapes: A Tensor of type int64 \n | |||
| *dense_values: Has the same type as dense_defaults. \n | |||
| *@li sparse_indices: A Tensor of type string. \n | |||
| *@li sparse_values: Has the same type as sparse_types. \n | |||
| *@li sparse_shapes: A Tensor of type int64 \n | |||
| *@li dense_values: Has the same type as dense_defaults. \n | |||
| *@par Third-party framework compatibility \n | |||
| *@li Compatible with the tensorflow ParseExample operator. \n | |||
| */ | |||
| @@ -228,37 +229,37 @@ REG_OP(ParseExample) | |||
| /** | |||
| *@brief Transforms a scalar brain.SequenceExample proto (as strings) into typed | |||
| *tensors. | |||
| *@par Input: | |||
| *serialized: A Tensor of type string. \n | |||
| *feature_list_dense_missing_assumed_empty:A Tensor of type string. \n | |||
| *context_sparse_keys: Dynamic input tensor of string. \n | |||
| *context_dense_keys: Dynamic input tensor of string \n | |||
| *feature_list_sparse_keys: Dynamic input tensor of string \n | |||
| *feature_list_dense_keys: Dynamic input tensor of string \n | |||
| *context_dense_defaults: Dynamic input tensor of string, float, int64 \n | |||
| *debug_name: A Tensor of type string. \n | |||
| *@par Inputs: | |||
| *@li serialized: A Tensor of type string. \n | |||
| *@li feature_list_dense_missing_assumed_empty: A Tensor of type string. \n | |||
| *@li context_sparse_keys: Dynamic input tensor of string. \n | |||
| *@li context_dense_keys: Dynamic input tensor of string \n | |||
| *@li feature_list_sparse_keys: Dynamic input tensor of string \n | |||
| *@li feature_list_dense_keys: Dynamic input tensor of string \n | |||
| *@li context_dense_defaults: Dynamic input tensor of string, float, int64 \n | |||
| *@li debug_name: A Tensor of type string. \n | |||
| *@par Attributes: | |||
| *Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n | |||
| *Ncontext_dense: Number of context_dense_keys \n | |||
| *Nfeature_list_sparse: Number of feature_list_sparse_keys \n | |||
| *Nfeature_list_dense: Number of feature_list_dense_keys \n | |||
| *context_sparse_types: Types of context_sparse_values \n | |||
| *Tcontext_dense: Number of dense_keys \n | |||
| *feature_list_dense_types: Types of feature_list_dense_values \n | |||
| *context_dense_shapes: Shape of context_dense \n | |||
| *feature_list_sparse_types: Type of feature_list_sparse_values \n | |||
| *feature_list_dense_shapes: Shape of feature_list_dense \n | |||
| *@li Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n | |||
| *@li Ncontext_dense: Number of context_dense_keys \n | |||
| *@li Nfeature_list_sparse: Number of feature_list_sparse_keys \n | |||
| *@li Nfeature_list_dense: Number of feature_list_dense_keys \n | |||
| *@li context_sparse_types: Types of context_sparse_values \n | |||
| *@li Tcontext_dense: Types of context_dense_values \n | |||
| *@li feature_list_dense_types: Types of feature_list_dense_values \n | |||
| *@li context_dense_shapes: Shape of context_dense \n | |||
| *@li feature_list_sparse_types: Type of feature_list_sparse_values \n | |||
| *@li feature_list_dense_shapes: Shape of feature_list_dense \n | |||
| *@par Outputs: | |||
| *context_sparse_indices: Dynamic output tensor of type int64. \n | |||
| *context_sparse_values: Dynamic output tensor of type string, float, int64. \n | |||
| *context_sparse_shapes: Dynamic output tensor of type int64 \n | |||
| *context_dense_values: Dynamic output tensor of type string, float, int64. \n | |||
| *feature_list_sparse_indices: Dynamic output tensor of type int64. \n | |||
| *feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n | |||
| *feature_list_sparse_shapes: Dynamic output tensor of type int64 \n | |||
| *feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n | |||
| *@li context_sparse_indices: Dynamic output tensor of type int64. \n | |||
| *@li context_sparse_values: Dynamic output tensor of type string, float, int64. \n | |||
| *@li context_sparse_shapes: Dynamic output tensor of type int64 \n | |||
| *@li context_dense_values: Dynamic output tensor of type string, float, int64. \n | |||
| *@li feature_list_sparse_indices: Dynamic output tensor of type int64. \n | |||
| *@li feature_list_sparse_values: Dynamic output tensor of type string, float, int64. \n | |||
| *@li feature_list_sparse_shapes: Dynamic output tensor of type int64 \n | |||
| *@li feature_list_dense_values: Dynamic output tensor of type string, float, int64. \n | |||
| *@par Third-party framework compatibility \n | |||
| *@li Compatible with the tensorflow ParseSingleSequenceExample operator. \n | |||
| */ | |||
| @@ -63,10 +63,11 @@ REG_OP(Dequantize) | |||
| /** | |||
| *@brief Quantizes the input . \n | |||
| *@par Inputs: | |||
| *x: shape and dtype of input_x. \n | |||
| *scales: shape and dtype of input_scales. \n | |||
| *zero_points: shape and dtype of input_zero_points \n | |||
| *@li x: shape and dtype of input_x. \n | |||
| *@li scales: shape and dtype of input_scales. \n | |||
| *@li zero_points: shape and dtype of input_zero_points \n | |||
| *@par Attributes: | |||
| *@li dtype: A required attribute, specifying the data type to quantize to. | |||
| *@li axis: the processed dim. \n | |||
| *@par Outputs: | |||
| *y: shape and dtype of output_y, should be same shape as input, dtype is same as the quantified type . \n | |||
| @@ -91,7 +92,8 @@ REG_OP(Quantize) | |||
| *@li offset: A required float16, specifying the offset. | |||
| *@li sqrt_mode: An optional bool, specifying whether to perform square root on "scale", either "True" or "False". Defaults to "False". | |||
| *@li round_mode: An optional string, specifying the float16 to int8 cast type. | |||
| * The value range is [Round, Floor, Ceiling, Truncate]. Defaults to "Round" . \n | |||
| * The value range is [Round, Floor, Ceil, Truncate]. Defaults to "Round" . | |||
| *@li dst_type: An optional int32, specifying the output data type. Defaults to "DT_INT8" . \n | |||
| *@par Outputs: | |||
| *y: The quantized output tensor of type int8 and with format NC1HWC0 . \n | |||
| @@ -37,13 +37,18 @@ namespace ge { | |||
| *deprecated name. | |||
| *@li indices: Indices in the outermost dimension of `params` of the values that should be | |||
| *gathered. | |||
| *@par Attributes: | |||
| *@li PARAMS_RAGGED_RANK: The ragged rank of the params_nested_splits. | |||
| *@li Tsplits: The type of output_nested_splits. | |||
| *@li OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain | |||
| *this number of `row_splits` tensors. This value should equal | |||
| *`indices.shape.ndims + params.ragged_rank - 1` . \n | |||
| *@par Outputs: | |||
| *y:A Returns The `nested_row_splits` tensors that define the row-partitioning for the | |||
| *returned RaggedTensor.The `flat_values` for the returned RaggedTensor . \n | |||
| *@li output_nested_splits: The `nested_row_splits` tensors that define the | |||
| *row-partitioning for the returned RaggedTensor. | |||
| *@li output_dense_values: The `flat_values` for the returned RaggedTensor. \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with tensorflow RaggedGather operator. | |||
| @@ -61,7 +61,6 @@ REG_OP(RaggedTensorToSparse) | |||
| *@brief Create a dense tensor from a ragged tensor, possibly altering its shape . \n | |||
| *@par Inputs: | |||
| *Six inputs, including: | |||
| *@li shape: A `Tensor`. Must be one of the following types: `int64`, `int32`. | |||
| *@li values:A 1D tensor representing the values of the ragged tensor. | |||
| *@li default_value:A `Tensor`. Must have the same type as `values`. | |||
| @@ -78,7 +77,7 @@ The types of the row partition tensors. At present, these can be: | |||
| is preceded by "FIRST_DIM_SIZE" . \n | |||
| *@par Outputs: | |||
| *@li result: A `Tensor`. Has the same type as `values`. | |||
| *result: A `Tensor`. Has the same type as `values`. | |||
| */ | |||
| REG_OP(RaggedTensorToTensor) | |||
| .INPUT(shape, TensorType({DT_INT32, DT_INT64})) | |||
| @@ -35,7 +35,11 @@ namespace ge { | |||
| *@li deltas: The deltas of each range . \n | |||
| *@par Outputs: | |||
| *y:A Returns The `row_splits` for the returned `RaggedTensor`.The `flat_values` for the returned `RaggedTensor` . \n | |||
| *@li rt_dense_values:The `flat_values` for the returned `RaggedTensor`. | |||
| *@li rt_nested_splits:The `row_splits` for the returned `RaggedTensor`. \n | |||
| *@par Attributes: | |||
| *Tsplits: The type of rt_nested_splits. | |||
| *@attention Constraints: | |||
| *The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors. | |||
| @@ -147,6 +147,32 @@ REG_OP(RandomGamma) | |||
| .ATTR(seed2, Int, 0) | |||
| .OP_END_FACTORY_REG(RandomGamma) | |||
| /** | |||
| *@brief Returns the random permutation of integers from 0 to n-1. \n | |||
| *@par Attributes: | |||
| *@li n: A required int. | |||
| *@li dtype: An optional str. Defaults to int64 . | |||
| *@li layout: An optional int. Defaults to 0 . \n | |||
| *@par Outputs: | |||
| *out: A Tensor. Must be one of the following types: | |||
| float16, float32, double, int8, uint8, int16, int32, int64. \n | |||
| *@attention Constraints: | |||
| *The implementation for Randperm on Ascend uses AICPU, with bad performance. | |||
| *@par Third-party framework compatibility | |||
| *@li compatible with Pytorch Randperm operator. | |||
| */ | |||
| REG_OP(Randperm) | |||
| .OUTPUT(out, TensorType({DT_INT64, DT_INT32, DT_INT16, | |||
| DT_UINT8, DT_INT8, DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
| .REQUIRED_ATTR(n, Int) | |||
| .ATTR(layout, Int, 0) | |||
| .ATTR(dtype, Type, DT_INT64) | |||
| .OP_END_FACTORY_REG(Randperm) | |||
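| A hedged usage sketch (wrapper and setter names assumed from the REG_OP definition above; note the AICPU performance caveat): | |||
| #include "all_ops.h"  // assumed aggregate header exposing the generated ge::op classes | |||
| ge::Operator MakeRandperm(int64_t n) { | |||
|   ge::op::Randperm perm("randperm"); | |||
|   perm.set_attr_n(n);                 // required: permutes the integers 0..n-1 | |||
|   perm.set_attr_dtype(ge::DT_INT32);  // optional: output dtype, defaults to int64 | |||
|   return perm; | |||
| } | |||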
| /** | |||
| *@brief Outputs random values from the Poisson distribution(s) described by rate . \n | |||
| @@ -157,11 +183,12 @@ REG_OP(RandomGamma) | |||
| *@par Attributes: | |||
| *@li dtype: An optional type from: half, float32, float64, int32, int64. Defaults to int64. | |||
| *@li seed: An optional int. Defaults to 0. | |||
| *@li seed2: An optional int. Defaults to 0 . \n | |||
| *@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero, | |||
| the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. | |||
| *@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n | |||
| *@par Outputs: | |||
| *y: A Tensor of type dtype . \n | |||
| *y: A Tensor of type "dtype": float16, float, double, int32 or int64. \n | |||
| *@attention Constraints: | |||
| *The implementation for RandomPoisson on Ascend uses AICPU, with bad performance. | |||
| @@ -188,11 +215,13 @@ REG_OP(RandomPoisson) | |||
| *x: A Tensor. The tensor to be shuffled . \n | |||
| *@par Attributes: | |||
| *@li seed: An optional int. Defaults to 0. | |||
| *@li seed2: An optional int. Defaults to 0 . \n | |||
| *@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero, | |||
| the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. | |||
| *@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n | |||
| *@par Outputs: | |||
| *y: A Tensor. Has the same type as x . \n | |||
| *y: A Tensor. Has the same type as x: float16, float, double, | |||
| *int16, uint16, int8, uint8, int32 or int64. \n | |||
| *@attention Constraints: | |||
| *The implementation for RandomShuffle on Ascend uses AICPU, with bad performance. | |||
| @@ -220,11 +249,12 @@ REG_OP(RandomShuffle) | |||
| *@par Attributes: | |||
| *@li dtype: A type from: half, float16, float32, float64. The type of the output. | |||
| *@li seed: An optional int. Defaults to 0. | |||
| *@li seed2: An optional int. Defaults to 0 . \n | |||
| *@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero, | |||
| the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. | |||
| *@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n | |||
| *@par Outputs: | |||
| *y: A Tensor of type dtype . \n | |||
| *y: A Tensor of type float32, float16, double. \n | |||
| *@attention Constraints: | |||
| *The implementation for RandomStandardNormal on Ascend uses AICPU, with bad performance. | |||
| @@ -240,6 +270,28 @@ REG_OP(RandomStandardNormal) | |||
| .ATTR(seed2, Int, 0) | |||
| .OP_END_FACTORY_REG(RandomStandardNormal) | |||
| /** | |||
| *@brief Outputs random values from separate normal distributions. \n | |||
| *@par Inputs: | |||
| *Inputs include: | |||
| *@li mean: A tensor with the mean of each output element's normal distribution . | |||
| *@li std: A tensor with the standard deviation of each output element's normal distribution. \n | |||
| *@par Outputs: | |||
| *y: A Tensor of type dtype . \n | |||
| *@attention Constraints: | |||
| *The implementation for Normal on Ascend uses AICPU, with bad performance. | |||
| *@par Third-party framework compatibility | |||
| *@li compatible with Pytorch Normal operator. | |||
| */ | |||
| REG_OP(Normal) | |||
| .INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .INPUT(std, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .OP_END_FACTORY_REG(Normal) | |||
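| A hedged sketch of wiring Normal from mean/std producers (setter names assumed from the REG_OP definition above): | |||
| #include "all_ops.h"  // assumed aggregate header exposing the generated ge::op classes | |||
| ge::Operator MakeNormal(ge::Operator &mean, ge::Operator &std_dev) { | |||
|   ge::op::Normal normal("normal"); | |||
|   normal.set_input_mean(mean);    // per-element means | |||
|   normal.set_input_std(std_dev);  // per-element standard deviations | |||
|   return normal;                  // y keeps the element type of the inputs | |||
| } | |||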
| /** | |||
| *@brief Outputs random integers from a uniform distribution . \n | |||
| @@ -250,8 +302,9 @@ REG_OP(RandomStandardNormal) | |||
| * @li max: A Tensor. Must have the same type as minval. 0-D . \n | |||
| *@par Attributes: | |||
| *@li seed: An optional int. Defaults to 0. | |||
| *@li seed2: An optional int. Defaults to 0 . \n | |||
| *@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero, | |||
| the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. | |||
| *@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n | |||
| *@par Outputs: | |||
| *y: A Tensor. Has the same type as min . \n | |||
| @@ -280,8 +333,9 @@ REG_OP(RandomUniformInt) | |||
| *@par Attributes: | |||
| *@li dtype: A type from: half, float16, float32, float64. The type of the output. | |||
| *@li seed: An optional int. Defaults to 0. | |||
| *@li seed2: An optional int. Defaults to 0 . \n | |||
| *@li seed: An optional int. Defaults to 0. If either seed or seed2 are set to be non-zero, | |||
| the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed. | |||
| *@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n | |||
| *@par Outputs: | |||
| *y: A Tensor of type dtype . \n | |||
| @@ -308,11 +362,14 @@ REG_OP(RandomUniform) | |||
| *shape: A Tensor. Must be one of the following types: int32, int64 . \n | |||
| *@par Attributes: | |||
| *@li seed: An optional int. Defaults to 0. | |||
| *@li seed2: An optional int. Defaults to 0 . \n | |||
| *@li seed: An optional int. Defaults to 0.If either `seed` or `seed2` | |||
| are set to be non-zero, the random number generator is seeded by the given | |||
| seed. Otherwise, it is seeded by a random seed. | |||
| *@li seed2: An optional int. Defaults to 0 . A second seed to avoid seed collision. \n | |||
| *@par Outputs: | |||
| *size: A Tensor of types: float16, float32, double . \n | |||
| *y: A Tensor of types: float16, float32, double . A tensor of the specified shape | |||
| filled with random truncated normal values. \n | |||
| *@attention Constraints: | |||
| *The implementation for TruncatedNormal on Ascend uses AICPU, with bad performance. | |||
| @@ -505,15 +562,15 @@ REG_OP(RandomChoiceWithMask) | |||
| *@par Inputs: | |||
| *Inputs including: | |||
| * @li x: A required Tensor. Must be one of the following types: | |||
| float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n | |||
| * x: A required Tensor. Must be one of the following types: | |||
| float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n | |||
| *@par Attributes: | |||
| *@li group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n | |||
| * group: A required int32, specifying the number of groups to split the channel dimension into. Defaults to "1" . \n | |||
| *@par Outputs: | |||
| *y: A required Tensor. Has same type and shape as "x". Must be one of the following types: | |||
| float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n | |||
| * y: A required Tensor. Has same type and shape as "x". Must be one of the following types: | |||
| float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64 . \n | |||
| *@attention Constraints: | |||
| *@li "group" must be greater than 0 and must evenly divide the channel dimension size. | |||
| @@ -584,6 +641,50 @@ REG_OP(DropoutV2) | |||
| .OUTPUT(seed, TensorType({ DT_FLOAT })) | |||
| .REQUIRED_ATTR(p, Float) | |||
| .OP_END_FACTORY_REG(DropoutV2) | |||
| /** | |||
| * @brief The Bernoulli distribution with probability "p". \n | |||
| * @par Inputs: | |||
| * @li x: A ND Tensor. Must be one of the following data types: | |||
| int8, uint8, int16, int32, int64, bool, float32, float64 . | |||
| * @li p: A ND Tensor. The probability of an element to be zeroed. | |||
| Must be one of the following data types: float32, float64. \n | |||
| * @par Attributes: | |||
| * seed: An Integer, the seed of the random generator. Default value -1 | |||
| to use current timestamp, otherwise it should be a positive integer. | |||
| * @par Outputs: | |||
| * y: A tensor with the same shape and type as "x". | |||
| */ | |||
| REG_OP(Bernoulli) | |||
| .INPUT(x, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||
| .INPUT(p, TensorType({ DT_FLOAT, DT_DOUBLE })) | |||
| .OUTPUT(y, TensorType({ DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||
| .ATTR(seed, Int, -1) | |||
| .OP_END_FACTORY_REG(Bernoulli) | |||
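| A hedged sketch of a Bernoulli node (wrapper and setter names assumed from the REG_OP definition above): | |||
| #include "all_ops.h"  // assumed aggregate header exposing the generated ge::op classes | |||
| ge::Operator MakeBernoulli(ge::Operator &x, ge::Operator &p) { | |||
|   ge::op::Bernoulli bernoulli("bernoulli"); | |||
|   bernoulli.set_input_x(x);     // shape/type template for the output | |||
|   bernoulli.set_input_p(p);     // element-wise probabilities, float or double | |||
|   bernoulli.set_attr_seed(42);  // positive seed for reproducibility; -1 uses the timestamp | |||
|   return bernoulli; | |||
| } | |||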
| /** | |||
| * @brief Fill the input tensor with values drawn from the uniform distribution U(from, to). \n | |||
| * @par Inputs: | |||
| * x: A Tensor. Must be one of the following types: float16, float, double. \n | |||
| * @par Attributes: | |||
| * @li from: The lower bound of the uniform. Defaults: 0.0 | |||
| * @li to: The upper bound of the uniform. Defaults: 1.0 \n | |||
| * @par Outputs: | |||
| * y: A Tensor has the same type as x. \n | |||
| */ | |||
| REG_OP(Uniform) | |||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
| .ATTR(from, Float, 0.0) | |||
| .ATTR(to, Float, 1.0) | |||
| .OP_END_FACTORY_REG(Uniform) | |||
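| A hedged sketch of a Uniform node drawing from U(from, to) (wrapper and setter names assumed from the REG_OP definition above): | |||
| #include "all_ops.h"  // assumed aggregate header exposing the generated ge::op classes | |||
| ge::Operator MakeUniform(ge::Operator &x) { | |||
|   ge::op::Uniform uniform("uniform"); | |||
|   uniform.set_input_x(x);        // output matches x's type and shape | |||
|   uniform.set_attr_from(-1.0f);  // lower bound, defaults to 0.0 | |||
|   uniform.set_attr_to(1.0f);     // upper bound, defaults to 1.0 | |||
|   return uniform; | |||
| } | |||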
| } // namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ | |||
| @@ -576,7 +576,7 @@ REG_OP(ReduceAll) | |||
| *@li axis: A mutable Tensor. The dimensions to reduce . \n | |||
| *@par Attributes: | |||
| *@li keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n | |||
| *keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False" . \n | |||
| *@par Outputs: | |||
| *y: A Tensor. Has the same type and format as input "x" . \n | |||
| @@ -967,9 +967,9 @@ REG_OP(EuclideanNormD) | |||
| Defaults to "0.00001" . \n | |||
| *@par Outputs: | |||
| *y: A Tensor of type float16 or float32 for the normalized "x". | |||
| *batch_mean: A Tensor of type float32 for the result mean. | |||
| *batch_ variance: A Tensor of type float32 for the result variance . \n | |||
| *@li y: A Tensor of type float16 or float32 for the normalized "x". | |||
| *@li batch_mean: A Tensor of type float32 for the result mean. | |||
| *@li batch_variance: A Tensor of type float32 for the result variance . \n | |||
| *@attention Constraints: | |||
| *For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction. | |||
| @@ -987,7 +987,7 @@ REG_OP(INInferV2) | |||
| .OP_END_FACTORY_REG(INInferV2) | |||
| /** | |||
| *@brief Performs reduced instance normalization . \n | |||
| *@brief Performs the reduce step of instance normalization. \n | |||
| *@par Inputs: | |||
| *x: A Tensor of type float16 or float32. \n | |||
| @@ -1008,32 +1008,31 @@ REG_OP(INTrainingReduceV2) | |||
| /** | |||
| *@brief Performs update instance normalization . \n | |||
| *@brief Performs the update step of instance normalization. \n | |||
| *@par Inputs: | |||
| * Seven inputs, including: (NC1HWC0supported) | |||
| * Seven inputs, including: | |||
| *@li x: A Tensor of type float16 or float32. | |||
| *@li sum: A Tensor of type float32 for the output of operator INTrainingReduceV2. | |||
| *@li square_sum: A Tensor of type float32 for the output of operator INTrainingReduceV2. | |||
| *@li gamma: A Tensor of type float32, for the scaling gamma. | |||
| *@li beta: A Tensor of type float32, for the scaling beta. | |||
| *@li mean: A Tensor of type float32, for the updated mean. | |||
| *@li variance: A Tensor of type float32, for the updated variance . \n | |||
| *@li variance: A Tensor of type float32, for the updated variance. \n | |||
| *@par Attributes: | |||
| *@li momentum: A required float32, specifying the momentum to update mean and var. | |||
| *@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n | |||
| *@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero. \n | |||
| *@par Outputs: | |||
| * Three outputs | |||
| *@li y: A Tensor of type float16 or float32, for normalized "x". | |||
| *@li batch_mean: A Tensor of type float32, for the updated mean. | |||
| *@li batch_variance: A Tensor of type float32, for the updated variance . \n | |||
| *@li batch_variance: A Tensor of type float32, for the updated variance. \n | |||
| *@attention Constraints: | |||
| *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | |||
| * This operator is an InstanceNorm fusion operator for updating the moving averages for training. | |||
| * This operator is used in conjunction with INTrainingReduceV2. | |||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||
| */ | |||
| REG_OP(INTrainingUpdateV2) | |||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
| @@ -1051,6 +1050,80 @@ REG_OP(INTrainingUpdateV2) | |||
| .OP_END_FACTORY_REG(INTrainingUpdateV2) | |||
| /** | |||
| *@brief Performs the backpropagation of InstanceNorm. \n | |||
| *@par Inputs: | |||
| * Seven inputs, including: | |||
| *@li dy: A Tensor of type float16 or float32. | |||
| *@li x: A Tensor of type float16 or float32. | |||
| *@li variance: A Tensor of type float32, for the variance of "x". | |||
| *@li mean: A Tensor of type float32, for the mean of "x". | |||
| *@li res_gamma: A Tensor of type float32. | |||
| *@li res_beta: A Tensor of type float32. | |||
| *@li gamma: A Tensor of type float32. \n | |||
| *@par Outputs: | |||
| *pd_x: A Tensor of type float16 or float32, for the gradient of "x". \n | |||
| *@attention Constraints: | |||
| * The preceding layer of this operator must be INTrainingUpdateGrad. \n | |||
| */ | |||
| REG_OP(INTrainingReduceGrad) | |||
| .INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
| .INPUT(variance, TensorType({DT_FLOAT})) | |||
| .INPUT(mean, TensorType({DT_FLOAT})) | |||
| .INPUT(res_gamma, TensorType({DT_FLOAT})) | |||
| .INPUT(res_beta, TensorType({DT_FLOAT})) | |||
| .INPUT(gamma, TensorType({DT_FLOAT})) | |||
| .OUTPUT(pd_x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
| .OP_END_FACTORY_REG(INTrainingReduceGrad) | |||
| /** | |||
| *@brief Performs the backpropagation of InstanceNorm. \n | |||
| *@par Inputs: | |||
| * Four inputs, including: | |||
| *@li dy: A Tensor of type float16 or float32, for the gradient. | |||
| *@li x: A Tensor of type float16 or float32. | |||
| *@li variance: A Tensor of type float32, for the variance of "x". | |||
| *@li mean: A Tensor of type float32, for the mean of "x". \n | |||
| *@par Outputs: | |||
| *@li res_gamma: A Tensor of type float32. | |||
| *@li res_beta: A Tensor of type float32. \n | |||
| */ | |||
| REG_OP(INTrainingUpdateGrad) | |||
| .INPUT(dy, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
| .INPUT(variance, TensorType({DT_FLOAT})) | |||
| .INPUT(mean, TensorType({DT_FLOAT})) | |||
| .OUTPUT(res_gamma, TensorType({DT_FLOAT})) | |||
| .OUTPUT(res_beta, TensorType({DT_FLOAT})) | |||
| .OP_END_FACTORY_REG(INTrainingUpdateGrad) | |||
| /** | |||
| *@brief Performs the backpropagation of InstanceNorm. \n | |||
| *@par Inputs: | |||
| * Two inputs, including: | |||
| *@li res_gamma: A Tensor of type float32. | |||
| *@li res_beta: A Tensor of type float32. \n | |||
| *@par Outputs: | |||
| *@li pd_gamma: A Tensor of type float32. | |||
| *@li pd_beta: A Tensor of type float32. \n | |||
| */ | |||
| REG_OP(INTrainingUpdateGradGammaBeta) | |||
| .INPUT(res_gamma, TensorType({DT_FLOAT})) | |||
| .INPUT(res_beta, TensorType({DT_FLOAT})) | |||
| .OUTPUT(pd_gamma, TensorType({DT_FLOAT})) | |||
| .OUTPUT(pd_beta, TensorType({DT_FLOAT})) | |||
| .OP_END_FACTORY_REG(INTrainingUpdateGradGammaBeta) | |||
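| // Illustrative sketch (not part of this header): how the three backward ops | |||
| // chain. Assuming the standard instance-norm gradient decomposition, | |||
| // INTrainingUpdateGrad reduces "dy" over the spatial axes into res_gamma and | |||
| // res_beta; those feed INTrainingReduceGrad (which produces pd_x) and | |||
| // INTrainingUpdateGradGammaBeta (which sums them over the batch axis into | |||
| // pd_gamma and pd_beta). A minimal per-(n, c) reference for NCHW data: | |||
| #include <cmath> | |||
| #include <cstddef> | |||
| inline void INTrainingUpdateGradRef(const float *dy, const float *x, | |||
|                                     const float *mean, const float *variance, | |||
|                                     float epsilon, std::size_t N, std::size_t C, | |||
|                                     std::size_t HW, float *res_gamma, float *res_beta) { | |||
|   for (std::size_t n = 0; n < N; ++n) { | |||
|     for (std::size_t c = 0; c < C; ++c) { | |||
|       const std::size_t nc = n * C + c; | |||
|       const float rstd = 1.0f / std::sqrt(variance[nc] + epsilon); | |||
|       float g = 0.0f, b = 0.0f; | |||
|       for (std::size_t i = 0; i < HW; ++i) { | |||
|         const float d = dy[nc * HW + i]; | |||
|         g += d * (x[nc * HW + i] - mean[nc]) * rstd;  // contribution to d(gamma) | |||
|         b += d;                                       // contribution to d(beta) | |||
|       } | |||
|       res_gamma[nc] = g; | |||
|       res_beta[nc] = b; | |||
|     } | |||
|   } | |||
| } | |||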
| /** | |||
| *@brief Performs reduced group normalization . \n | |||
| @@ -1063,7 +1136,7 @@ REG_OP(INTrainingUpdateV2) | |||
| *@par Attributes: | |||
| *@li num_groups: Int, specifying the num of groups. required, same to GNTrainingUpdate . \n | |||
| *num_groups: A required Int, specifying the number of groups; must be the same as in GNTrainingUpdate. \n | |||
| *@attention Constraints: | |||
| * This operator is a GroupNorm fusion operator for updating the moving averages for training. | |||
| @@ -1081,7 +1154,7 @@ REG_OP(GNTrainingReduce) | |||
| *@brief Performs update group normalization . \n | |||
| *@par Inputs: | |||
| * Eight inputs, including: (NCHW NHWC supported) | |||
| * Seven inputs, including: (NCHW NHWC supported) | |||
| *@li x: A Tensor of type float16 or float32. | |||
| *@li sum: A 5D Tensor of type float32, | |||
| shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC | |||
| @@ -1145,8 +1218,8 @@ include: | |||
| *@li keep_dims: An optional bool. Defaults to False. If True, retains reduced dimensions with length 1. | |||
| *@li separator:string. | |||
| *@par output: | |||
| *@li output::A Tensor of type string.. | |||
| *@par Outputs: | |||
| *output:A Tensor of type string. | |||
| */ | |||
| REG_OP(ReduceJoin) | |||
| .INPUT(input, TensorType({DT_STRING})) | |||
| @@ -1160,7 +1233,7 @@ REG_OP(ReduceJoin) | |||
| * @brief Calculates the standard deviation and average value of Tensors. | |||
| * @par Inputs: | |||
| * @li x: A Tensor. Must be one of the following types: | |||
| * x: A Tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| * @par Attributes: | |||
| @@ -33,10 +33,12 @@ namespace ge { | |||
| *y:A Tensor of type resource. \n | |||
| *@par Attributes: | |||
| * @li container: optional, string. | |||
| * @li shared_name: optional, string. | |||
| * @li dtype: required, type. | |||
| * @li shape: optional, ListInt. \n | |||
| * @li container: optional, string. The container this | |||
| variable is placed in. | |||
| * @li shared_name: optional, string. The name by which | |||
| this variable is referred to. | |||
| * @li dtype: required, type. The data type of the output. | |||
| * @li shape: optional, ListInt. The shape of the output. \n | |||
| *@see VarHandleOp. | |||
| */ | |||
| @@ -53,11 +55,11 @@ REG_OP(VarHandleOp) | |||
| *@brief Assigns a new value to a variable. \n | |||
| *@par Inputs: | |||
| *resource:Handle to the resource in which to store the variable. | |||
| *value:The value to set the new tensor to use. \n | |||
| *@li resource:Handle to the resource in which to store the variable. | |||
| *@li value:The value to set the new tensor to use. \n | |||
| *@par Attributes: | |||
| * @li dtype: required, type. \n | |||
| * dtype: required, type. \n | |||
| *@see AssignVariableOp. | |||
| */ | |||
| @@ -73,11 +75,11 @@ REG_OP(AssignVariableOp) | |||
| *@brief Adds a value to the current value of a variable. \n | |||
| *@par Inputs: | |||
| *resource:Handle to the resource in which to store the variable. | |||
| *value:The value by which the variable will be incremented. \n | |||
| *@li resource:Handle to the resource in which to store the variable. | |||
| *@li value:The value by which the variable will be incremented. \n | |||
| *@par Attributes: | |||
| * @li dtype: required, type. \n | |||
| * dtype: required, type. \n | |||
| *@see AssignAddVariableOp. | |||
| */ | |||
| @@ -93,11 +95,11 @@ REG_OP(AssignAddVariableOp) | |||
| *@brief Subtracts a value from the current value of a variable. \n | |||
| *@par Inputs: | |||
| *resource:Handle to the resource in which to store the variable. | |||
| *value:The value by which the variable will be incremented. \n | |||
| *@li resource:Handle to the resource in which to store the variable. | |||
| *@li value:The value by which the variable will be decremented. \n | |||
| *@par Attributes: | |||
| * @li dtype: required, type. \n | |||
| * dtype: required, type. \n | |||
| *@see AssignSubVariableOp. | |||
| */ | |||
| @@ -127,9 +127,7 @@ REG_OP(DynamicLSTM) | |||
| *@li cell_clip:A float identifying the cell clip in the op. Defaults to -1. | |||
| *@li num_proj:An integer identifying the num projection in the op. Defaults to 0. | |||
| *@li time_major:A bool identifying the time major in the op. Defaults to false. | |||
| *@li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported. | |||
| *@li forget_bias:A float identifying the forget bias in the op. Defaults to 0. | |||
| *@li is_training:A bool identifying whether training is enabled in the op. Defaults to true. | |||
| *@par Outputs: | |||
| *eight outputs: \n | |||
| @@ -491,7 +489,6 @@ REG_OP(DynamicLSTMV2) | |||
| *ten inputs: \n | |||
| *@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
| *@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
| *@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
| *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
| *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
| *@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
| @@ -504,10 +501,11 @@ REG_OP(DynamicLSTMV2) | |||
| *@par Outputs: | |||
| *eight outputs: \n | |||
| *four outputs: \n | |||
| *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
| *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
| *@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
| *@li dgate:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||
| */ | |||
| REG_OP(LSTMInputGrad) | |||
| .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| @@ -571,13 +569,13 @@ REG_OP(DynamicLSTMGradCell) | |||
| .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(t_state, TensorType({DT_INT32, DT_INT32})) | |||
| .INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OUTPUT(dgate, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OUTPUT(dct_1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .ATTR(forget_bias, Float, 1) | |||
| .ATTR(activation, String, "") | |||
| .ATTR(direction, String, "Forward") | |||
| .ATTR(forget_bias, Float, 1.0) | |||
| .ATTR(activation, String, "tanh") | |||
| .ATTR(direction, String, "UNIDIRECTIONAL") | |||
| .ATTR(gate_order, String, "ijfo") | |||
| .OP_END_FACTORY_REG(DynamicLSTMGradCell) | |||
| @@ -1070,7 +1068,7 @@ REG_OP(GRUV2HiddenGradCell) | |||
| * If "False", "grad_weight" will not be scale by word_frequency. \n | |||
| * @par Outputs: | |||
| * @li grad_weight: A mutable output Tensor of new word grad has the same type as "grads". \n | |||
| * y: A mutable output Tensor of the new word gradient. Has the same type as "grads". \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator EmbeddingDenseGrad. | |||
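| // Illustrative sketch (not part of this header): the dense embedding gradient, | |||
| // following the PyTorch embedding_dense_backward semantics this op claims | |||
| // compatibility with. Rows of "grads" are accumulated into the weight gradient | |||
| // at the positions given by "indices"; with scale_grad_by_freq each row is | |||
| // divided by the number of occurrences of its index. Names and the flattened | |||
| // [n, dim] layout are assumptions. | |||
| #include <cstddef> | |||
| #include <cstdint> | |||
| #include <vector> | |||
| inline std::vector<float> EmbeddingDenseGradRef( | |||
|     const std::vector<float> &grads,      // [n, dim], flattened | |||
|     const std::vector<int64_t> &indices,  // [n] | |||
|     int64_t num_weights, int64_t dim, bool scale_grad_by_freq) { | |||
|   std::vector<float> grad_weight(num_weights * dim, 0.0f); | |||
|   std::vector<int64_t> counts(num_weights, 0); | |||
|   for (int64_t idx : indices) ++counts[idx];  // word frequencies | |||
|   for (std::size_t i = 0; i < indices.size(); ++i) { | |||
|     const int64_t row = indices[i]; | |||
|     const float scale = scale_grad_by_freq ? 1.0f / counts[row] : 1.0f; | |||
|     for (int64_t d = 0; d < dim; ++d) | |||
|       grad_weight[row * dim + d] += scale * grads[i * dim + d]; | |||
|   } | |||
|   return grad_weight; | |||
| } | |||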
| @@ -1222,7 +1220,7 @@ REG_OP(CommonGRU) | |||
| * is equivalent to the size of indices. This matches the CSR format. \n | |||
| * @par Outputs: | |||
| * @li grad_weight: A mutable output Tensor of new word grad has the same type as "grads". \n | |||
| * y: A mutable output Tensor of the new word gradient. Has the same type as "grads". \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator EmbeddingBag. | |||
| @@ -28,12 +28,12 @@ namespace ge { | |||
| * iou_threshold with higher scoring box according to their | |||
| * intersection-over-union (IoU) . \n | |||
| *@par Input: | |||
| * @li box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and | |||
| * @par Inputs: | |||
| * box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and | |||
| * corresponding confidence scores . \n | |||
| * @par Attributes: | |||
| * @li iou_threshold: An optional float. The threshold for deciding whether boxes | |||
| * iou_threshold: An optional float. The threshold for deciding whether boxes | |||
| * overlap too much with respect to IOU . \n | |||
| * @par Outputs: | |||
| @@ -45,7 +45,13 @@ namespace ge { | |||
| *corresponding weights in sparse_weights. This field may be omitted for the dense approach. It's a dynamic input. | |||
| *@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group. | |||
| *@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group. It's a dynamic input. | |||
| *@li example_state_data: a list of vectors containing the example state data. | |||
| *@li example_state_data: a list of vectors containing the example state data. \n | |||
| *@par Attributes: | |||
| *@li adaptive: An optional bool. Defaults to false. | |||
| *@li num_sparse_features: The number of sparse feature groups. | |||
| *@li num_sparse_features_with_values: The number of sparse feature groups with values. | |||
| *@li num_dense_features: The number of dense feature groups. | |||
| *@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses. | |||
| *@li l1: Symmetric l1 regularization strength. | |||
| *@li l2: Symmetric l2 regularization strength. | |||
| @@ -53,10 +59,10 @@ namespace ge { | |||
| *@li num_inner_iterations: Number of iterations per mini-batch . \n | |||
| *@par Outputs: | |||
| *y: A Returns a list of vectors containing the updated example state | |||
| *@li out_example_state_data: A list of vectors containing the updated example state data. | |||
| *data.a list of vectors where each value is the delta | |||
| *weights associated with a sparse feature group.a list of vectors where the values are the delta | |||
| *weights associated with a dense feature group . \n | |||
| *@li out_delta_sparse_weights: A list of vectors where each value is the delta weights associated with a sparse feature group. | |||
| *@li out_delta_dense_weights: A list of vectors where the values are the delta weights associated with a dense feature group. \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with tensorflow SdcaOptimizerV2 operator. | |||
| @@ -258,7 +258,7 @@ REG_OP(GatherV2D) | |||
| REG_OP(GatherElements) | |||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) | |||
| .INPUT(index, TensorType({DT_INT64})) | |||
| .INPUT(index, TensorType({DT_INT32, DT_INT64})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) | |||
| .ATTR(dim, Int, 0) | |||
| .OP_END_FACTORY_REG(GatherElements) | |||
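| // Illustrative sketch (not part of this header): GatherElements follows the | |||
| // usual ONNX GatherElements / PyTorch gather semantics, an assumption based on | |||
| // the op name and the "dim" attribute; e.g. for a 2-D "x" with dim = 0, | |||
| //   y[i][j] = x[index[i][j]][j]. | |||
| // A minimal 2-D reference, with "index" now allowed to be int32 or int64: | |||
| #include <cstdint> | |||
| #include <vector> | |||
| inline std::vector<float> GatherElements2D(const std::vector<float> &x, int64_t cols, | |||
|                                            const std::vector<int64_t> &index, | |||
|                                            int64_t idx_rows, int64_t idx_cols, int dim) { | |||
|   std::vector<float> y(idx_rows * idx_cols); | |||
|   for (int64_t i = 0; i < idx_rows; ++i) | |||
|     for (int64_t j = 0; j < idx_cols; ++j) { | |||
|       const int64_t k = index[i * idx_cols + j]; | |||
|       // "dim" selects which coordinate the index value replaces. | |||
|       y[i * idx_cols + j] = (dim == 0) ? x[k * cols + j] : x[i * cols + k]; | |||
|     } | |||
|   return y; | |||
| } | |||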
| @@ -508,7 +508,7 @@ REG_OP(UnsortedSegmentSum) | |||
| *@par Inputs: | |||
| *One inputs, including: | |||
| * @li assist: A tensor. Must be one of the following types: | |||
| * assist: A tensor. Must be one of the following types: | |||
| * float16, float32. \n | |||
| * @par Attributes: | |||
| @@ -970,10 +970,11 @@ REG_OP(TopKV2) | |||
| * for matrices) . \n | |||
| * @par Attributes: | |||
| * @li sorted: An optional bool. Defaults to true. | |||
| * @li sorted: Defaults to true. | |||
| * If true, the resulting "k" elements will be sorted by the values in descending | |||
| * order. | |||
| * @li T: Indicator of indices type . \n | |||
| * @li largest: If true, the resulting "k" elements will be sorted by the values in descending order. | |||
| * @li dim: The dimension along which to find the top "k" elements. \n | |||
| * @par Outputs: | |||
| * @li values: A Tensor, specifying the sorted data. Has the same type as | |||
| @@ -982,7 +983,7 @@ REG_OP(TopKV2) | |||
| * @see TopK() | |||
| * @par Third-party framework compatibility | |||
| * @li Compatible with the TensorFlow operator TopKV2. | |||
| * Compatible with the TensorFlow operator TopKV2. | |||
| */ | |||
| REG_OP(TopK) | |||
| .INPUT(x, TensorType::RealNumberType()) | |||
| @@ -1085,7 +1086,6 @@ REG_OP(InTopKD) | |||
| * @brief Says whether the targets are in the top "k" predictions . \n | |||
| * @par Inputs: | |||
| * Two inputs, including: | |||
| * @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor. | |||
| * @li x2: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. | |||
| * @li k: A 1D Tensor of the same type as "x2". | |||
| @@ -1618,12 +1618,12 @@ REG_OP(UnsortedSegmentMinD) | |||
| * y: A Tensor of type RealNumberType . \n | |||
| * @attention Constraints: | |||
| * @li segment_ids must be non-negative tensor. | |||
| * segment_ids must be a non-negative tensor. | |||
| * @see UnsortedSegmentSum(), UnsortedSegmentProd(), | |||
| * @par Third-party framework compatibility | |||
| * @li Compatible with the TensorFlow operator UnsortedSegmentMax. | |||
| * Compatible with the TensorFlow operator UnsortedSegmentMax. | |||
| */ | |||
| REG_OP(UnsortedSegmentMax) | |||
| .INPUT(x, TensorType::RealNumberType()) | |||
| @@ -1875,15 +1875,15 @@ REG_OP(Crop) | |||
| *@par Inputs: | |||
| *One inputs, including: | |||
| * @li x: A tensor . Must be one of the following types: | |||
| * x: A tensor. Must be one of the following types: | |||
| * float16, float32, int32, uint32, int8, uint8. \n | |||
| *@par Attributes: | |||
| * @li axis: Axis along which to cummin. \n | |||
| * axis: Axis along which to compute the cumulative minimum. \n | |||
| *@par Outputs: | |||
| * y: A Tensor with the same type and shape of x's. \n | |||
| * indices: A Tensor with the int32 type and the same shape of x's. \n | |||
| * @li y: A Tensor with the same type and shape of x's. | |||
| * @li indices: A Tensor with the int32 type and the same shape of x's. \n | |||
| *@par Third-party framework compatibility | |||
| *Compatible with the Pytorch operator Cummin. \n | |||
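| // Illustrative sketch (not part of this header): along "axis" the op keeps a | |||
| // running minimum plus the index where it was attained, matching the PyTorch | |||
| // cummin semantics cited above. A 1-D reference: | |||
| #include <cstdint> | |||
| #include <vector> | |||
| inline void CumminRef(const std::vector<float> &x, std::vector<float> &y, | |||
|                       std::vector<int32_t> &indices) { | |||
|   y.resize(x.size()); | |||
|   indices.resize(x.size()); | |||
|   for (std::size_t i = 0; i < x.size(); ++i) { | |||
|     if (i == 0 || x[i] < y[i - 1]) {  // new running minimum | |||
|       y[i] = x[i]; | |||
|       indices[i] = static_cast<int32_t>(i); | |||
|     } else {                          // carry the previous minimum and its index | |||
|       y[i] = y[i - 1]; | |||
|       indices[i] = indices[i - 1]; | |||
|     } | |||
|   } | |||
| } | |||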
| @@ -1968,17 +1968,14 @@ REG_OP(WriteSelect) | |||
| .OP_END_FACTORY_REG(WriteSelect) | |||
| /** | |||
| *@brief Read data by stride . \n | |||
| *@brief Read data by stride. | |||
| *@par Inputs: | |||
| *One input: | |||
| *x: A Tensor. Must be one of the following types: float16, int8 . \n | |||
| *x: A Tensor. Must be one of the following types: float16, int8. \n | |||
| *@par Attributes: | |||
| *@li axis: A required int32, specifying the index of axis to read by stride . \n | |||
| *@par Attributes: | |||
| *@li stride: A required int32, specifying the value of reading stride . \n | |||
| *@li axis: A required int32, specifying the index of axis to read by stride. \n | |||
| *@li stride: A required int32, specifying the value of reading stride. \n | |||
| *@par Outputs: | |||
| *y: A Tensor of the same type as "x". | |||
| @@ -1991,16 +1988,14 @@ REG_OP(StridedRead) | |||
| .OP_END_FACTORY_REG(StridedRead) | |||
| /** | |||
| *@brief: Write data by stride . \n | |||
| *@brief Write data by stride. | |||
| *@par Inputs: | |||
| *x: A Tensor. Must be one of the following types: float16, int8 . \n | |||
| *@par Attributes: | |||
| *@li axis: A required int32, specifying the index of axis to write by stride . \n | |||
| *x: A Tensor. Must be one of the following types: float16, int8. \n | |||
| *@par Attributes: | |||
| *@li stride: A required int32, specifying the value of writing stride . \n | |||
| *@li axis: A required int32, specifying the index of axis to write by stride. \n | |||
| *@li stride: A required int32, specifying the value of writing stride. \n | |||
| *@par Outputs: | |||
| *y: A Tensor. Has the same type as "x". | |||
| @@ -2076,10 +2071,10 @@ REG_OP(CumulativeLogsumexpD) | |||
| * @li updates: A Tensor of the same type as "var". \n | |||
| * @par Attributes: | |||
| * @li axis: An required int to specify the axis to perform indices add. \n | |||
| * axis: A required int, specifying the axis along which to perform the index add. \n | |||
| * @par Outputs: | |||
| * @li var: A Tensor. Same as input "var". | |||
| * var: A Tensor. Same as input "var". | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Pytorch operator index_add_. | |||
| @@ -2104,7 +2099,7 @@ REG_OP(InplaceIndexAdd) | |||
| * @li value: A Tensor of dtype float16 or float32 or int64 or int32 or int8. | |||
| * @par Outputs: | |||
| * @li y: A tensor. Must be one of the following dtypes: | |||
| * y: A tensor. Must be one of the following dtypes: | |||
| * float16, float32, int64, int32, int8. | |||
| */ | |||
| REG_OP(MaskedFill) | |||
| @@ -2123,7 +2118,7 @@ REG_OP(MaskedFill) | |||
| * @li mask: A Tensor of dtype is bool. \n | |||
| * @par Outputs: | |||
| * @li y: A tensor with the same type as x. \n | |||
| * y: A tensor with the same type as x. \n | |||
| * @par Third-party framework compatibility | |||
| * Compatible with the Numpy operator select. | |||
| @@ -2134,13 +2129,50 @@ REG_OP(MaskedSelectV2) | |||
| .INPUT(mask, TensorType({DT_BOOL})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .OP_END_FACTORY_REG(MaskedSelectV2) | |||
| /** | |||
| * @brief Chooses elements of "x" according to "mask". | |||
| * @par Inputs: | |||
| * Two inputs, including: | |||
| * @li x: A Tensor. Must be one of the following types: float16, float32, float64, int64, int32, int16, int8, uint8. | |||
| * @li mask: A Tensor of type bool. \n | |||
| * @par Outputs: | |||
| * y: A tensor with the same type as x. \n | |||
| */ | |||
| REG_OP(MaskedSelect) | |||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||
| .INPUT(mask, TensorType({DT_BOOL})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||
| .OP_END_FACTORY_REG(MaskedSelect) | |||
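| // Illustrative sketch (not part of this header): under the usual masked-select | |||
| // semantics (e.g. PyTorch's masked_select, an assumption here), "x" is read in | |||
| // order and only the elements whose mask entry is true are kept, so the output | |||
| // is 1-D with as many elements as there are true values in "mask": | |||
| #include <vector> | |||
| inline std::vector<float> MaskedSelectRef(const std::vector<float> &x, | |||
|                                           const std::vector<bool> &mask) { | |||
|   std::vector<float> y; | |||
|   for (std::size_t i = 0; i < x.size(); ++i) | |||
|     if (mask[i]) y.push_back(x[i]);  // keep only elements where mask is true | |||
|   return y; | |||
| } | |||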
| /** | |||
| * @brief Updates the values of "x" with "updates" at the positions where "mask" is true. | |||
| * @par Inputs: | |||
| * Three inputs, including: | |||
| * @li x: A Tensor. Must be one of the following types: float16, float32, float64, int64, int32, int16, int8, uint8. | |||
| * @li mask: A Tensor of type bool. | |||
| * @li updates: A tensor with the same type as x. \n | |||
| * @par Outputs: | |||
| * y: A tensor with the same type as x. \n | |||
| */ | |||
| REG_OP(MaskedScatter) | |||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||
| .INPUT(mask, TensorType({DT_BOOL})) | |||
| .INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||
| .OP_END_FACTORY_REG(MaskedScatter) | |||
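| // Illustrative sketch (not part of this header): under the usual masked-scatter | |||
| // semantics (e.g. PyTorch's masked_scatter, an assumption here), elements of | |||
| // "updates" are consumed in order and written into "x" wherever "mask" is true: | |||
| #include <vector> | |||
| inline std::vector<float> MaskedScatterRef(std::vector<float> x, | |||
|                                            const std::vector<bool> &mask, | |||
|                                            const std::vector<float> &updates) { | |||
|   std::size_t u = 0;  // next unconsumed element of "updates" | |||
|   for (std::size_t i = 0; i < x.size(); ++i) | |||
|     if (mask[i]) x[i] = updates[u++]; | |||
|   return x; | |||
| } | |||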
| /** | |||
| * @brief Slices a tensor at its last dim, e.g. a[..., begin:end:stride]. \n | |||
| * @par Inputs: | |||
| * One input, including: | |||
| * @li x: A Tensor. Must be one of the following types: float16, float32, int16, int32. | |||
| * x: A Tensor. Must be one of the following types: float16, float32, int16, int32. | |||
| * @par Attributes: | |||
| * @li start: An attribute of type Int, start index of last dim. \n | |||
| @@ -2148,7 +2180,7 @@ REG_OP(MaskedSelectV2) | |||
| * @li stride: An attribute of type Int, stride of slice. \n | |||
| * @par Outputs: | |||
| * @li y: A Tensor. Has the same type as "x". \n | |||
| * y: A Tensor. Has the same type as "x". \n | |||
| * @par Third-party framework compatibility | |||
| * No compatibility | |||
| @@ -2162,39 +2194,36 @@ REG_OP(SliceLastDim) | |||
| .OP_END_FACTORY_REG(SliceLastDim) | |||
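| // Illustrative sketch (not part of this header): treating "x" as a batch of | |||
| // rows of length "last_dim", the op returns a[..., start:end:stride] for each | |||
| // row, per the brief above. A minimal reference assuming a positive stride: | |||
| #include <vector> | |||
| inline std::vector<float> SliceLastDimRef(const std::vector<float> &x, std::size_t last_dim, | |||
|                                           std::size_t start, std::size_t end, std::size_t stride) { | |||
|   std::vector<float> y; | |||
|   const std::size_t rows = x.size() / last_dim; | |||
|   for (std::size_t r = 0; r < rows; ++r) | |||
|     for (std::size_t i = start; i < end; i += stride)  // half-open range, as in a[start:end:stride] | |||
|       y.push_back(x[r * last_dim + i]); | |||
|   return y; | |||
| } | |||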
| /** | |||
| * @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n | |||
| * extracts a slice of size (end-begin)/stride from the given input tensor. \n | |||
| * Starting at the location specified by begin the slice continues by \n | |||
| * @brief Extracts a strided slice of a tensor. Roughly speaking, this op | |||
| * extracts a slice of size (end-begin)/stride from the given input tensor. | |||
| * Starting at the location specified by begin the slice continues by | |||
| * adding stride to the index until all dimensions are not less than end. \n | |||
| * | |||
| * @par Inputs: | |||
| * Four inputs, including: | |||
| * @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n | |||
| * complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n | |||
| * complex128, float16, uint32, uint64, complex64, complex128. \n | |||
| * Five inputs, including: | |||
| * @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, | |||
| * complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, | |||
| * complex128, float16, uint32, uint64, complex64, complex128. | |||
| * @li begin: A Tensor of type int32 or int64, for the index of the first value to select. | |||
| * | |||
| * @li end: A Tensor of type int32 or int64, for the index of the last value to select. | |||
| * | |||
| * @li axes: A Tensor of type int32 or int64, indicate axis to be select. | |||
| * | |||
| * @li strides: A Tensor of type int32 or int64, for the increment. | |||
| * @li strides: A Tensor of type int32 or int64, for the increment. \n | |||
| * | |||
| * @par Attributes: | |||
| * @li begin_mask: A Tensor of type int32. \n | |||
| * A bitmask where a bit "i" being "1" means to ignore the begin \n | |||
| * @li begin_mask: A Tensor of type int32. | |||
| * A bitmask where a bit "i" being "1" means to ignore the begin | |||
| * value and instead use the largest interval possible. | |||
| * @li end_mask: A Tensor of type int32. \n | |||
| * @li end_mask: A Tensor of type int32. | |||
| * Analogous to "begin_mask". | |||
| * @li ellipsis_mask: A Tensor of type int32. \n | |||
| * A bitmask where bit "i" being "1" means the "i"th position \n | |||
| * @li ellipsis_mask: A Tensor of type int32. | |||
| * A bitmask where bit "i" being "1" means the "i"th position | |||
| * is actually an ellipsis. | |||
| * @li new_axis_mask: A Tensor of type int32. \n | |||
| * A bitmask where bit "i" being "1" means the "i"th \n | |||
| * @li new_axis_mask: A Tensor of type int32. | |||
| * A bitmask where bit "i" being "1" means the "i"th | |||
| * specification creates a new shape 1 dimension. | |||
| * @li shrink_axis_mask: A Tensor of type int32. \n | |||
| * A bitmask where bit "i" implies that the "i"th \n | |||
| * specification should shrink the dimensionality. | |||
| * @li shrink_axis_mask: A Tensor of type int32. | |||
| * A bitmask where bit "i" implies that the "i"th | |||
| * specification should shrink the dimensionality. \n | |||
| * | |||
| * @par Outputs: | |||
| * y: A Tensor. Has the same type as "x". | |||
| @@ -2231,7 +2260,7 @@ REG_OP(StridedSliceV2) | |||
| * float16, float32, int32. \n | |||
| * @par Attributes: | |||
| * @li dim: A required int. Used to select the dimension of this tensor. \n | |||
| * dim: A required int. Used to select the dimension of this tensor. \n | |||
| *@par Outputs: | |||
| *y: A Tensor with the same type and shape of input_x's. \n | |||
| @@ -2307,6 +2336,34 @@ REG_OP(MaskedFillRange) | |||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32})) | |||
| .REQUIRED_ATTR(axis, Int) | |||
| .OP_END_FACTORY_REG(MaskedFillRange) | |||
| /** | |||
| * @brief Re-sorts a set of already-sorted data together with a new set of data, and keeps the first "k" elements. \n | |||
| * | |||
| * @par Inputs: | |||
| * Six inputs, including: | |||
| * @li topk_pq_distance: A sorted Tensor, will be updated after calculation. Must be one of the following types: float32, float16. | |||
| * @li topk_pq_index: A Tensor of type int32, index corresponding to topk_pq_distance. | |||
| * @li topk_pq_ivf: A Tensor of type int32, the bucket number corresponding to topk_pq_distance. | |||
| * @li pq_distance: A Tensor of type float32 or float16; the new data set, which will be re-sorted together with topk_pq_distance and the result written back to topk_pq_distance. | |||
| * @li pq_index: A Tensor of type int32, index corresponding to pq_distance. | |||
| * @li pq_ivf: A scalar of type int32, the bucket number corresponding to pq_distance. \n | |||
| * | |||
| * @par Attributes: | |||
| * order: A string, indicating the sorting order of topk_pq_distance. \n | |||
| * | |||
| * @par Restrictions: | |||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(InplaceTopKDistance) | |||
| .INPUT(topk_pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(topk_pq_index, TensorType({DT_INT32})) | |||
| .INPUT(topk_pq_ivf, TensorType({DT_INT32})) | |||
| .INPUT(pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
| .INPUT(pq_index, TensorType({DT_INT32})) | |||
| .INPUT(pq_ivf, TensorType({DT_INT32})) | |||
| .ATTR(order, String, "asc") | |||
| .OP_END_FACTORY_REG(InplaceTopKDistance) | |||
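| // Illustrative sketch (not part of this header): the op behaves like merging a | |||
| // sorted top-k list with a new batch and keeping the best k, carrying the index | |||
| // and bucket (ivf) columns along; this reading of the experimental op is an | |||
| // assumption. A minimal reference for ascending order: | |||
| #include <algorithm> | |||
| #include <cstdint> | |||
| #include <vector> | |||
| struct PqEntry { float distance; int32_t index; int32_t ivf; }; | |||
| inline void MergeTopK(std::vector<PqEntry> &topk,            // sorted ascending, size k | |||
|                       const std::vector<PqEntry> &batch) {   // new candidates | |||
|   std::vector<PqEntry> all(topk); | |||
|   all.insert(all.end(), batch.begin(), batch.end()); | |||
|   std::sort(all.begin(), all.end(), | |||
|             [](const PqEntry &a, const PqEntry &b) { return a.distance < b.distance; }); | |||
|   all.resize(topk.size());  // keep the first k after re-sorting | |||
|   topk = all;               // updated in place, like topk_pq_* above | |||
| } | |||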
| } // namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ | |||
| @@ -281,9 +281,9 @@ REG_OP(SparseSliceGrad) | |||
| * @li size: A 1D Tensor of type int64. The size of the slice . \n | |||
| *@par Outputs: | |||
| *y_indices: A Tensor of type int64. | |||
| *y_values: A Tensor. Has the same type as "values". | |||
| *y_values: A Tensor of type int64 . \n | |||
| *@li y_indices: A Tensor of type int64. | |||
| *@li y_values: A Tensor. Has the same type as "values". | |||
| *@li y_shape: A Tensor of type int64 . \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator SparseSlice. | |||
| @@ -313,8 +313,8 @@ REG_OP(SparseSlice) | |||
| * @li sum_indices: A 2D Tensor of type int64. The indices of the sum SparseTensor, with size [nnz(sum), ndims] . \n | |||
| *@par Outputs: | |||
| *x1_val_grad: A Tensor. Has the same type as "backprop_val_grad". | |||
| *x2_val_grad: A Tensor. Has the same type as "backprop_val_grad" . \n | |||
| *@li x1_val_grad: A Tensor. Has the same type as "backprop_val_grad". | |||
| *@li x2_val_grad: A Tensor. Has the same type as "backprop_val_grad" . \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator SparseAddGrad. | |||
| @@ -363,7 +363,7 @@ REG_OP(SparseFillEmptyRowsGrad) | |||
| *@par Inputs: | |||
| * @li x1_indices: A 2D Tensor of type int32 or int64. | |||
| * @li The indices of the matrix "SparseTensor", with size [nnz, 2]. | |||
| *The indices of the matrix "SparseTensor", with size [nnz, 2]. | |||
| * @li x1_values: A 1D Tensor. The values of the SparseTensor, with size [nnz]. | |||
| * @li x1_shape: A 1D Tensor of type int64. The shape of the SparseTensor, with size [2]. | |||
| * @li x2: A dense matrix Tensor of the same type as "x1_values". 2D . \n | |||
| @@ -373,9 +373,9 @@ REG_OP(SparseFillEmptyRowsGrad) | |||
| *@par Attributes: | |||
| *@li adjoint_a: An optional bool. Defaults to "False". Use the adjoint of A in the matrix multiply. | |||
| *@li If A is complex, this is transpose(conj(A)). Otherwise it is transpose(A). | |||
| *If A is complex, this is transpose(conj(A)). Otherwise it is transpose(A). | |||
| *@li adjoint_b: An optional bool. Defaults to "False". Use the adjoint of B in the matrix multiply. | |||
| *@li If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B) . \n | |||
| *If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B). \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator SparseTensorDenseMatMul. | |||
| @@ -400,9 +400,13 @@ REG_OP(SparseTensorDenseMatMul) | |||
| * @li indices: A 0D, 1D, or 2D Tensor of type int32 or int64. | |||
| * @li output_shape: A 1D Tensor of the same type as "sparse_indices". The shape of the dense output tensor. | |||
| * @li values: A 1D Tensor. Values corresponding to each row of "sparse_indices", | |||
| * @li or a scalar value to be used for all sparse indices. | |||
| or a scalar value to be used for all sparse indices. | |||
| * @li default_value: A Tensor of the same type as "sparse_values" . \n | |||
| *@par Attributes: | |||
| *validate_indices: If true, indices are checked to make sure they are sorted in | |||
| lexicographic order and that there are no repeats. \n | |||
| *@par Outputs: | |||
| *y: A Tensor. Has the same type as "values" . \n | |||
| @@ -427,7 +431,6 @@ REG_OP(SparseToDense) | |||
| *Concatenation is with respect to the dense versions of these sparse tensors . \n | |||
| *@par Inputs: | |||
| *3 or 5 inputs, including: | |||
| * @li indices:A list of at least 2 `Tensor` objects with type `int64`.2-D. | |||
| *Indices of each input `SparseTensor`.It's a dynamic input. | |||
| * @li values:A list with the same length as `indices` of `Tensor` objects with the same type. | |||
| @@ -700,7 +703,6 @@ REG_OP(SparseReduceMaxSparse) | |||
| *@brief Computes the sum of elements across dimensions of a SparseTensor . \n | |||
| *@par Inputs: | |||
| *4 or 5 inputs, including: | |||
| * @li x_indices: A 2D Tensor of type int64. | |||
| *"N x R" matrix with the indices of non-empty values in a | |||
| *SparseTensor, possibly not in canonical ordering. | |||
| @@ -711,13 +713,11 @@ REG_OP(SparseReduceMaxSparse) | |||
| *A length-"K" vector containing the reduction axes . \n | |||
| *@par Attributes: | |||
| * keep_dims: An optional bool. Defaults to "False". | |||
| *keep_dims: An optional bool. Defaults to "False". | |||
| *If true, retains reduced dimensions with length 1 . \n | |||
| *@par Outputs: | |||
| * @li y_indices: A Tensor of type int64. | |||
| * @li y_values: A Tensor. Has the same type as "input_values". | |||
| * @li y_shape: A Tensor of type int64 . \n | |||
| *y: A Tensor. Has the same type as "x_values". \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator SparseReduceSum. | |||
| @@ -818,7 +818,6 @@ REG_OP(SparseSplit) | |||
| *@brief Generates sparse cross from a list of sparse and dense tensors . \n | |||
| *@par Inputs: | |||
| *8 or 10 inputs, including: | |||
| * @li indices: A list of 2D Tensor objects of type int64. | |||
| * Indices of each input SparseTensor.It's a dynamic input. | |||
| * @li values: A list of 1D Tensor objects of type int64 or string. | |||
| @@ -899,9 +898,8 @@ REG_OP(AddManySparseToTensorsMap) | |||
| *@brief Reads SparseTensors from a "SparseTensorsMap" and concatenate them . \n | |||
| *@par Inputs: | |||
| *2 or 4 inputs, including: | |||
| * handles: A 1D Tensor of type int64. | |||
| * The "N" serialized SparseTensor objects . \n | |||
| *The "N" serialized SparseTensor objects . \n | |||
| *@par Attributes: | |||
| * @li dtype: A tf.DType. The "dtype" of the SparseTensor objects stored in the "SparseTensorsMap". | |||
| @@ -911,9 +909,9 @@ REG_OP(AddManySparseToTensorsMap) | |||
| *The shared name for the "SparseTensorsMap" read by this op . \n | |||
| *@par Outputs: | |||
| * @li indices: A Tensor of type int64. | |||
| * @li values: A Tensor of type "dtype". | |||
| * @li shape: A Tensor of type int64 . \n | |||
| * @li indices: A Tensor of type int64. 2-D. The `indices` of the minibatch `SparseTensor`. | |||
| * @li values: A Tensor of type "dtype". 1-D. The `values` of the minibatch `SparseTensor`. | |||
| * @li shape: A Tensor of type int64. 1-D. The `shape` of the minibatch `SparseTensor`. \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with the TensorFlow operator TakeManySparseFromTensorsMap. | |||
| @@ -989,8 +987,7 @@ REG_OP(SerializeManySparse) | |||
| *@brief Deserializes SparseTensor objects . \n | |||
| *@par Inputs: | |||
| *One input, including: | |||
| * serialized_sparse: A Tensor. The serialized SparseTensor objects. | |||
| *serialized_sparse: A Tensor. The serialized SparseTensor objects. | |||
| *The last dimension must have 3 columns . \n | |||
| *@par Attributes: | |||
| @@ -31,10 +31,10 @@ namespace ge { | |||
| inner-most dimension of `x`. \n | |||
| *@par Inputs: | |||
| *@li x: A Tensor. Must be the following types: complex64, complex128. \n | |||
| *x: A Tensor. Must be the following types: complex64, complex128. \n | |||
| *@par Outputs: | |||
| *@li y: A complex tensor of the same rank as `x`. \n | |||
| *y: A complex tensor of the same rank as `x`. \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with TensorFlow IFFT operator. | |||
| @@ -52,7 +52,7 @@ REG_OP(IFFT) | |||
| *@li fft_length: An int32 tensor of shape [1]. The FFT length . \n | |||
| *@par Outputs: | |||
| *@li y: A complex64 tensor of the same rank as `input`. The inner-most | |||
| *y: A complex64 tensor of the same rank as `input`. The inner-most | |||
| dimension of `input` is replaced with the `fft_length / 2 + 1` unique | |||
| frequency components of its 1D Fourier transform . \n | |||
| @@ -73,7 +73,7 @@ REG_OP(RFFT) | |||
| *@li fft_length: An int32 tensor of shape [1]. The FFT length. \n | |||
| *@par Outputs: | |||
| *@li y: A float32 tensor of the same rank as `input`. The inner-most | |||
| * y: A float32 tensor of the same rank as `input`. The inner-most | |||
| dimension of `input` is replaced with the `fft_length` samples of its inverse | |||
| 1D Fourier transform. \n | |||
| @@ -91,10 +91,10 @@ REG_OP(IRFFT) | |||
| *@brief 2D fast Fourier transform. \n | |||
| *@par Inputs: | |||
| *@li x: A complex64 tensor. | |||
| *x: A complex64 tensor. | |||
| *@par Outputs: | |||
| *@li y: A complex64 tensor of the same shape as `input`. The inner-most 2 | |||
| *y: A complex64 tensor of the same shape as `input`. The inner-most 2 | |||
| dimensions of `input` are replaced with their 2D Fourier transform. \n | |||
| *@par Third-party framework compatibility | |||
| @@ -110,10 +110,10 @@ REG_OP(FFT2D) | |||
| innermost dimension of the input. \n | |||
| *@par Inputs: | |||
| *@li x: A Tensor. Must be the following types: complex64, complex128. \n | |||
| *x: A Tensor. Must be the following types: complex64, complex128. \n | |||
| *@par Outputs: | |||
| *@li y: A complex tensor with the same shape as input. The innermost dimension | |||
| *y: A complex tensor with the same shape as input. The innermost dimension | |||
| of the input is replaced by its 1-dimensional Fourier transform. \n | |||
| *@par Third-party framework compatibility | |||
| @@ -129,10 +129,10 @@ REG_OP(FFT) | |||
| innermost dimension of the input. \n | |||
| *@par Inputs: | |||
| *@li x: A Tensor. Must be the following types: complex64, complex128. \n | |||
| *x: A Tensor. Must be the following types: complex64, complex128. \n | |||
| *@par Outputs: | |||
| *@li y: A complex tensor with the same shape as input. The innermost dimension | |||
| *y: A complex tensor with the same shape as input. The innermost dimension | |||
| of the input is replaced by its inverse two-dimensional Fourier transform. \n | |||
| *@par Third-party framework compatibility | |||
| @@ -161,14 +161,11 @@ REG_OP(SplitVD) | |||
| /** | |||
| *@brief Concatenates a list of N tensors along the first dimension. | |||
| *@par Inputs: | |||
| * Two inputs, including: | |||
| * @li values: A list of Tensors. Must be one of the following types: int8, int16, int32, | |||
| * One input, including: | |||
| * values: A list of Tensors. Must be one of the following types: int8, int16, int32, | |||
| * int64, uint8, uint16, uint32, uint64, float16, float32. | |||
| * Tensors to be concatenated. All must have size 1 in the first dimension and same shape. | |||
| * It's a dynamic input. | |||
| * @li shape: A Tensor of the same type as "x". | |||
| * The final shape of the result. Should be equal to the shapes of any input | |||
| * but with the number of input values in the first dimension . \n | |||
| * It's a dynamic input. \n | |||
| *@par Attributes: | |||
| * @li shape: A required list of ints. | |||
| @@ -104,7 +104,7 @@ REG_OP(DestroyTemporaryVariable) | |||
| *@brief Checks whether a tensor has been initialized. Outputs boolean scalar indicating whether the tensor has been initialized . \n | |||
| *@par Inputs: | |||
| *x: A tensor . \n | |||
| *x: A Tensor of type float16, float32, double, bool, int8, uint8, uint16, int16, int32, uint32, uint64, int64. | |||
| *@par Outputs: | |||
| *y: A tensor, indicating whether "x" has been initialized . \n | |||
| @@ -32,7 +32,10 @@ namespace ge { | |||
| *@par Inputs: | |||
| *This op may use some OS-provided source of non-determinism (e.g. an RNG), | |||
| *so each execution will give different results. Inputs included: | |||
| *@li shape: The shape of the output tensor . \n | |||
| *shape: The shape of the output tensor . \n | |||
| *@par Attributes: | |||
| *dtype: required, type. \n | |||
| *@par Outputs: | |||
| *y:Returns non-deterministic integer values with the specified shape. \n | |||
| @@ -54,13 +57,10 @@ REG_OP(NonDeterministicInts) | |||
| *counter is an unspecified implementation detail . \n | |||
| *@par Inputs: | |||
| *@li resource: The handle of the resource variable that stores the state of the RNG. | |||
| *@li x: The handle of the resource variable that stores the state of the RNG. | |||
| *@li algorithm: The RNG algorithm. | |||
| *@li delta: The amount of advancement . \n | |||
| *@par Outputs: | |||
| *y:A Returns the created operation . \n | |||
| *@par Third-party framework compatibility | |||
| * Compatible with tensorflow RngSkip operator. | |||
| */ | |||
| @@ -81,11 +81,16 @@ power of two. The bias is small for values of `maxval - minval` significantly | |||
| smaller than the range of the output (either `2^32` or `2^64`) . \n | |||
| *@par Inputs: | |||
| *@li resource: The handle of the resource variable that stores the state of the RNG. | |||
| *@li x: The handle of the resource variable that stores the state of the RNG. | |||
| *@li algorithm: The RNG algorithm. | |||
| *@li shape: The shape of the output tensor. | |||
| *@li minval: Minimum value (inclusive, scalar). | |||
| *@li maxval: Maximum value (exclusive, scalar) . \n | |||
| *@li counts: A 0/1-D Tensor or Python value. The counts of the binomial | |||
| distribution. Must be broadcastable with the leftmost dimension defined by `shape`. | |||
| *@li probs: A 0/1-D Tensor or Python value. The probability of success for the | |||
| binomial distribution. Must be broadcastable with the leftmost dimension defined by `shape`.\n | |||
| *@par Attributes: | |||
| *dtype: required, type. \n | |||
| *@par Outputs: | |||
| *y:Returns random values with the specified shape. \n | |||
| @@ -109,7 +114,7 @@ REG_OP(StatefulRandomBinomial) | |||
| *The generated values will have mean 0 and standard deviation 1 . \n | |||
| *@par Inputs: | |||
| *@li resource: The handle of the resource variable that stores the state of the RNG. | |||
| *@li x: The handle of the resource variable that stores the state of the RNG. | |||
| *@li algorithm: The RNG algorithm. | |||
| *@li shape: The shape of the output tensor . \n | |||
| @@ -134,7 +139,7 @@ REG_OP(StatefulStandardNormalV2) | |||
| *deviations from the mean are dropped and re-picked . \n | |||
| *@par Inputs: | |||
| *@li resource: The handle of the resource variable that stores the state of the RNG. | |||
| *@li x: The handle of the resource variable that stores the state of the RNG. | |||
| *@li algorithm: The RNG algorithm. | |||
| *@li shape: The shape of the output tensor . \n | |||
| @@ -158,7 +163,7 @@ The generated values follow a uniform distribution in the range `[0, 1)`. The | |||
| lower bound 0 is included in the range, while the upper bound 1 is excluded. | |||
| *@par Inputs: | |||
| *@li resource: The handle of the resource variable that stores the state of the RNG. | |||
| *@li x: The handle of the resource variable that stores the state of the RNG. | |||
| *@li algorithm: The RNG algorithm. | |||
| *@li shape: The shape of the output tensor . \n | |||
| @@ -181,7 +186,7 @@ REG_OP(StatefulUniform) | |||
| The generated values are uniform integers covering the whole range of `dtype` . \n | |||
| *@par Inputs: | |||
| *@li resource: The handle of the resource variable that stores the state of the RNG. | |||
| *@li x: The handle of the resource variable that stores the state of the RNG. | |||
| *@li algorithm: The RNG algorithm. | |||
| *@li shape: The shape of the output tensor . \n | |||
| @@ -209,7 +214,7 @@ power of two. The bias is small for values of `maxval - minval` significantly | |||
| smaller than the range of the output (either `2^32` or `2^64`) . \n | |||
| *@par Inputs: | |||
| *@li resource: The handle of the resource variable that stores the state of the RNG. | |||
| *@li x: The handle of the resource variable that stores the state of the RNG. | |||
| *@li algorithm: The RNG algorithm. | |||
| *@li shape: The shape of the output tensor. | |||
| *@li minval: Minimum value (inclusive, scalar). | |||
| @@ -295,7 +295,7 @@ REG_OP(StringSplit) | |||
| *@par Inputs: | |||
| include: | |||
| *@li input:A Tensor of type string. The text to be processed. \n | |||
| *input:A Tensor of type string. The text to be processed. \n | |||
| *@par Attributes: | |||
| *@li pattern:A string. The regular expression to match the input. | |||
| @@ -303,8 +303,8 @@ include: | |||
| *@li replace_global:An optional bool. Defaults to True. If True, the replacement is global, | |||
| otherwise the replacement is done only on the first match. | |||
| *@par output: | |||
| *@li output::A Tensor of type string. | |||
| *@par Outputs: | |||
| *output:A Tensor of type string. | |||
| */ | |||
| REG_OP(StaticRegexReplace) | |||
| .INPUT(input, TensorType({DT_STRING})) | |||
| @@ -322,13 +322,13 @@ REG_OP(StaticRegexReplace) | |||
| *@par Inputs: | |||
| include: | |||
| *@li input:A Tensor of type string. The text to be processed. \n | |||
| *input:A Tensor of type string. The text to be processed. \n | |||
| *@par Attributes: | |||
| *@li pattern:A string. The regular expression to match the input. | |||
| *pattern:A string. The regular expression to match the input. | |||
| *@par output: | |||
| *@li output::A bool tensor with the same shape as `input`. | |||
| *@par Outputs: | |||
| *output:A bool tensor with the same shape as `input`. | |||
| */ | |||
| REG_OP(StaticRegexFullMatch) | |||
| .INPUT(input, TensorType({DT_STRING})) | |||
| @@ -347,10 +347,10 @@ include: | |||
| *@li num_segments:A Tensor. Must be one of the following types: int32, int64. A scalar. | |||
| *@par Attributes: | |||
| *@li separator:An optional string. Defaults to "". The separator to use when joining. | |||
| *separator:An optional string. Defaults to "". The separator to use when joining. | |||
| *@par output: | |||
| *@li output::A Tensor of type string.. | |||
| *@par Outputs: | |||
| *output:A Tensor of type string. | |||
| */ | |||
| REG_OP(UnsortedSegmentJoin) | |||
| .INPUT(input, TensorType({DT_STRING})) | |||
| @@ -366,13 +366,13 @@ REG_OP(UnsortedSegmentJoin) | |||
| *@par Inputs: | |||
| include: | |||
| *@li input:A Tensor of type string. The text to be processed. | |||
| *input:A Tensor of type string. The text to be processed. | |||
| *@par Attributes: | |||
| *@li encoding:An optional string. Defaults to "". | |||
| *encoding:An optional string. Defaults to "". | |||
| *@par output: | |||
| *@li output::A Tensor of type string.. | |||
| *@par Outputs: | |||
| *output:A Tensor of type string. | |||
| */ | |||
| REG_OP(StringLower) | |||
| .INPUT(input, TensorType({DT_STRING})) | |||
| @@ -386,13 +386,13 @@ REG_OP(StringLower) | |||
| *@par Inputs: | |||
| include: | |||
| *@li input:A Tensor of type string. The text to be processed. | |||
| *input:A Tensor of type string. The text to be processed. | |||
| *@par Attributes: | |||
| *@li encoding:An optional string. Defaults to "". | |||
| *encoding:An optional string. Defaults to "". | |||
| *@par output: | |||
| *@li output::A Tensor of type string.. | |||
| *@par Outputs: | |||
| *output:A Tensor of type string. | |||
| */ | |||
| REG_OP(StringUpper) | |||
| .INPUT(input, TensorType({DT_STRING})) | |||
| @@ -901,10 +901,10 @@ REG_OP(DecodeBase64) | |||
| *@brief StringNormalization performs string operations for basic cleaning . \n | |||
| *@par Inputs: | |||
| *@li input: only accepts [C] or [1, C] UTF-8 strings tensor . \n | |||
| *input: only accepts [C] or [1, C] UTF-8 strings tensor. \n | |||
| *@par Outputs: | |||
| *@li output: UTF-8 strings tensor after cleaning . \n | |||
| *output: UTF-8 strings tensor after cleaning. \n | |||
| *@par Attributes: | |||
| *@li stopwords : list of strings (default is empty). | |||
| @@ -919,13 +919,13 @@ case-sensitive. Default is false. | |||
| *string enum that cases output to be lowercased/uppercases/unchanged. Valid | |||
| values are "LOWER", "UPPER", "NONE". Default is "NONE". | |||
| *@li local : string (default is "en_US"). | |||
| *@li locale: string (default is "C"). | |||
| *Environment dependent string that denotes the locale according to which output | |||
| strings needs to be upper/lowercased.Default en_US or platform specific equivalent | |||
| as decided by the implementation . \n | |||
| strings need to be upper/lowercased. Default is C or a platform-specific equivalent | |||
| as decided by the implementation. \n | |||
| *@attention Constraints: | |||
| *@li input can be either a 1-D or 2-D tensor, the shape of 2-D tensor must be [1, C]. | |||
| *input can be either a 1-D or 2-D tensor; the shape of a 2-D tensor must be [1, C]. | |||
| */ | |||
| REG_OP(StringNormalizer) | |||
| .INPUT(input, TensorType({DT_STRING})) | |||
| @@ -933,7 +933,7 @@ REG_OP(StringNormalizer) | |||
| .ATTR(stopwords, ListString, {}) | |||
| .ATTR(is_case_sensitive, Bool, false) | |||
| .ATTR(case_change_action, String, "NONE") | |||
| .ATTR(local, String, "en_US") | |||
| .ATTR(locale, String, "C") | |||
| .OP_END_FACTORY_REG(StringNormalizer) | |||
| } // namespace ge | |||
| @@ -29,15 +29,15 @@ namespace ge { | |||
| *@par Inputs: | |||
| *The input handle must have the resource type. Inputs include: | |||
| *@li x:A list of Tensor objects. One or more tensors from which | |||
| *x:A list of Tensor objects. One or more tensors from which | |||
| the enqueued tensors should be taken . \n | |||
| *@par Outputs: | |||
| *@li y:A list of Tensor objects. One or more tensors from which | |||
| *y:A list of Tensor objects. One or more tensors from which | |||
| the enqueued tensors should be taken . \n | |||
| *@par Attributes: | |||
| *@li type: An optional ge::DataType. It refers to the target data type of outputs . \n | |||
| *type: An optional ge::DataType. It refers to the target data type of outputs . \n | |||
| *@par Third-party framework compatibility | |||
| *Compatible with tensorflow QueueIsClosed operator. | |||
| @@ -723,11 +723,12 @@ REG_OP(CompressFcOp) | |||
| *@brief Performs Col2im for each batch entry. \n | |||
| *@par Inputs: | |||
| *@li input_x: The Col Tensor. 5-D, shape: `(n, c1, kernel_h*kernel_w, ho*wo, c0)`. | |||
| where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1 \n | |||
| *@li x: The Col Tensor. 4-D, shape: `(n, c, kernel_h*kernel_w, ho*wo)`. | |||
| where ho and wo are computed as (output_x + 2*padding_x - dilation_x*(kernel_x - 1) - 1)//stride_x + 1 for x in {h, w}. | |||
| *@li output_size: The img shape Tensor. 1-D, shape:`(2)`, value: (output_h, output_w). \n | |||
| *@par Outputs: | |||
| *@li output_y: The img Tensor. 5-D, shape: `(n, c1, output_h, output_w, c0)`. \n | |||
| *y: The img Tensor. 4-D, shape: `(n, c, output_h, output_w)`. \n | |||
| *@par Attributes: | |||
| *@li kernel_shape: ListInt, value: `(kernel_h, kernel_w)`, the shape of kernel in convolution. | |||
| @@ -909,7 +910,7 @@ output shape would be [max(ngram_indexes) + 1]. If input shape is [N, C], this o | |||
| *@li either pool_strings or pool_int64s attributes must be present but not both. | |||
| */ | |||
| REG_OP(TfidVectorizer) | |||
| REG_OP(TfIdfVectorizer) | |||
| .INPUT(input, TensorType({DT_INT32, DT_INT64, DT_STRING})) | |||
| .OUTPUT(output, TensorType({DT_FLOAT})) | |||
| .REQUIRED_ATTR(max_gram_length, Int) | |||
| @@ -921,7 +922,7 @@ REG_OP(TfidVectorizer) | |||
| .ATTR(pool_int64s, ListInt, {}) | |||
| .ATTR(pool_strings, ListString, {}) | |||
| .ATTR(weights, ListFloat, {}) | |||
| .OP_END_FACTORY_REG(TfidVectorizer) | |||
| .OP_END_FACTORY_REG(TfIdfVectorizer) | |||
| } // namespace ge | |||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ | |||
| @@ -20,7 +20,7 @@ | |||
| #include <stdint.h> | |||
| #include "toolchain/prof_callback.h" | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| #if defined(__cplusplus) | |||
| extern "C" { | |||
| #endif | |||
| @@ -357,7 +357,7 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_ | |||
| */ | |||
| RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| #if defined(__cplusplus) | |||
| } | |||
| #endif | |||
| @@ -19,7 +19,7 @@ | |||
| #include "base.h" | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| #if defined(__cplusplus) | |||
| extern "C" { | |||
| #endif | |||
| @@ -43,6 +43,7 @@ typedef enum tagRtChipType { | |||
| CHIP_LHISI, | |||
| CHIP_DC, | |||
| CHIP_CLOUD_V2, | |||
| CHIP_NO_DEVICE, | |||
| CHIP_END, | |||
| } rtChipType_t; | |||
| @@ -53,11 +54,11 @@ typedef enum tagRtAicpuScheType { | |||
| } rtAicpuScheType; | |||
| typedef enum tagRtDeviceCapabilityType { | |||
| RT_SCHEDULE_SOFTWARE = 0, // SoftWare Schedule | |||
| RT_SCHEDULE_SOFTWARE_OPT, | |||
| RT_SCHEDULE_HARDWARE, // HWTS Schedule | |||
| RT_AICPU_BLOCKING_OP_NOT_SUPPORT, | |||
| RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation | |||
| RT_SCHEDULE_SOFTWARE = 0, // Software Schedule | |||
| RT_SCHEDULE_SOFTWARE_OPT, | |||
| RT_SCHEDULE_HARDWARE, // HWTS Schedule | |||
| RT_AICPU_BLOCKING_OP_NOT_SUPPORT, | |||
| RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation | |||
| } rtDeviceCapabilityType; | |||
| typedef enum tagRtVersion { | |||
| @@ -235,7 +236,7 @@ RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout); | |||
| */ | |||
| RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| #if defined(__cplusplus) | |||
| } | |||
| #endif | |||
| @@ -19,7 +19,7 @@ | |||
| #include "base.h" | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| #if defined(__cplusplus) | |||
| extern "C" { | |||
| #endif | |||
| @@ -157,7 +157,7 @@ RTS_API rtError_t rtGetGroupCount(uint32_t *count); | |||
| */ | |||
| RTS_API rtError_t rtSetCtxINFMode(bool mode); | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| #if defined(__cplusplus) | |||
| } | |||
| #endif | |||
| @@ -19,7 +19,7 @@ | |||
| #include "base.h" | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| #if defined(__cplusplus) | |||
| extern "C" { | |||
| #endif | |||
| @@ -80,15 +80,15 @@ typedef enum tagMemoryInfo { | |||
| } rtMemoryInfo_t; | |||
| typedef enum tagRtDeviceModuleType { | |||
| RT_MODULE_TYPE_SYSTEM = 0, | |||
| RT_MODULE_TYPE_AICPU, | |||
| RT_MODULE_TYPE_CCPU, | |||
| RT_MODULE_TYPE_DCPU, | |||
| RT_MODULE_TYPE_AICORE, | |||
| RT_MODULE_TYPE_TSCPU, | |||
| RT_MODULE_TYPE_PCIE, | |||
| RT_MODULE_TYPE_VECTOR_CORE | |||
| } tagRtDeviceModuleType_t; | |||
| RT_MODULE_TYPE_SYSTEM = 0, /**< system info*/ | |||
| RT_MODULE_TYPE_AICPU, /**< aicpu info*/ | |||
| RT_MODULE_TYPE_CCPU, /**< ccpu_info*/ | |||
| RT_MODULE_TYPE_DCPU, /**< dcpu info*/ | |||
| RT_MODULE_TYPE_AICORE, /**< AI CORE info*/ | |||
| RT_MODULE_TYPE_TSCPU, /**< tscpu info*/ | |||
| RT_MODULE_TYPE_PCIE, /**< PCIE info*/ | |||
| RT_MODULE_TYPE_VECTOR_CORE, /**< VECTOR CORE info*/ | |||
| } rtDeviceModuleType_t; | |||
| /** | |||
| * @ingroup dvrt_dev | |||
| @@ -380,7 +380,7 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); | |||
| */ | |||
| RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| #if defined(__cplusplus) | |||
| } | |||
| #endif | |||
| @@ -19,7 +19,7 @@ | |||
| #include "base.h" | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| #if defined(__cplusplus) | |||
| extern "C" { | |||
| #endif | |||
| @@ -56,7 +56,7 @@ RTS_API rtError_t rtUnsetDvfsProfile(); | |||
| */ | |||
| RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| #if defined(__cplusplus) | |||
| } | |||
| #endif | |||
| @@ -19,7 +19,7 @@ | |||
| #include "base.h" | |||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
| #if defined(__cplusplus) | |||
| extern "C" { | |||
| #endif | |||
| @@ -41,16 +41,6 @@ typedef enum rtEventWaitStatus { | |||
| #define RT_EVENT_DDSYNC 0x04U | |||
| #define RT_EVENT_TIME_LINE 0x08U | |||
| #define RT_EVENT_DDSYNC_NS 0x01U | |||
| #define RT_EVENT_STREAM_MARK 0x02U | |||
| #define RT_EVENT_DDSYNC 0x04U | |||
| #define RT_EVENT_TIME_LINE 0x08U | |||
| #define RT_EVENT_DDSYNC_NS 0x01U | |||
| #define RT_EVENT_STREAM_MARK 0x02U | |||
| #define RT_EVENT_DDSYNC 0x04U | |||
| #define RT_EVENT_TIME_LINE 0x08U | |||
/**
 * @ingroup dvrt_event
 * @brief create event instance
@@ -282,7 +272,7 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs
 */
RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
}
#endif
@@ -20,7 +20,7 @@
#include "base.h"
#include "stream.h"
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
extern "C" {
#endif
@@ -647,7 +647,7 @@ RTS_API rtError_t rtStartMDCProfiler(void **addr, uint32_t length);
 */
RTS_API rtError_t rtStopMDCProfiler(void *addr);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
}
#endif
@@ -24,7 +24,7 @@
#include "config.h"
#include "stream.h"
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
extern "C" {
#endif
@@ -547,7 +547,7 @@ RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num);
 */
RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
}
#endif
@@ -1,5 +1,5 @@
/*
 * Copyright (c) Huawei Technologies Co. , Ltd. 2021. All rights reserved.
 * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
 * Description: ffts interface
 */
@@ -8,7 +8,7 @@
#include "base.h"
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
extern "C" {
#endif
@@ -19,8 +19,8 @@ extern "C" {
#define RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN 32U
typedef enum tagFftsType {
    RT_FFTS_TYPE_AUTO_THREAD = 2, // ffts auto thread mode, same as ffts define
    RT_FFTS_TYPE_MANUAL_THREAD = 3, // ffts manual thread mode, same as ffts define
    RT_FFTS_TYPE_AUTO_THREAD = 2,    // ffts auto thread mode, same as ffts define
    RT_FFTS_TYPE_MANUAL_THREAD = 3,  // ffts manual thread mode, same as ffts define
} rtFftsType_t;
typedef enum tagFftsSubTaskType {
@@ -37,7 +37,7 @@ typedef enum tagFftsSubTaskType {
} rtFftsSubTaskType_t;
typedef struct tagManualThreadDmuInfo {
    uint64_t dataAddr; // device mem
    uint64_t dataAddr;  // device mem
    uint16_t numOuter;
    uint16_t numInner;
    uint32_t strideOuter;
@@ -50,44 +50,43 @@ typedef struct tagManualThreadDependency {
} rtManualThreadDependency_t;
typedef struct tagManualThreadAicAivInfo {
    uint64_t taskParamAddr; // device mem
    uint64_t taskParamAddr;  // device mem
    uint16_t taskParamOffset;
    // when satMode=1 and FP16 computation with non-INF inputs overflows/underflows, results will be +/-INF of FP16
    // when satMode=0 and FP16 computation with non-INF inputs overflows/underflows
    // results will be saturated to +/- MAX of FP16
    // when satMode=0 and FP16 computation with non-INF inputs overflows/underflows,
    // results will be saturated to +/-MAX of FP16
    uint8_t satMode;
    uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3: reserved
    uint8_t iCachePrefetchCnt; // units is 2K
    uint8_t prefetchEnableBitmap; // 8 bit bitmap 1 0 1 0
    uint8_t prefetchOnceBitmap; // 8 bit bitmap 1 0 1 0
    uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in ffts
    // num: thread0_prefetch_dmu_descriptor_index - prefetch_once_dmu_descriptor_index
    uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim
    uint8_t scheduleMode;          // 0:normal mode, 1:batch mode, 2:sync mode, 3:reserved
    uint8_t iCachePrefetchCnt;     // unit is 2K
    uint8_t prefetchEnableBitmap;  // 8 bit bitmap 1 0 1 0
    uint8_t prefetchOnceBitmap;    // 8 bit bitmap 1 0 1 0
    uint16_t prefetchOnceDmuNum;   // prefetch_once_dmu_descriptor_index in ffts
    // num: thread0_prefetch_dmu_descriptor_index - prefetch_once_dmu_descriptor_index
    uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM];  // max valid is threadDim
    uint16_t threadBlkDim[RT_FFTS_MAX_MANUAL_THREAD_NUM];
    const char *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM];
    rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim - 1]
    rtManualThreadDmuInfo_t *prefetchList;  // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim-1]
    rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK];
} rtManualThreadAicAivInfo_t;
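As the prefetchList comment notes, the list length is the last valid threadPrefetchDmuIdx entry. A minimal allocation sketch under that reading; threadDim is an assumed caller-supplied thread count (at most RT_FFTS_MAX_MANUAL_THREAD_NUM):

```c
#include <stdlib.h>

static rtManualThreadDmuInfo_t *AllocPrefetchList(const rtManualThreadAicAivInfo_t *info, uint16_t threadDim) {
    // Total DMU descriptors across all threads, per the comment above.
    uint16_t dmuNum = info->threadPrefetchDmuIdx[threadDim - 1U];
    return (rtManualThreadDmuInfo_t *)calloc(dmuNum, sizeof(rtManualThreadDmuInfo_t));
}
```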
typedef struct tagAutoThreadPrefetch {
    uint64_t dataAddr; // device mem
    uint64_t dataAddr;  // device mem
    uint32_t dataAddrOffset;
    uint32_t nonTailDataLen;
    uint32_t tailDataLen;
} rtAutoThreadPrefetch_t;
typedef struct tagAutoThreadAicAivInfo {
    uint64_t taskParamAddr; // device mem
    uint64_t taskParamAddr;  // device mem
    uint16_t taskParamOffset;
    // when satMode=1 and FP16 computation with non-INF inputs overflows/underflows, results will be +/-INF of FP16
    // when satMode=0 and FP16 computation with non-INF inputs overflows/underflows
    // results will be saturated to +/- MAX of FP16
    // when satMode=0 and FP16 computation with non-INF inputs overflows/underflows, results will be saturated to +/-MAX of FP16
    uint8_t satMode;
    uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3: reserved
    uint8_t iCachePrefetchCnt; // units is 2K
    uint8_t prefetchEnableBitmap; // 8 bit bitmap
    uint8_t prefetchOnceBitmap; // 8 bit bitmap
    uint8_t scheduleMode;          // 0:normal mode, 1:batch mode, 2:sync mode, 3:reserved
    uint8_t iCachePrefetchCnt;     // unit is 2K
    uint8_t prefetchEnableBitmap;  // 8 bit bitmap
    uint8_t prefetchOnceBitmap;    // 8 bit bitmap
    uint16_t tailBlkDim;
    uint16_t nonTailBlkDim;
@@ -95,13 +94,13 @@ typedef struct tagAutoThreadAicAivInfo {
    const char *nonTailTaskFuncStub;
    const char *tailTaskFuncStub;
    // for prefetch, valid num is prefetchEnableBitmap bit count
    // if prefetchEnableBitmap = '00010011', need prefetch number is 3, srcPrefetch is only 0, 1, 2 is valid
    // for prefetch, the valid entry count is the bit count of prefetchEnableBitmap.
    // e.g. if prefetchEnableBitmap='00010011', three prefetches are needed and only srcPrefetch[0..2] are valid
    rtAutoThreadPrefetch_t srcPrefetch[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK];
} rtAutoThreadAicAivInfo_t;
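The srcPrefetch comment above ties the number of valid entries to the bit count of prefetchEnableBitmap; a small helper makes the rule concrete:

```c
static unsigned int ValidPrefetchNum(uint8_t prefetchEnableBitmap) {
    unsigned int count = 0;
    while (prefetchEnableBitmap != 0U) {
        count += prefetchEnableBitmap & 1U;  // count the lowest bit
        prefetchEnableBitmap >>= 1U;
    }
    return count;  // e.g. bitmap 00010011 -> 3 valid srcPrefetch entries
}
```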
typedef struct tagAutoThreadCacheInfo {
    uint64_t dataAddr; // device mem
    uint64_t dataAddr;  // device mem
    uint32_t dataAddrOffset;
    uint32_t nonTailDataLen;
    uint32_t tailDataLen;
@@ -109,7 +108,7 @@ typedef struct tagAutoThreadCacheInfo {
} rtAutoThreadCacheInfo_t;
typedef struct tagManualThreadCacheInfo {
    rtManualThreadDmuInfo_t *dmuList; // 0-64k
    rtManualThreadDmuInfo_t *dmuList;  // 0-64k
    uint16_t dmuNum;
    uint16_t sliceDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM];
    uint16_t ticketCacheRefCntTbl[RT_FFTS_MAX_MANUAL_THREAD_NUM];
@@ -152,11 +151,11 @@ typedef struct tagFftsSubTaskInfo {
} rtFftsSubTaskInfo_t;
typedef struct tagFftsDescInfo {
    uint8_t tm; // thread subtask kickstart mode, 0:order, 1:disorder
    uint8_t di; // discard invalidate
    uint8_t dw; // discard write back
    uint8_t df; // discard flush
    uint8_t dataSplitUnit; // split source or ticket cache by 2~dataSplitUnit MB
    uint8_t tm;             // thread subtask kickstart mode, 0:order, 1:disorder
    uint8_t di;             // discard invalidate
    uint8_t dw;             // discard write back
    uint8_t df;             // discard flush
    uint8_t dataSplitUnit;  // split source or ticket cache by 2^dataSplitUnit MB
    uint8_t prefetchOstNum;
    uint8_t cacheMaintainOstNum;
    uint8_t aicPrefetchUpper;
@@ -166,20 +165,20 @@ typedef struct tagFftsDescInfo {
} rtFftsDescInfo_t;
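The corrected comment makes dataSplitUnit a power-of-two exponent rather than a range, so the split granularity is 2^dataSplitUnit MB:

```c
static uint64_t SplitSizeBytes(const rtFftsDescInfo_t *desc) {
    // 2^dataSplitUnit MB, expressed in bytes.
    return (1ULL << desc->dataSplitUnit) * 1024ULL * 1024ULL;
}
```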
typedef struct tagFftsTaskInfo {
    rtFftsType_t fftsType;
    uint16_t subTaskNum;
    uint16_t tickCacheNum;
    rtFftsDescInfo_t fftsDesc;
    // sub task desc, real num is subTaskNum
    rtFftsSubTaskInfo_t subTask[RT_FFTS_MAX_SUB_TASK_NUM];
    // ticket cache, real number is ticketCacheNum
    // ticket cache, real number is tickCacheNum.
    rtTicketCache_t ticketCache[RT_FFTS_MAX_TICKET_CACHE_NUM];
} rtFftsTaskInfo_t;
RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream);
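Putting the pieces together, a minimal launch sketch using only the types declared above; the subtask and ticket-cache payloads are elided, and error handling is left to the caller:

```c
#include <string.h>

rtError_t LaunchFftsTask(rtStream_t stream) {
    rtFftsTaskInfo_t taskInfo;
    (void)memset(&taskInfo, 0, sizeof(taskInfo));
    taskInfo.fftsType = RT_FFTS_TYPE_AUTO_THREAD;
    taskInfo.subTaskNum = 1U;              // real number of entries used in subTask[]
    taskInfo.tickCacheNum = 1U;            // real number of entries used in ticketCache[]
    taskInfo.fftsDesc.tm = 0U;             // kickstart thread subtasks in order
    taskInfo.fftsDesc.dataSplitUnit = 1U;  // split by 2^1 = 2 MB
    // ... fill taskInfo.subTask[0] and taskInfo.ticketCache[0] here ...
    return rtFftsTaskLaunch(&taskInfo, stream);
}
```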
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
}
#endif
#endif //__CCE_RUNTIME_FFTS_H
#endif // __CCE_RUNTIME_FFTS_H
@@ -19,7 +19,7 @@
#include "base.h"
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
extern "C" {
#endif
@@ -490,7 +490,7 @@ RTS_API rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *ad
 */
RTS_API rtError_t rtDebugUnRegister(rtModel_t model);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
}
#endif
@@ -8,7 +8,7 @@
#include "base.h"
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
extern "C" {
#endif
@@ -23,6 +23,7 @@ extern "C" {
 */
RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStream_t stream);
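rtStarsTaskLaunch takes an opaque, hardware-defined SQE; a minimal sketch in which the 64-byte size and zeroed payload are purely illustrative:

```c
uint8_t sqe[64] = {0};  // SQE layout is hardware-defined; encode it before launching
rtError_t ret = rtStarsTaskLaunch(sqe, (uint32_t)sizeof(sqe), stream);  // stream: an existing rtStream_t
```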
/**
 * @ingroup rt_stars
 * @brief create cdq instance.
@@ -76,10 +77,11 @@ RTS_API rtError_t rtCdqEnQueue(const char *queName, uint32_t cdqeIndex, void *da
 * @param [in] stream launch task on the stream
 * @return RT_ERROR_NONE for ok, others failed
 */
RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, const void *prtAddr,
RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, const void *ptrAddr,
                                      rtStream_t stream);
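A usage sketch for the corrected ptrAddr parameter, assuming the queue "cdq0" was created earlier via the cdq-create API documented above, devPtr points at the device-side entry, and stream is a valid rtStream_t; the names are illustrative:

```c
// Enqueue CDQE index 0 of queue "cdq0" by pointer rather than by value.
rtError_t ret = rtCdqEnQueuePtrMode("cdq0", 0U, devPtr, stream);
```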
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
}
#endif
#endif // __CCE_RUNTIME_STARS_H
@@ -20,7 +20,7 @@
#include "base.h"
#include "event.h"
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
extern "C" {
#endif
@@ -211,7 +211,7 @@ RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, con
 */
RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
#if defined(__cplusplus)
}
#endif
@@ -84,6 +84,7 @@
#endif
#include <cstdint>
#include <stddef.h>
namespace Msprofiler {
namespace Api {
@@ -105,6 +106,37 @@ extern "C" {
MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index);
typedef int Status;
typedef struct aclprofSubscribeConfig aclprofSubscribeConfig1;
///
/// @ingroup AscendCL
/// @brief subscribe to profiling data of a graph
/// @param [in] graphId: the id of the subscribed graph
/// @param [in] profSubscribeConfig: pointer to the subscribe config
/// @return Status result of function
///
Status aclgrphProfGraphSubscribe(const uint32_t graphId,
                                 const aclprofSubscribeConfig1 *profSubscribeConfig);
///
/// @ingroup AscendCL
/// @brief unsubscribe from profiling data of a graph
/// @param [in] graphId: the id of the subscribed graph
/// @return Status result of function
///
Status aclgrphProfGraphUnSubscribe(const uint32_t graphId);
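A minimal subscribe/unsubscribe flow; graphId and config are assumed to come from the surrounding graph-execution workflow, and 0 is treated as success per `typedef int Status`:

```c
Status status = aclgrphProfGraphSubscribe(graphId, config);
if (status == 0) {
    // ... run the graph and consume the subscription data it produces ...
    status = aclgrphProfGraphUnSubscribe(graphId);
}
```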
/**
 * @ingroup AscendCL
 * @brief get graph id from subscription data
 *
 * @param opInfo [IN] pointer to subscription data
 * @param opInfoLen [IN] memory size of subscription data
 * @param index [IN] index of the op in subscription data
 *
 * @retval graph id of subscription data
 * @retval 0 for failure
 */
size_t aclprofGetGraphId(const void *opInfo, size_t opInfoLen, uint32_t index);
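A sketch of pulling the graph id out of a delivered data chunk; dataBuf and dataLen are assumed to arrive via the subscription callback:

```c
size_t graphId = aclprofGetGraphId(dataBuf, dataLen, 0U);  // index 0: first op in the chunk
if (graphId == 0U) {
    // 0 signals that the chunk could not be parsed as subscription data
}
```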
#ifdef __cplusplus
}
#endif
@@ -54,6 +54,17 @@ struct ReporterData {
    unsigned char *data;  // the data content
};
/**
 * @name HashData
 * @brief struct of data to hash
 */
struct HashData {
    int deviceId;         // the index of device
    size_t dataLen;       // the length of data
    unsigned char *data;  // the data content
    uint64_t hashId;      // the id of hashed data
};
/**
 * @name MsprofReporterModuleId
 * @brief module id of data to report
@@ -75,6 +86,7 @@ enum MsprofReporterCallbackType {
    MSPROF_REPORTER_INIT,          // init reporter
    MSPROF_REPORTER_UNINIT,        // uninit reporter
    MSPROF_REPORTER_DATA_MAX_LEN,  // data max length for calling report callback
    MSPROF_REPORTER_HASH           // hash data to id
};
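A sketch of the new hash path: fill a HashData record and pass it through the reporter callback with MSPROF_REPORTER_HASH. The (moduleId, type, data, len) callback shape is assumed from how the other callback types in this header are used; reporterCallback is the function pointer registered elsewhere:

```c
struct HashData hashData;
hashData.deviceId = 0;                    // device the data belongs to
hashData.dataLen = nameLen;               // length of the bytes to hash
hashData.data = (unsigned char *)opName;  // the bytes to hash
hashData.hashId = 0U;                     // filled in by the callback on success
(void)reporterCallback(moduleId, (uint32_t)MSPROF_REPORTER_HASH, &hashData, (uint32_t)sizeof(hashData));
// hashData.hashId now carries the compact id for the hashed data
```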
/**