@@ -82,8 +82,8 @@ if (ENABLE_OPEN_SRC)
 elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
     add_subdirectory(tests)
 else()
-    find_module(slog libalog.so ${ASCEND_ATC_DIR})
-    find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
+    find_module(slog libalog.so ${ASCEND_ATC_DIR} ${ASCEND_DRIVER_COMMON_DIR})
+    find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR})
     if(PLATFORM STREQUAL "train")
         find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
         find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
@@ -150,6 +150,7 @@ elseif(ENABLE_MS_TESTCASES)
     include(cmake/external_libs/protobuf_static.cmake)
     include(cmake/external_libs/protoc.cmake)
     include(cmake/external_libs/securec.cmake)
+    include(cmake/external_libs/json.cmake)
     include(cmake/FindModule.cmake)
     include(cmake/intf_pub_linux.cmake)
@@ -9,10 +9,6 @@ if (GE_PB_PKG)
     set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
     set(MD5 "0dc903888211db3a0f170304cd9f3a89")
     set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
-#elseif (ENABLE_GITEE)
-#    set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
-#    set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
-#    set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
 else()
     set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
     set(MD5 "0dc903888211db3a0f170304cd9f3a89")
@@ -21,6 +21,7 @@
 #include "common/ge_inner_error_codes.h"
 #include "common/types.h"
 #include "common/util.h"
+#include "common/math/math_util.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/op/op_parser_util.h"
 #include "graph/types.h"
@@ -52,15 +52,7 @@ HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<Hccl
   }
 }
 
-HcclTask::~HcclTask() {
-  if (workspace_mem_ != nullptr) {
-    rtError_t rt_ret = rtFree(workspace_mem_);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtFree workspace_mem_ failed! ret: 0x%X.", rt_ret);
-    }
-    workspace_mem_ = nullptr;
-  }
-}
+HcclTask::~HcclTask() {}
 
 bool HcclTask::Distribute() {
   // Ops kernel info store
@@ -79,11 +71,7 @@ bool HcclTask::Distribute() {
   SetSecondaryStream();
 
   if (task_info_->workspace_size() > 0) {
-    rtError_t rt_ret = rtMalloc(&workspace_mem_, task_info_->workspace_size(), RT_MEMORYINFO_HBM);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
-      return false;
-    }
+    workspace_mem_ = task_info_->workspace_addr();
   }
 
   GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl.");
@@ -0,0 +1,73 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_H_
+#define INC_EXTERNAL_ACL_ACL_H_
+
+#include "acl_rt.h"
+#include "acl_op.h"
+#include "acl_mdl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Current version is 1.0.0
+#define ACL_MAJOR_VERSION 1
+#define ACL_MINOR_VERSION 0
+#define ACL_PATCH_VERSION 0
+
+/**
+ * @ingroup AscendCL
+ * @brief acl initialize
+ *
+ * @par Restriction
+ * The aclInit interface can be called only once in a process
+ * @param configPath [IN]  the config path; it can be NULL
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath);
+
+/**
+ * @ingroup AscendCL
+ * @brief acl finalize
+ *
+ * @par Restriction
+ * Need to call aclFinalize before the process exits.
+ * After calling aclFinalize, the services cannot continue to be used normally.
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclFinalize();
+
+/**
+ * @ingroup AscendCL
+ * @brief query ACL interface version
+ *
+ * @param majorVersion [OUT]  ACL interface major version
+ * @param minorVersion [OUT]  ACL interface minor version
+ * @param patchVersion [OUT]  ACL interface patch version
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_H_
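The new acl.h limits itself to process lifecycle plus a version query. Below is a minimal sketch of how a caller might drive these three entry points; only the declarations above are taken from the diff, while the `acl/acl.h` include path is an assumed install layout.

#include <stdio.h>
#include "acl/acl.h"  /* assumed install path for the header shown above */

int main(void) {
    /* aclInit may be called only once per process; NULL means no config file. */
    if (aclInit(NULL) != ACL_SUCCESS) {
        fprintf(stderr, "aclInit failed\n");
        return 1;
    }

    int32_t major = 0, minor = 0, patch = 0;
    if (aclrtGetVersion(&major, &minor, &patch) == ACL_SUCCESS) {
        printf("ACL interface version %d.%d.%d\n", major, minor, patch);
    }

    /* Required before process exit; ACL services are unusable afterwards. */
    (void)aclFinalize();
    return 0;
}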
@@ -0,0 +1,636 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_BASE_H_
+#define INC_EXTERNAL_ACL_ACL_BASE_H_
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "error_codes/rt_error_codes.h"
+#include "error_codes/ge_error_codes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define ACL_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define ACL_FUNC_VISIBILITY
+#endif
+#else
+#ifdef FUNC_VISIBILITY
+#define ACL_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define ACL_FUNC_VISIBILITY
+#endif
+#endif
+
+#ifdef __GNUC__
+#define ACL_DEPRECATED __attribute__((deprecated))
+#define ACL_DEPRECATED_MESSAGE(message) __attribute__((deprecated(message)))
+#elif defined(_MSC_VER)
+#define ACL_DEPRECATED __declspec(deprecated)
+#define ACL_DEPRECATED_MESSAGE(message) __declspec(deprecated(message))
+#else
+#define ACL_DEPRECATED
+#define ACL_DEPRECATED_MESSAGE(message)
+#endif
+
+typedef void *aclrtStream;
+typedef void *aclrtEvent;
+typedef void *aclrtContext;
+typedef int aclError;
+typedef uint16_t aclFloat16;
+typedef struct aclDataBuffer aclDataBuffer;
+typedef struct aclTensorDesc aclTensorDesc;
+
+static const int ACL_ERROR_NONE = 0;
+static const int ACL_SUCCESS = 0;
+
+static const int ACL_ERROR_INVALID_PARAM = 100000;
+static const int ACL_ERROR_UNINITIALIZE = 100001;
+static const int ACL_ERROR_REPEAT_INITIALIZE = 100002;
+static const int ACL_ERROR_INVALID_FILE = 100003;
+static const int ACL_ERROR_WRITE_FILE = 100004;
+static const int ACL_ERROR_INVALID_FILE_SIZE = 100005;
+static const int ACL_ERROR_PARSE_FILE = 100006;
+static const int ACL_ERROR_FILE_MISSING_ATTR = 100007;
+static const int ACL_ERROR_FILE_ATTR_INVALID = 100008;
+static const int ACL_ERROR_INVALID_DUMP_CONFIG = 100009;
+static const int ACL_ERROR_INVALID_PROFILING_CONFIG = 100010;
+static const int ACL_ERROR_INVALID_MODEL_ID = 100011;
+static const int ACL_ERROR_DESERIALIZE_MODEL = 100012;
+static const int ACL_ERROR_PARSE_MODEL = 100013;
+static const int ACL_ERROR_READ_MODEL_FAILURE = 100014;
+static const int ACL_ERROR_MODEL_SIZE_INVALID = 100015;
+static const int ACL_ERROR_MODEL_MISSING_ATTR = 100016;
+static const int ACL_ERROR_MODEL_INPUT_NOT_MATCH = 100017;
+static const int ACL_ERROR_MODEL_OUTPUT_NOT_MATCH = 100018;
+static const int ACL_ERROR_MODEL_NOT_DYNAMIC = 100019;
+static const int ACL_ERROR_OP_TYPE_NOT_MATCH = 100020;
+static const int ACL_ERROR_OP_INPUT_NOT_MATCH = 100021;
+static const int ACL_ERROR_OP_OUTPUT_NOT_MATCH = 100022;
+static const int ACL_ERROR_OP_ATTR_NOT_MATCH = 100023;
+static const int ACL_ERROR_OP_NOT_FOUND = 100024;
+static const int ACL_ERROR_OP_LOAD_FAILED = 100025;
+static const int ACL_ERROR_UNSUPPORTED_DATA_TYPE = 100026;
+static const int ACL_ERROR_FORMAT_NOT_MATCH = 100027;
+static const int ACL_ERROR_BIN_SELECTOR_NOT_REGISTERED = 100028;
+static const int ACL_ERROR_KERNEL_NOT_FOUND = 100029;
+static const int ACL_ERROR_BIN_SELECTOR_ALREADY_REGISTERED = 100030;
+static const int ACL_ERROR_KERNEL_ALREADY_REGISTERED = 100031;
+static const int ACL_ERROR_INVALID_QUEUE_ID = 100032;
+static const int ACL_ERROR_REPEAT_SUBSCRIBE = 100033;
+static const int ACL_ERROR_STREAM_NOT_SUBSCRIBE = 100034;
+static const int ACL_ERROR_THREAD_NOT_SUBSCRIBE = 100035;
+static const int ACL_ERROR_WAIT_CALLBACK_TIMEOUT = 100036;
+static const int ACL_ERROR_REPEAT_FINALIZE = 100037;
+static const int ACL_ERROR_NOT_STATIC_AIPP = 100038;
+static const int ACL_ERROR_COMPILING_STUB_MODE = 100039;
+static const int ACL_ERROR_GROUP_NOT_SET = 100040;
+static const int ACL_ERROR_GROUP_NOT_CREATE = 100041;
+static const int ACL_ERROR_PROF_ALREADY_RUN = 100042;
+static const int ACL_ERROR_PROF_NOT_RUN = 100043;
+static const int ACL_ERROR_DUMP_ALREADY_RUN = 100044;
+static const int ACL_ERROR_DUMP_NOT_RUN = 100045;
+static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046;
+static const int ACL_ERROR_PROF_API_CONFLICT = 148047;
+static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048;
+static const int ACL_ERROR_INVALID_OPP_PATH = 148049;
+static const int ACL_ERROR_OP_UNSUPPORTED_DYNAMIC = 148050;
+
+static const int ACL_ERROR_BAD_ALLOC = 200000;
+static const int ACL_ERROR_API_NOT_SUPPORT = 200001;
+static const int ACL_ERROR_INVALID_DEVICE = 200002;
+static const int ACL_ERROR_MEMORY_ADDRESS_UNALIGNED = 200003;
+static const int ACL_ERROR_RESOURCE_NOT_MATCH = 200004;
+static const int ACL_ERROR_INVALID_RESOURCE_HANDLE = 200005;
+static const int ACL_ERROR_FEATURE_UNSUPPORTED = 200006;
+static const int ACL_ERROR_PROF_MODULES_UNSUPPORTED = 200007;
+
+static const int ACL_ERROR_STORAGE_OVER_LIMIT = 300000;
+
+static const int ACL_ERROR_INTERNAL_ERROR = 500000;
+static const int ACL_ERROR_FAILURE = 500001;
+static const int ACL_ERROR_GE_FAILURE = 500002;
+static const int ACL_ERROR_RT_FAILURE = 500003;
+static const int ACL_ERROR_DRV_FAILURE = 500004;
+static const int ACL_ERROR_PROFILING_FAILURE = 500005;
+
+#define ACL_TENSOR_SHAPE_RANGE_NUM 2
+#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE
+
+typedef enum {
+    ACL_DT_UNDEFINED = -1,
+    ACL_FLOAT = 0,
+    ACL_FLOAT16 = 1,
+    ACL_INT8 = 2,
+    ACL_INT32 = 3,
+    ACL_UINT8 = 4,
+    ACL_INT16 = 6,
+    ACL_UINT16 = 7,
+    ACL_UINT32 = 8,
+    ACL_INT64 = 9,
+    ACL_UINT64 = 10,
+    ACL_DOUBLE = 11,
+    ACL_BOOL = 12,
+    ACL_STRING = 13,
+} aclDataType;
+
+typedef enum {
+    ACL_FORMAT_UNDEFINED = -1,
+    ACL_FORMAT_NCHW = 0,
+    ACL_FORMAT_NHWC = 1,
+    ACL_FORMAT_ND = 2,
+    ACL_FORMAT_NC1HWC0 = 3,
+    ACL_FORMAT_FRACTAL_Z = 4,
+    ACL_FORMAT_NC1HWC0_C04 = 12,
+    ACL_FORMAT_NDHWC = 27,
+    ACL_FORMAT_FRACTAL_NZ = 29,
+    ACL_FORMAT_NCDHW = 30,
+    ACL_FORMAT_NDC1HWC0 = 32,
+    ACL_FRACTAL_Z_3D = 33
+} aclFormat;
+
+typedef enum {
+    ACL_DEBUG = 0,
+    ACL_INFO = 1,
+    ACL_WARNING = 2,
+    ACL_ERROR = 3,
+} aclLogLevel;
+
+typedef enum {
+    ACL_MEMTYPE_DEVICE = 0,
+    ACL_MEMTYPE_HOST = 1,
+} aclMemType;
+
+/**
+ * @ingroup AscendCL
+ * @brief Converts data of type aclFloat16 to data of type float
+ *
+ * @param value [IN]  Data to be converted
+ *
+ * @retval Transformed data
+ */
+ACL_FUNC_VISIBILITY float aclFloat16ToFloat(aclFloat16 value);
+
+/**
+ * @ingroup AscendCL
+ * @brief Converts data of type float to data of type aclFloat16
+ *
+ * @param value [IN]  Data to be converted
+ *
+ * @retval Transformed data
+ */
+ACL_FUNC_VISIBILITY aclFloat16 aclFloatToFloat16(float value);
+
+/**
+ * @ingroup AscendCL
+ * @brief create data of aclDataBuffer
+ *
+ * @param data [IN]  pointer to data
+ * @li Need to be managed by the user:
+ *     call the aclrtMalloc interface to apply for memory,
+ *     call the aclrtFree interface to release memory
+ *
+ * @param size [IN]  size of data in bytes
+ *
+ * @retval pointer to created instance. nullptr if run out of memory
+ *
+ * @see aclrtMalloc | aclrtFree
+ */
+ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of aclDataBuffer
+ *
+ * @par Function
+ * Only the aclDataBuffer type data is destroyed here.
+ * The memory of the data passed in when the aclCreateDataBuffer interface
+ * is called to create aclDataBuffer type data must be released by the user
+ *
+ * @param dataBuffer [IN]  pointer to the aclDataBuffer
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclCreateDataBuffer
+ */
+ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief update new data of aclDataBuffer
+ *
+ * @param dataBuffer [OUT]  pointer to aclDataBuffer
+ * @li The old data needs to be released by the user, otherwise a memory leak may occur:
+ *     call the aclGetDataBufferAddr interface to get the old data address,
+ *     call the aclrtFree interface to release memory
+ *
+ * @param data [IN]  pointer to new data
+ * @li Need to be managed by the user:
+ *     call the aclrtMalloc interface to apply for memory,
+ *     call the aclrtFree interface to release memory
+ *
+ * @param size [IN]  size of data in bytes
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtMalloc | aclrtFree | aclGetDataBufferAddr
+ */
+ACL_FUNC_VISIBILITY aclError aclUpdateDataBuffer(aclDataBuffer *dataBuffer, void *data, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data address from aclDataBuffer
+ *
+ * @param dataBuffer [IN]  pointer to the data of aclDataBuffer
+ *
+ * @retval data address
+ */
+ACL_FUNC_VISIBILITY void *aclGetDataBufferAddr(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data size of aclDataBuffer
+ *
+ * @param dataBuffer [IN]  pointer to the data of aclDataBuffer
+ *
+ * @retval data size
+ */
+ACL_DEPRECATED_MESSAGE("aclGetDataBufferSize is deprecated, use aclGetDataBufferSizeV2 instead")
+ACL_FUNC_VISIBILITY uint32_t aclGetDataBufferSize(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data size of aclDataBuffer; replaces aclGetDataBufferSize
+ *
+ * @param dataBuffer [IN]  pointer to the data of aclDataBuffer
+ *
+ * @retval data size
+ */
+ACL_FUNC_VISIBILITY size_t aclGetDataBufferSizeV2(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief get size of aclDataType
+ *
+ * @param dataType [IN]  the aclDataType whose size to get
+ *
+ * @retval size of the aclDataType
+ */
+ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType);
+
+// interfaces of tensor desc
+
+/**
+ * @ingroup AscendCL
+ * @brief create data aclTensorDesc
+ *
+ * @param dataType [IN]  Data types described by tensor
+ * @param numDims [IN]   the number of dimensions of the shape
+ * @param dims [IN]      the size of the specified dimension
+ * @param format [IN]    tensor format
+ *
+ * @retval aclTensorDesc pointer.
+ * @retval nullptr if param is invalid or run out of memory
+ */
+ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType,
+                                                       int numDims,
+                                                       const int64_t *dims,
+                                                       aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data aclTensorDesc
+ *
+ * @param desc [IN]  pointer to the data of aclTensorDesc to destroy
+ */
+ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief set tensor shape range for aclTensorDesc
+ *
+ * @param desc [OUT]      pointer to the data of aclTensorDesc
+ * @param dimsCount [IN]  the number of dimensions of the shape
+ * @param dimsRange [IN]  the range of dimensions of the shape
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc,
+                                                    size_t dimsCount,
+                                                    int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data type specified by the tensor description
+ *
+ * @param desc [IN]  pointer to the instance of aclTensorDesc
+ *
+ * @retval data type specified by the tensor description.
+ * @retval ACL_DT_UNDEFINED if description is null
+ */
+ACL_FUNC_VISIBILITY aclDataType aclGetTensorDescType(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data format specified by the tensor description
+ *
+ * @param desc [IN]  pointer to the instance of aclTensorDesc
+ *
+ * @retval data format specified by the tensor description.
+ * @retval ACL_FORMAT_UNDEFINED if description is null
+ */
+ACL_FUNC_VISIBILITY aclFormat aclGetTensorDescFormat(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get tensor size specified by the tensor description
+ *
+ * @param desc [IN]  pointer to the instance of aclTensorDesc
+ *
+ * @retval data size specified by the tensor description.
+ * @retval 0 if description is null
+ */
+ACL_FUNC_VISIBILITY size_t aclGetTensorDescSize(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get element count specified by the tensor description
+ *
+ * @param desc [IN]  pointer to the instance of aclTensorDesc
+ *
+ * @retval element count specified by the tensor description.
+ * @retval 0 if description is null
+ */
+ACL_FUNC_VISIBILITY size_t aclGetTensorDescElementCount(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get number of dims specified by the tensor description
+ *
+ * @param desc [IN]  pointer to the instance of aclTensorDesc
+ *
+ * @retval number of dims specified by the tensor description.
+ * @retval 0 if description is null
+ * @retval ACL_UNKNOWN_RANK if the tensor dim is -2
+ */
+ACL_FUNC_VISIBILITY size_t aclGetTensorDescNumDims(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of the specified dim in the tensor description
+ *
+ * @param desc [IN]   pointer to the instance of aclTensorDesc
+ * @param index [IN]  index of dims, starting from 0.
+ *
+ * @retval dim specified by the tensor description and index.
+ * @retval -1 if description or index is invalid
+ */
+ACL_DEPRECATED_MESSAGE("aclGetTensorDescDim is deprecated, use aclGetTensorDescDimV2 instead")
+ACL_FUNC_VISIBILITY int64_t aclGetTensorDescDim(const aclTensorDesc *desc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of the specified dim in the tensor description
+ *
+ * @param desc [IN]      pointer to the instance of aclTensorDesc
+ * @param index [IN]     index of dims, starting from 0.
+ * @param dimSize [OUT]  size of the specified dim.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, size_t index, int64_t *dimSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the range of the specified dim in the tensor description
+ *
+ * @param desc [IN]         pointer to the instance of aclTensorDesc
+ * @param index [IN]        index of dims, starting from 0.
+ * @param dimRangeNum [IN]  number of dimRange.
+ * @param dimRange [OUT]    range of the specified dim.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc,
+                                                      size_t index,
+                                                      size_t dimRangeNum,
+                                                      int64_t *dimRange);
+
+/**
+ * @ingroup AscendCL
+ * @brief set tensor description name
+ *
+ * @param desc [OUT]  pointer to the instance of aclTensorDesc
+ * @param name [IN]   tensor description name
+ */
+ACL_FUNC_VISIBILITY void aclSetTensorDescName(aclTensorDesc *desc, const char *name);
+
+/**
+ * @ingroup AscendCL
+ * @brief get tensor description name
+ *
+ * @param desc [IN]  pointer to the instance of aclTensorDesc
+ *
+ * @retval tensor description name.
+ * @retval empty string if description is null
+ */
+ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Convert the format in the source aclTensorDesc according to
+ * the specified dstFormat to generate a new target aclTensorDesc.
+ * The format in the source aclTensorDesc remains unchanged.
+ *
+ * @param srcDesc [IN]    pointer to the source tensor desc
+ * @param dstFormat [IN]  destination format
+ * @param dstDesc [OUT]   pointer to the pointer to the destination tensor desc
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat,
+                                                      aclTensorDesc **dstDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the storage format specified by the tensor description
+ *
+ * @param desc [OUT]   pointer to the instance of aclTensorDesc
+ * @param format [IN]  the storage format
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_DEPRECATED_MESSAGE("aclSetTensorStorageFormat is deprecated, use aclSetTensorFormat instead")
+ACL_FUNC_VISIBILITY aclError aclSetTensorStorageFormat(aclTensorDesc *desc, aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the storage shape specified by the tensor description
+ *
+ * @param desc [OUT]    pointer to the instance of aclTensorDesc
+ * @param numDims [IN]  the number of dimensions of the shape
+ * @param dims [IN]     the size of the specified dimension
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_DEPRECATED_MESSAGE("aclSetTensorStorageShape is deprecated, use aclSetTensorShape instead")
+ACL_FUNC_VISIBILITY aclError aclSetTensorStorageShape(aclTensorDesc *desc, int numDims, const int64_t *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the format specified by the tensor description
+ *
+ * @param desc [OUT]   pointer to the instance of aclTensorDesc
+ * @param format [IN]  the storage format
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorFormat(aclTensorDesc *desc, aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the shape specified by the tensor description
+ *
+ * @param desc [OUT]    pointer to the instance of aclTensorDesc
+ * @param numDims [IN]  the number of dimensions of the shape
+ * @param dims [IN]     the size of the specified dimension
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorShape(aclTensorDesc *desc, int numDims, const int64_t *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the original format specified by the tensor description
+ *
+ * @param desc [OUT]   pointer to the instance of aclTensorDesc
+ * @param format [IN]  the storage format
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorOriginFormat(aclTensorDesc *desc, aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the original shape specified by the tensor description
+ *
+ * @param desc [OUT]    pointer to the instance of aclTensorDesc
+ * @param numDims [IN]  the number of dimensions of the shape
+ * @param dims [IN]     the size of the specified dimension
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int numDims, const int64_t *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op description info
+ *
+ * @param desc [IN]   pointer to tensor description
+ * @param index [IN]  index of tensor
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get address of tensor
+ *
+ * @param desc [IN]  pointer to tensor description
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the dynamic input name specified by the tensor description
+ *
+ * @param desc [OUT]             pointer to the instance of aclTensorDesc
+ * @param dynamicInputName [IN]  pointer to the dynamic input name
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set const data specified by the tensor description
+ *
+ * @param desc [OUT]       pointer to the instance of aclTensorDesc
+ * @param dataBuffer [IN]  pointer to the const databuffer
+ * @param length [IN]      the length of the const databuffer
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set tensor memory type specified by the tensor description
+ *
+ * @param desc [OUT]     pointer to the instance of aclTensorDesc
+ * @param memType [IN]   ACL_MEMTYPE_DEVICE means device, ACL_MEMTYPE_HOST means host
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemType memType);
+
+/**
+ * @ingroup AscendCL
+ * @brief an interface for users to output APP logs
+ *
+ * @param logLevel [IN]  the level of current log
+ * @param func [IN]      the function where the log is located
+ * @param file [IN]      the file where the log is located
+ * @param line [IN]      number of the source line where the log is located
+ * @param fmt [IN]       the format of current log
+ * @param ... [IN]       the value of current log
+ */
+ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
+                                   const char *fmt, ...);
+
+#define ACL_APP_LOG(level, fmt, ...) \
+    aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_BASE_H_
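The descriptor and buffer APIs above pair every create call with a destroy call, while the payload memory itself stays caller-owned. Below is a hedged sketch of that ownership split; the `acl/acl_base.h` include path is an assumed install layout, and host `malloc` stands in for `aclrtMalloc`/`aclrtFree`, which live in acl_rt.h outside this diff.

#include <stdlib.h>
#include "acl/acl_base.h"  /* assumed install path for the header shown above */

/* Describe a 1x3x224x224 float tensor in NCHW layout and wrap a buffer for it.
 * Host malloc is used only for illustration; device code would normally use
 * aclrtMalloc/aclrtFree (declared in acl_rt.h, which is not part of this diff). */
int describe_input(void) {
    int64_t dims[] = {1, 3, 224, 224};
    aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT, 4, dims, ACL_FORMAT_NCHW);
    if (desc == NULL) {
        return -1;  /* invalid parameter or out of memory */
    }

    size_t nbytes = aclGetTensorDescSize(desc);  /* size in bytes of the described tensor */
    void *data = malloc(nbytes);
    aclDataBuffer *buf = (data != NULL) ? aclCreateDataBuffer(data, nbytes) : NULL;

    /* ... hand desc/buf to an execution API here ... */

    /* Destroying the wrappers does not free the payload; that stays with the caller. */
    if (buf != NULL) {
        (void)aclDestroyDataBuffer(buf);
    }
    free(data);
    aclDestroyTensorDesc(desc);
    return 0;
}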
@@ -0,0 +1,549 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_OP_H_
+#define INC_EXTERNAL_ACL_ACL_OP_H_
+
+#include "acl_base.h"
+#include "acl_rt.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct aclopHandle aclopHandle;
+typedef struct aclopAttr aclopAttr;
+typedef struct aclopKernelDesc aclopKernelDesc;
+
+typedef void (*aclDataDeallocator)(void *data, size_t length);
+
+static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1;
+
+typedef enum aclEngineType {
+    ACL_ENGINE_SYS,
+    ACL_ENGINE_AICORE,
+    ACL_ENGINE_VECTOR,
+} aclopEngineType;
+
+/**
+ * @ingroup AscendCL
+ * @brief Set base directory that contains single op models
+ *
+ * @par Restriction
+ * The aclopSetModelDir interface can be called only once in a process.
+ * @param modelDir [IN]  path of the directory
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetModelDir(const char *modelDir);
+
+/**
+ * @ingroup AscendCL
+ * @brief load single op models from memory
+ *
+ * @par Restriction
+ * The aclopLoad interface can be called more than once in a process.
+ * @param model [IN]      address of single op models
+ * @param modelSize [IN]  size of single op models
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopLoad(const void *model, size_t modelSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief create data of type aclopAttr
+ *
+ * @retval pointer to created instance.
+ * @retval nullptr if run out of memory
+ */
+ACL_FUNC_VISIBILITY aclopAttr *aclopCreateAttr();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of type aclopAttr
+ *
+ * @param attr [IN]  pointer to the instance of aclopAttr
+ */
+ACL_FUNC_VISIBILITY void aclopDestroyAttr(const aclopAttr *attr);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is bool
+ *
+ * @param attr [OUT]      pointer to the instance of aclopAttr
+ * @param attrName [IN]   attribute name
+ * @param attrValue [IN]  attribute value;
+ *                        false if attrValue is 0, true otherwise.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrBool(aclopAttr *attr, const char *attrName, uint8_t attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is int64_t
+ *
+ * @param attr [OUT]      pointer to the instance of aclopAttr
+ * @param attrName [IN]   attribute name
+ * @param attrValue [IN]  attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrInt(aclopAttr *attr, const char *attrName, int64_t attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is float
+ *
+ * @param attr [OUT]      pointer to the instance of aclopAttr
+ * @param attrName [IN]   attribute name
+ * @param attrValue [IN]  attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrFloat(aclopAttr *attr, const char *attrName, float attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is string
+ *
+ * @param attr [OUT]      pointer to the instance of aclopAttr
+ * @param attrName [IN]   attribute name
+ * @param attrValue [IN]  attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *attrName, const char *attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of bools
+ *
+ * @param attr [OUT]      pointer to the instance of aclopAttr
+ * @param attrName [IN]   attribute name
+ * @param numValues [IN]  number of values
+ * @param values [IN]     pointer to values; each value is false if 0, true otherwise.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues,
+                                                  const uint8_t *values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of ints
+ *
+ * @param attr [OUT]      pointer to the instance of aclopAttr
+ * @param attrName [IN]   attribute name
+ * @param numValues [IN]  number of values
+ * @param values [IN]     pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues,
+                                                 const int64_t *values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of floats
+ *
+ * @param attr [OUT]      pointer to the instance of aclopAttr
+ * @param attrName [IN]   attribute name
+ * @param numValues [IN]  number of values
+ * @param values [IN]     pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues,
+                                                   const float *values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of strings
+ *
+ * @param attr [OUT]      pointer to the instance of aclopAttr
+ * @param attrName [IN]   attribute name
+ * @param numValues [IN]  number of values
+ * @param values [IN]     pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues,
+                                                    const char **values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of list of ints
+ *
+ * @param attr [OUT]      pointer to the instance of aclopAttr
+ * @param attrName [IN]   attribute name
+ * @param numLists [IN]   number of lists
+ * @param numValues [IN]  pointer to number of values of each list
+ * @param values [IN]     pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr,
+                                                     const char *attrName,
+                                                     int numLists,
+                                                     const int *numValues,
+                                                     const int64_t *const values[]);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load and execute the specified operator asynchronously
+ *
+ * @par Restriction
+ * @li The input and output organization of each operator is different,
+ * and the application needs to organize the operator strictly
+ * according to the operator input and output parameters when calling.
+ * @li When the user calls aclopExecute,
+ * the ACL finds the corresponding task according to the optype,
+ * the description of the input tensor,
+ * the description of the output tensor, and attr, and issues the execution.
+ *
+ * @param opType [IN]      type of op
+ * @param numInputs [IN]   number of inputs
+ * @param inputDesc [IN]   pointer to array of input tensor descriptions
+ * @param inputs [IN]      pointer to array of input buffers
+ * @param numOutputs [IN]  number of outputs
+ * @param outputDesc [IN]  pointer to array of output tensor descriptions
+ * @param outputs [OUT]    pointer to array of output buffers
+ * @param attr [IN]        pointer to instance of aclopAttr.
+ *                         may pass nullptr if the op has no attribute
+ * @param stream [IN]      stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead")
+ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType,
+                                          int numInputs,
+                                          const aclTensorDesc *const inputDesc[],
+                                          const aclDataBuffer *const inputs[],
+                                          int numOutputs,
+                                          const aclTensorDesc *const outputDesc[],
+                                          aclDataBuffer *const outputs[],
+                                          const aclopAttr *attr,
+                                          aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load and execute the specified operator.
+ * The difference with aclopExecute is that aclopExecuteV2 will refresh outputDesc
+ *
+ * @par Restriction
+ * @li The input and output organization of each operator is different,
+ * and the application needs to organize the operator strictly
+ * according to the operator input and output parameters when calling.
+ * @li When the user calls aclopExecuteV2,
+ * the ACL finds the corresponding task according to the optype,
+ * the description of the input tensor,
+ * the description of the output tensor, and attr, and issues the execution.
+ *
+ * @param opType [IN]          type of op
+ * @param numInputs [IN]       number of inputs
+ * @param inputDesc [IN]       pointer to array of input tensor descriptions
+ * @param inputs [IN]          pointer to array of input buffers
+ * @param numOutputs [IN]      number of outputs
+ * @param outputDesc [IN|OUT]  pointer to array of output tensor descriptions
+ * @param outputs [OUT]        pointer to array of output buffers
+ * @param attr [IN]            pointer to instance of aclopAttr.
+ *                             may pass nullptr if the op has no attribute
+ * @param stream [IN]          stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType,
+                                            int numInputs,
+                                            aclTensorDesc *inputDesc[],
+                                            aclDataBuffer *inputs[],
+                                            int numOutputs,
+                                            aclTensorDesc *outputDesc[],
+                                            aclDataBuffer *outputs[],
+                                            aclopAttr *attr,
+                                            aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create an instance of aclopHandle.
+ *
+ * @param opType [IN]      type of op
+ * @param numInputs [IN]   number of inputs
+ * @param inputDesc [IN]   pointer to array of input tensor descriptions
+ * @param numOutputs [IN]  number of outputs
+ * @param outputDesc [IN]  pointer to array of output tensor descriptions
+ * @param opAttr [IN]      pointer to instance of aclopAttr.
+ *                         may pass nullptr if the op has no attribute
+ * @param handle [OUT]     pointer to the pointer to the handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType,
+                                               int numInputs,
+                                               const aclTensorDesc *const inputDesc[],
+                                               int numOutputs,
+                                               const aclTensorDesc *const outputDesc[],
+                                               const aclopAttr *opAttr,
+                                               aclopHandle **handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy aclopHandle instance
+ *
+ * @param handle [IN]  pointer to the instance of aclopHandle
+ */
+ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief execute an op with the handle.
+ * can save op model matching cost compared with aclopExecute
+ *
+ * @param handle [IN]      pointer to the instance of aclopHandle.
+ *                         The aclopCreateHandle interface has been called
+ *                         in advance to create aclopHandle type data.
+ * @param numInputs [IN]   number of inputs
+ * @param inputs [IN]      pointer to array of input buffers.
+ *                         The aclCreateDataBuffer interface has been called
+ *                         in advance to create aclDataBuffer type data.
+ * @param numOutputs [IN]  number of outputs
+ * @param outputs [OUT]    pointer to array of output buffers
+ * @param stream [IN]      stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclopCreateHandle | aclCreateDataBuffer
+ */
+ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle,
+                                                 int numInputs,
+                                                 const aclDataBuffer *const inputs[],
+                                                 int numOutputs,
+                                                 aclDataBuffer *const outputs[],
+                                                 aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief cast data type
+ *
+ * @param srcDesc [IN]    source tensor desc
+ * @param srcBuffer [IN]  source tensor buffer
+ * @param dstDesc [IN]    destination tensor desc
+ * @param dstBuffer [OUT] destination tensor buffer
+ * @param truncate [IN]   do not truncate if value is 0, truncate otherwise
+ * @param stream [IN]     stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc,
+                                       const aclDataBuffer *srcBuffer,
+                                       const aclTensorDesc *dstDesc,
+                                       aclDataBuffer *dstBuffer,
+                                       uint8_t truncate,
+                                       aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create a handle for casting datatype
+ *
+ * @param srcDesc [IN]   source tensor desc
+ * @param dstDesc [IN]   destination tensor desc
+ * @param truncate [IN]  do not truncate if value is 0, truncate otherwise
+ * @param handle [OUT]   pointer to the pointer to the handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc,
+                                                      aclTensorDesc *dstDesc,
+                                                      uint8_t truncate,
+                                                      aclopHandle **handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief create kernel
+ *
+ * @param opType [IN]       op type
+ * @param kernelId [IN]     kernel id
+ * @param kernelName [IN]   kernel name
+ * @param binData [IN]      kernel bin data
+ * @param binSize [IN]      kernel bin size
+ * @param enginetype [IN]   engine type
+ * @param deallocator [IN]  callback function for deallocating bin data;
+ *                          null if bin data is to be deallocated by the caller
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclopCompile
+ */
+ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType,
+                                               const char *kernelId,
+                                               const char *kernelName,
+                                               void *binData,
+                                               int binSize,
+                                               aclopEngineType enginetype,
+                                               aclDataDeallocator deallocator);
+
+/**
+ * @ingroup AscendCL
+ * @brief create kernel
+ *
+ * @param numInputs [IN]        number of inputs
+ * @param inputDesc [IN]        pointer to array of input tensor descriptions
+ * @param numOutputs [IN]       number of outputs
+ * @param outputDesc [IN]       pointer to array of output tensor descriptions
+ * @param opAttr [IN]           pointer to instance of aclopAttr
+ * @param aclopKernelDesc [IN]  pointer to instance of aclopKernelDesc
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+typedef aclError (*aclopCompileFunc)(int numInputs,
+                                     const aclTensorDesc *const inputDesc[],
+                                     int numOutputs,
+                                     const aclTensorDesc *const outputDesc[],
+                                     const aclopAttr *opAttr,
+                                     aclopKernelDesc *aclopKernelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief register compile function
+ *
+ * @param opType [IN]  op type
+ * @param func [IN]    compile function
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclopUnregisterCompileFunc
+ */
+ACL_FUNC_VISIBILITY aclError aclopRegisterCompileFunc(const char *opType, aclopCompileFunc func);
+
+/**
+ * @ingroup AscendCL
+ * @brief unregister compile function
+ *
+ * @param opType [IN]  op type
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType);
+
+/**
+ * @ingroup AscendCL
+ * @brief set kernel args
+ *
+ * @param kernelDesc [IN]  pointer to instance of aclopKernelDesc
+ * @param kernelId [IN]    kernel id
+ * @param blockDim [IN]    block dim
+ * @param args [IN]        args
+ * @param argSize [IN]     size in bytes of args
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc,
+                                                const char *kernelId,
+                                                uint32_t blockDim,
+                                                const void *args,
+                                                uint32_t argSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief set workspace sizes
+ *
+ * @param kernelDesc [IN]      pointer to instance of aclopKernelDesc
+ * @param numWorkspaces [IN]   number of workspaces
+ * @param workspaceSizes [IN]  pointer to array of sizes of workspaces
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kernelDesc, int numWorkspaces,
+                                                          size_t *workspaceSizes);
+
+/**
+ * @ingroup AscendCL
+ * @brief compile op with dynamic shape
+ *
+ * @param opType [IN]      op type
+ * @param numInputs [IN]   number of inputs
+ * @param inputDesc [IN]   pointer to array of input tensor descriptions
+ * @param numOutputs [IN]  number of outputs
+ * @param outputDesc [IN]  pointer to array of output tensor descriptions
+ * @param attr [IN]        pointer to instance of aclopAttr.
+ *                         may pass nullptr if the op has no attribute
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType,
+                                               int numInputs,
+                                               const aclTensorDesc *const inputDesc[],
+                                               int numOutputs,
+                                               const aclTensorDesc *const outputDesc[],
+                                               const aclopAttr *attr);
+
+/**
+ * @ingroup AscendCL
+ * @brief run shape inference for the specified operator synchronously
+ *
+ * @param opType [IN]       type of op
+ * @param numInputs [IN]    number of inputs
+ * @param inputDesc [IN]    pointer to array of input tensor descriptions
+ * @param inputs [IN]       pointer to array of input buffers
+ * @param numOutputs [IN]   number of outputs
+ * @param outputDesc [OUT]  pointer to array of output tensor descriptions
+ * @param attr [IN]         pointer to instance of aclopAttr.
+ *                          may pass nullptr if the op has no attribute
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType,
+                                             int numInputs,
+                                             aclTensorDesc *inputDesc[],
+                                             aclDataBuffer *inputs[],
+                                             int numOutputs,
+                                             aclTensorDesc *outputDesc[],
+                                             aclopAttr *attr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_OP_H_
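The single-op flow in acl_op.h composes as: create an aclopAttr, set typed attributes, then submit with aclopExecuteV2, which may refresh the output descriptions. Below is a minimal sketch under stated assumptions: the op type "MyOp" and the attribute name "keep_dims" are placeholders, and the descriptors, buffers, and stream are assumed to be prepared elsewhere (see the acl_base.h sketch above).

#include "acl/acl_op.h"  /* assumed install path for the header shown above */

/* Sketch: run a hypothetical single-input, single-output operator "MyOp". */
aclError run_my_op(aclTensorDesc *inDesc, aclDataBuffer *inBuf,
                   aclTensorDesc *outDesc, aclDataBuffer *outBuf,
                   aclrtStream stream) {
    aclopAttr *attr = aclopCreateAttr();
    if (attr == NULL) {
        return ACL_ERROR_BAD_ALLOC;
    }

    /* "keep_dims" is a placeholder; names and types must match the op definition. */
    aclError ret = aclopSetAttrBool(attr, "keep_dims", 1);
    if (ret == ACL_SUCCESS) {
        aclTensorDesc *inputDesc[] = {inDesc};
        aclDataBuffer *inputs[] = {inBuf};
        aclTensorDesc *outputDesc[] = {outDesc};
        aclDataBuffer *outputs[] = {outBuf};
        /* Unlike aclopExecute, V2 may rewrite outputDesc (e.g. inferred shapes). */
        ret = aclopExecuteV2("MyOp", 1, inputDesc, inputs,
                             1, outputDesc, outputs, attr, stream);
    }

    aclopDestroyAttr(attr);
    return ret;
}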
| @@ -0,0 +1,115 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ | |||||
| #define INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ | |||||
| #include "acl_base.h" | |||||
| #include "acl_op.h" | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| typedef enum aclCompileType { | |||||
| ACL_COMPILE_SYS, | |||||
| ACL_COMPILE_UNREGISTERED | |||||
| } aclopCompileType; | |||||
| typedef enum { | |||||
| ACL_PRECISION_MODE, | |||||
| ACL_AICORE_NUM, | |||||
| ACL_AUTO_TUNE_MODE, | |||||
| ACL_OP_SELECT_IMPL_MODE, | |||||
| ACL_OPTYPELIST_FOR_IMPLMODE, | |||||
| ACL_OP_DEBUG_LEVEL, | |||||
| ACL_DEBUG_DIR, | |||||
| ACL_OP_COMPILER_CACHE_MODE, | |||||
| ACL_OP_COMPILER_CACHE_DIR | |||||
| } aclCompileOpt; | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief compile op | |||||
| * | |||||
| * @param opType [IN] op type | |||||
| * @param numInputs [IN] number of inputs | |||||
| * @param inputDesc [IN] pointer to array of input tensor descriptions | |||||
| * @param numOutputs [IN] number of outputs | |||||
| * @param outputDesc [IN] pointer to array of output tensor descriptions | |||||
| * @param attr [IN] pointer to instance of aclopAttr. | |||||
| * may pass nullptr if the op has no attribute | |||||
| * @param engineType [IN] engine type | |||||
| * @param compileFlag [IN] compile flag | |||||
| * @param opPath [IN] path of op | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, | |||||
| int numInputs, | |||||
| const aclTensorDesc *const inputDesc[], | |||||
| int numOutputs, | |||||
| const aclTensorDesc *const outputDesc[], | |||||
| const aclopAttr *attr, | |||||
| aclopEngineType engineType, | |||||
| aclopCompileType compileFlag, | |||||
| const char *opPath); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief compile and execute op | |||||
| * | |||||
| * @param opType [IN] op type | |||||
| * @param numInputs [IN] number of inputs | |||||
| * @param inputDesc [IN] pointer to array of input tensor descriptions | |||||
| * @param inputs [IN] pointer to array of input buffers | |||||
| * @param numOutputs [IN] number of outputs | |||||
| * @param outputDesc [IN] pointer to array of output tensor descriptions | |||||
| * @param outputs [OUT] pointer to array of output buffers | |||||
| * @param attr [IN] pointer to instance of aclopAttr. | |||||
| * may pass nullptr if the op has no attribute | |||||
| * @param engineType [IN] engine type | |||||
| * @param compileFlag [IN] compile flag | |||||
| * @param opPath [IN] path of op | |||||
| * @param stream [IN] stream handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType, | |||||
| int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], | |||||
| int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], | |||||
| const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag, | |||||
| const char *opPath, aclrtStream stream); | |||||
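A minimal single-op sketch of this path, assuming a hypothetical "Add" operator with two float16 inputs of identical shape, and assuming aclCreateTensorDesc/aclCreateDataBuffer from acl_base.h and the ACL_ENGINE_SYS value of aclopEngineType from acl_op.h (none of which appear in this excerpt); device buffers are allocated elsewhere:

```c
#include "acl/acl_op_compiler.h"
#include "acl/acl_rt.h"

/* Sketch: compile and launch a single "Add" op on `stream`.
 * in0/in1/out are device buffers of `len` bytes each (allocated elsewhere). */
aclError LaunchAdd(void *in0, void *in1, void *out, size_t len, aclrtStream stream) {
    const int64_t dims[] = {1, 224};  /* assumed shape */
    aclTensorDesc *inDesc[2] = {
        aclCreateTensorDesc(ACL_FLOAT16, 2, dims, ACL_FORMAT_ND),
        aclCreateTensorDesc(ACL_FLOAT16, 2, dims, ACL_FORMAT_ND)};
    aclTensorDesc *outDesc[1] = {aclCreateTensorDesc(ACL_FLOAT16, 2, dims, ACL_FORMAT_ND)};
    aclDataBuffer *inBuf[2] = {aclCreateDataBuffer(in0, len), aclCreateDataBuffer(in1, len)};
    aclDataBuffer *outBuf[1] = {aclCreateDataBuffer(out, len)};

    /* ACL_COMPILE_SYS: resolve the op from the built-in repository, no opPath needed. */
    aclError ret = aclopCompileAndExecute("Add",
        2, (const aclTensorDesc *const *)inDesc, (const aclDataBuffer *const *)inBuf,
        1, (const aclTensorDesc *const *)outDesc, outBuf,
        NULL, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL, stream);
    if (ret == ACL_SUCCESS) {
        ret = aclrtSynchronizeStream(stream);  /* wait for the launch to finish */
    }
    /* Descriptor/buffer cleanup (aclDestroyTensorDesc/aclDestroyDataBuffer) omitted. */
    return ret;
}
```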
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief set compile option | |||||
| * | |||||
| * @param opt [IN] compile option | |||||
| * @param value [IN] pointer for the option value | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *value); | |||||
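Compile options are set once, before ops are compiled. A one-line sketch; the value string "allow_fp32_to_fp16" is an assumed precision-mode setting and may differ by CANN version:

```c
#include "acl/acl_op_compiler.h"

/* Sketch: allow fp32 ops to fall back to fp16 during subsequent compilation. */
aclError ConfigureCompiler(void) {
    return aclSetCompileopt(ACL_PRECISION_MODE, "allow_fp32_to_fp16");
}
```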
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ | |||||
| @@ -0,0 +1,329 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_EXTERNAL_ACL_PROF_H_ | |||||
| #define INC_EXTERNAL_ACL_PROF_H_ | |||||
| #include "acl_base.h" | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| #define ACL_PROF_ACL_API 0x0001 | |||||
| #define ACL_PROF_TASK_TIME 0x0002 | |||||
| #define ACL_PROF_AICORE_METRICS 0x0004 | |||||
| #define ACL_PROF_AICPU 0x0008 | |||||
| /** | |||||
| * @deprecated please use aclprofGetOpTypeLen and aclprofGetOpNameLen instead | |||||
| */ | |||||
| #define ACL_PROF_MAX_OP_NAME_LEN 257 | |||||
| #define ACL_PROF_MAX_OP_TYPE_LEN 65 | |||||
| typedef enum { | |||||
| ACL_AICORE_ARITHMETIC_UTILIZATION = 0, | |||||
| ACL_AICORE_PIPE_UTILIZATION = 1, | |||||
| ACL_AICORE_MEMORY_BANDWIDTH = 2, | |||||
| ACL_AICORE_L0B_AND_WIDTH = 3, | |||||
| ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, | |||||
| ACL_AICORE_NONE = 0xFF | |||||
| } aclprofAicoreMetrics; | |||||
| typedef struct aclprofConfig aclprofConfig; | |||||
| typedef struct aclprofStopConfig aclprofStopConfig; | |||||
| typedef struct aclprofAicoreEvents aclprofAicoreEvents; | |||||
| typedef struct aclprofSubscribeConfig aclprofSubscribeConfig; | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief profiling initialize | |||||
| * | |||||
| * @param profilerResultPath [IN] path of profiling result | |||||
| * @param length [IN] length of profilerResultPath | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofFinalize | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofInit(const char *profilerResultPath, size_t length); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief profiling finalize | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofInit | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofFinalize(); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Start profiling modules by profilerConfig | |||||
| * | |||||
| * @param profilerConfig [IN] config of profiling | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofStop | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create data of type aclprofConfig | |||||
| * | |||||
| * @param deviceIdList [IN] list of device id | |||||
| * @param deviceNums [IN] number of devices | |||||
| * @param aicoreMetrics [IN] type of aicore metrics | |||||
| * @param aicoreEvents [IN] pointer to aicore events; only NULL is supported now | |||||
| * @param dataTypeConfig [IN] config modules need profiling | |||||
| * | |||||
| * @retval the aclprofConfig pointer | |||||
| * | |||||
| * @see aclprofDestroyConfig | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, | |||||
| aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy data of type aclprofConfig | |||||
| * | |||||
| * @param profilerConfig [IN] config of profiling | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofCreateConfig | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief stop profiling modules by stopProfilingConfig | |||||
| * | |||||
| * @param profilerConfig [IN] pointer to stop config of profiling | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofStart | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig); | |||||
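The calls above compose into an init → configure → start → stop lifecycle. A minimal sketch for device 0; the result path is a placeholder, and the assumption that aclError is an int status type comes from acl_base.h:

```c
#include <string.h>
#include "acl/prof.h"  /* assumed install path of this header */

/* Sketch: profile one region of work on device 0. */
aclError ProfileRegion(void) {
    const char *path = "/tmp/prof_result";  /* placeholder output directory */
    aclError ret = aclprofInit(path, strlen(path));
    if (ret != ACL_SUCCESS) return ret;

    uint32_t devices[] = {0};
    aclprofConfig *cfg = aclprofCreateConfig(devices, 1, ACL_AICORE_ARITHMETIC_UTILIZATION,
                                             NULL /* aicoreEvents: only NULL supported */,
                                             ACL_PROF_ACL_API | ACL_PROF_TASK_TIME);
    if (cfg == NULL) { (void)aclprofFinalize(); return -1; /* placeholder failure code */ }

    ret = aclprofStart(cfg);
    /* ... run the workload to be profiled here ... */
    if (ret == ACL_SUCCESS) ret = aclprofStop(cfg);
    (void)aclprofDestroyConfig(cfg);
    (void)aclprofFinalize();
    return ret;
}
```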
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief subscribe profiling data of model | |||||
| * | |||||
| * @param modelId [IN] the model id subscribed | |||||
| * @param profSubscribeConfig [IN] pointer to config of model subscribe | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofModelUnSubscribe | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, | |||||
| const aclprofSubscribeConfig *profSubscribeConfig); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief unsubscribe profiling data of model | |||||
| * | |||||
| * @param modelId [IN] the model id unsubscribed | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofModelSubscribe | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create subscribe config | |||||
| * | |||||
| * @param timeInfoSwitch [IN] switch controlling whether to get time info from the model | |||||
| * @param aicoreMetrics [IN] aicore metrics | |||||
| * @param fd [IN] pointer to write pipe | |||||
| * | |||||
| * @retval the aclprofSubscribeConfig pointer | |||||
| * | |||||
| * @see aclprofDestroySubscribeConfig | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, | |||||
| aclprofAicoreMetrics aicoreMetrics, void *fd); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief destroy subscribe config | |||||
| * | |||||
| * @param profSubscribeConfig [IN] subscribe config | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclprofCreateSubscribeConfig | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofDestroySubscribeConfig(const aclprofSubscribeConfig *profSubscribeConfig); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get the size of the op description data | |||||
| * | |||||
| * @param opDescSize [OUT] size of op desc | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpDescSize(size_t *opDescSize); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get op number from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param opNumber [OUT] op number of subscription data | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLen, uint32_t *opNumber); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get length of op type from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * @param opTypeLen [OUT] actual length of op type string | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opInfoLen, uint32_t index, | |||||
| size_t *opTypeLen); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get op type from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * @param opType [OUT] obtained op type string | |||||
| * @param opTypeLen [IN] obtained length of op type string | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, | |||||
| char *opType, size_t opTypeLen); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get length of op name from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * @param opNameLen [OUT] actual length of op name string | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opInfoLen, uint32_t index, | |||||
| size_t *opNameLen); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get op name from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * @param opName [OUT] obtained op name string | |||||
| * @param opNameLen [IN] obtained length of op name string | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, | |||||
| char *opName, size_t opNameLen); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get start time of specified op from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * | |||||
| * @retval start time (us) of specified op with timestamp | |||||
| * @retval 0 for failed | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint64_t aclprofGetOpStart(const void *opInfo, size_t opInfoLen, uint32_t index); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get end time of specified op from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * | |||||
| * @retval end time (us) of specified op with timestamp | |||||
| * @retval 0 for failed | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint64_t aclprofGetOpEnd(const void *opInfo, size_t opInfoLen, uint32_t index); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get execution time of specified op from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * | |||||
| * @retval execution time (us) of specified op with timestamp | |||||
| * @retval 0 for failed | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opInfoLen, uint32_t index); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get model id from subscription data | |||||
| * | |||||
| * @param opInfo [IN] pointer to subscription data | |||||
| * @param opInfoLen [IN] memory size of subscription data | |||||
| * @param index [IN] index of op array in opInfo | |||||
| * | |||||
| * @retval model id of subscription data | |||||
| * @retval 0 for failed | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index); | |||||
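Taken together, these getters walk a buffer of subscription records. A sketch of draining one buffer, assuming opInfo/opInfoLen were obtained elsewhere (e.g. by reading the fd passed to aclprofCreateSubscribeConfig):

```c
#include <stdio.h>
#include <stdlib.h>
#include "acl/prof.h"  /* assumed install path of this header */

/* Sketch: print per-op type and duration from one subscription buffer. */
void DumpOpInfo(const void *opInfo, size_t opInfoLen) {
    uint32_t opNum = 0;
    if (aclprofGetOpNum(opInfo, opInfoLen, &opNum) != ACL_SUCCESS) return;
    for (uint32_t i = 0; i < opNum; ++i) {
        size_t typeLen = 0;
        if (aclprofGetOpTypeLen(opInfo, opInfoLen, i, &typeLen) != ACL_SUCCESS) continue;
        char *type = (char *)malloc(typeLen);  /* length reported by the API */
        if (type == NULL) return;
        if (aclprofGetOpType(opInfo, opInfoLen, i, type, typeLen) == ACL_SUCCESS) {
            printf("op %u: type=%s duration=%lluus\n", i, type,
                   (unsigned long long)aclprofGetOpDuration(opInfo, opInfoLen, i));
        }
        free(type);
    }
}
```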
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_ACL_PROF_H_ | |||||
| @@ -0,0 +1,965 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_EXTERNAL_ACL_ACL_RT_H_ | |||||
| #define INC_EXTERNAL_ACL_ACL_RT_H_ | |||||
| #include <stdint.h> | |||||
| #include <stddef.h> | |||||
| #include "acl_base.h" | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| #define ACL_EVENT_TIME_LINE 0x00000008u | |||||
| typedef enum aclrtRunMode { | |||||
| ACL_DEVICE, | |||||
| ACL_HOST, | |||||
| } aclrtRunMode; | |||||
| typedef enum aclrtTsId { | |||||
| ACL_TS_ID_AICORE = 0, | |||||
| ACL_TS_ID_AIVECTOR = 1, | |||||
| ACL_TS_ID_RESERVED = 2, | |||||
| } aclrtTsId; | |||||
| typedef enum aclrtEventStatus { | |||||
| ACL_EVENT_STATUS_COMPLETE = 0, | |||||
| ACL_EVENT_STATUS_NOT_READY = 1, | |||||
| ACL_EVENT_STATUS_RESERVED = 2, | |||||
| } aclrtEventStatus; | |||||
| typedef enum aclrtCallbackBlockType { | |||||
| ACL_CALLBACK_NO_BLOCK, | |||||
| ACL_CALLBACK_BLOCK, | |||||
| } aclrtCallbackBlockType; | |||||
| typedef enum aclrtMemcpyKind { | |||||
| ACL_MEMCPY_HOST_TO_HOST, | |||||
| ACL_MEMCPY_HOST_TO_DEVICE, | |||||
| ACL_MEMCPY_DEVICE_TO_HOST, | |||||
| ACL_MEMCPY_DEVICE_TO_DEVICE, | |||||
| } aclrtMemcpyKind; | |||||
| typedef enum aclrtMemMallocPolicy { | |||||
| ACL_MEM_MALLOC_HUGE_FIRST, | |||||
| ACL_MEM_MALLOC_HUGE_ONLY, | |||||
| ACL_MEM_MALLOC_NORMAL_ONLY, | |||||
| ACL_MEM_MALLOC_HUGE_FIRST_P2P, | |||||
| ACL_MEM_MALLOC_HUGE_ONLY_P2P, | |||||
| ACL_MEM_MALLOC_NORMAL_ONLY_P2P, | |||||
| } aclrtMemMallocPolicy; | |||||
| typedef enum aclrtMemAttr { | |||||
| ACL_DDR_MEM, | |||||
| ACL_HBM_MEM, | |||||
| ACL_DDR_MEM_HUGE, | |||||
| ACL_DDR_MEM_NORMAL, | |||||
| ACL_HBM_MEM_HUGE, | |||||
| ACL_HBM_MEM_NORMAL, | |||||
| ACL_DDR_MEM_P2P_HUGE, | |||||
| ACL_DDR_MEM_P2P_NORMAL, | |||||
| ACL_HBM_MEM_P2P_HUGE, | |||||
| ACL_HBM_MEM_P2P_NORMAL, | |||||
| } aclrtMemAttr; | |||||
| typedef enum aclrtGroupAttr { | |||||
| ACL_GROUP_AICORE_INT, | |||||
| ACL_GROUP_AIV_INT, | |||||
| ACL_GROUP_AIC_INT, | |||||
| ACL_GROUP_SDMANUM_INT, | |||||
| ACL_GROUP_ASQNUM_INT, | |||||
| ACL_GROUP_GROUPID_INT | |||||
| } aclrtGroupAttr; | |||||
| typedef struct tagRtGroupInfo aclrtGroupInfo; | |||||
| typedef struct rtExceptionInfo aclrtExceptionInfo; | |||||
| typedef void (*aclrtCallback)(void *userData); | |||||
| typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Set a callback function to handle exception information | |||||
| * | |||||
| * @param callback [IN] callback function to handle exception information | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get task id from exception information | |||||
| * | |||||
| * @param info [IN] pointer of exception information | |||||
| * | |||||
| * @retval The task id from exception information | |||||
| * @retval 0xFFFFFFFF if info is null | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get stream id from exception information | |||||
| * | |||||
| * @param info [IN] pointer of exception information | |||||
| * | |||||
| * @retval The stream id from exception information | |||||
| * @retval 0xFFFFFFFF if info is null | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get thread id from exception information | |||||
| * | |||||
| * @param info [IN] pointer of exception information | |||||
| * | |||||
| * @retval The thread id of the failed task | |||||
| * @retval 0xFFFFFFFF if info is null | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get device id from exception information | |||||
| * | |||||
| * @param info [IN] pointer of exception information | |||||
| * | |||||
| * @retval The device id from exception information | |||||
| * @retval 0xFFFFFFFF if info is null | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info); | |||||
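The callback type above pairs with these getters. A minimal sketch of registering a handler that logs the failing task (each getter returns 0xFFFFFFFF when info is null):

```c
#include <stdio.h>
#include "acl/acl_rt.h"

/* Sketch: log which device/stream/task/thread raised the exception. */
static void OnException(aclrtExceptionInfo *info) {
    printf("exception: device=%u stream=%u task=%u thread=%u\n",
           aclrtGetDeviceIdFromExceptionInfo(info),
           aclrtGetStreamIdFromExceptionInfo(info),
           aclrtGetTaskIdFromExceptionInfo(info),
           aclrtGetThreadIdFromExceptionInfo(info));
}

aclError InstallExceptionHandler(void) {
    return aclrtSetExceptionInfoCallback(OnException);
}
```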
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Specify the thread that handles the callback functions on the Stream | |||||
| * | |||||
| * @param threadId [IN] thread ID | |||||
| * @param stream [IN] stream handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Add a callback function to be executed on the host | |||||
| * to the task queue of the Stream | |||||
| * | |||||
| * @param fn [IN] Specify the callback function to be added | |||||
| * The function prototype of the callback function is: | |||||
| * typedef void (*aclrtCallback)(void *userData); | |||||
| * @param userData [IN] User data to be passed to the callback function | |||||
| * @param blockType [IN] callback block type | |||||
| * @param stream [IN] stream handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief After waiting for a specified time, trigger callback processing | |||||
| * | |||||
| * @par Function | |||||
| * Triggers callback processing on the thread specified by | |||||
| * the aclrtSubscribeReport interface | |||||
| * | |||||
| * @param timeout [IN] timeout value | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtSubscribeReport | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Cancel thread registration, | |||||
| * the callback function on the specified Stream | |||||
| * is no longer processed by the specified thread | |||||
| * | |||||
| * @param threadId [IN] thread ID | |||||
| * @param stream [IN] stream handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream); | |||||
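These four interfaces form one pattern: a dedicated host thread is subscribed to a stream, callbacks are launched onto that stream, and the thread drains them with aclrtProcessReport. A pthread-based sketch; the 100 ms timeout, the exit flag, and the placeholder error code are assumptions:

```c
#include <pthread.h>
#include <stdio.h>
#include "acl/acl_rt.h"

static volatile int g_exit = 0;

static void MyCallback(void *userData) {
    printf("callback fired: %s\n", (const char *)userData);
}

/* Dedicated thread that processes pending callbacks until asked to exit. */
static void *ReportThread(void *arg) {
    (void)arg;
    while (!g_exit) {
        (void)aclrtProcessReport(100);  /* wait up to 100 ms per round */
    }
    return NULL;
}

/* Sketch: run one host callback on `stream` through a report thread. */
aclError RunCallbackOnce(aclrtStream stream) {
    pthread_t tid;
    if (pthread_create(&tid, NULL, ReportThread, NULL) != 0) return -1; /* placeholder code */
    aclError ret = aclrtSubscribeReport((uint64_t)tid, stream);
    if (ret == ACL_SUCCESS) {
        ret = aclrtLaunchCallback(MyCallback, (void *)"hello", ACL_CALLBACK_BLOCK, stream);
    }
    if (ret == ACL_SUCCESS) ret = aclrtSynchronizeStream(stream);
    (void)aclrtUnSubscribeReport((uint64_t)tid, stream);
    g_exit = 1;
    (void)pthread_join(tid, NULL);
    return ret;
}
```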
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create context and associates it with the calling thread | |||||
| * | |||||
| * @par Function | |||||
| * The following use cases are supported: | |||||
| * @li If you don't call the aclrtCreateContext interface | |||||
| * to explicitly create the context, | |||||
| * the system will use the default context, which is implicitly created | |||||
| * when the aclrtSetDevice interface is called. | |||||
| * @li If multiple contexts are created in a process | |||||
| * (there is no limit on the number of contexts), | |||||
| * the current thread can only use one of them at the same time. | |||||
| * It is recommended to explicitly specify the context of the current thread | |||||
| * through the aclrtSetCurrentContext interface to increase | |||||
| * the maintainability of the program. | |||||
| * | |||||
| * @param context [OUT] point to the created context | |||||
| * @param deviceId [IN] device to create context on | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtSetDevice | aclrtSetCurrentContext | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief destroy context instance | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy context created through aclrtCreateContext interface | |||||
| * | |||||
| * @param context [IN] the context to destroy | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtCreateContext | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief set the context of the thread | |||||
| * | |||||
| * @par Function | |||||
| * The following scenarios are supported: | |||||
| * @li If the aclrtCreateContext interface is called in a thread to explicitly | |||||
| * create a Context (for example: ctx1), the thread's Context can be specified | |||||
| * without calling the aclrtSetCurrentContext interface. | |||||
| * The system uses ctx1 as the context of thread1 by default. | |||||
| * @li If the aclrtCreateContext interface is not explicitly created, | |||||
| * the system uses the default context as the context of the thread. | |||||
| * At this time, the aclrtDestroyContext interface cannot be used to release | |||||
| * the default context. | |||||
| * @li If the aclrtSetCurrentContext interface is called multiple times to | |||||
| * set the thread's Context, the last one prevails. | |||||
| * | |||||
| * @par Restriction | |||||
| * @li If the device corresponding to the context set for the thread | |||||
| * has been reset, you cannot set the context as the context of the thread, | |||||
| * otherwise a business exception will result. | |||||
| * @li It is recommended to use the context created in a thread. | |||||
| * If the aclrtCreateContext interface is called in thread A to create a context, | |||||
| * and the context is used in thread B, | |||||
| * the user must guarantee the execution order of tasks in the same stream | |||||
| * under the same context in two threads. | |||||
| * | |||||
| * @param context [IN] the current context of the thread | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtCreateContext | aclrtDestroyContext | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get the context of the thread | |||||
| * | |||||
| * @par Function | |||||
| * If the user calls the aclrtSetCurrentContext interface | |||||
| * multiple times to set the context of the current thread, | |||||
| * then the last set context is obtained | |||||
| * | |||||
| * @param context [OUT] the current context of the thread | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtSetCurrentContext | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Specify the device to use for the operation | |||||
| * implicitly create the default context and the default stream | |||||
| * | |||||
| * @par Function | |||||
| * The following use cases are supported: | |||||
| * @li Device can be specified in the process or thread. | |||||
| * If you call the aclrtSetDevice interface multiple | |||||
| * times to specify the same device, | |||||
| * you only need to call the aclrtResetDevice interface to reset the device. | |||||
| * @li The same device can be specified for operation | |||||
| * in different processes or threads. | |||||
| * @li Device is specified in a process, | |||||
| * and multiple threads in the process can share this device to explicitly | |||||
| * create a Context (aclrtCreateContext interface). | |||||
| * @li In multi-device scenarios, you can switch to other devices | |||||
| * through the aclrtSetDevice interface in the process. | |||||
| * | |||||
| * @param deviceId [IN] the device id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtResetDevice | aclrtCreateContext | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Reset the current operating Device and free resources on the device, | |||||
| * including the default context, the default stream, | |||||
| * and all streams created under the default context. | |||||
| * This is a synchronous interface. | |||||
| * If tasks under the default context or its streams have not completed, | |||||
| * the system waits for them to complete before releasing the resources. | |||||
| * @par Restriction | |||||
| * @li Release the Contexts, Streams, and Events explicitly created | |||||
| * on the device before resetting it. | |||||
| * It is recommended to follow the interface calling sequence below, | |||||
| * otherwise business exceptions may be caused. | |||||
| * @li Interface calling sequence: | |||||
| * call aclrtDestroyEvent interface to release Event or | |||||
| * call aclrtDestroyStream interface to release explicitly created Stream-> | |||||
| * call aclrtDestroyContext to release explicitly created Context-> | |||||
| * call aclrtResetDevice interface | |||||
| * | |||||
| * @param deviceId [IN] the device id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId); | |||||
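The recommended release order mirrors the setup order. A minimal sketch of the whole lifecycle on device 0, using the explicit-context variant:

```c
#include "acl/acl_rt.h"

/* Sketch: set up device 0 with an explicit context, then tear down in reverse. */
aclError DeviceSession(void) {
    aclError ret = aclrtSetDevice(0);
    if (ret != ACL_SUCCESS) return ret;

    aclrtContext ctx = NULL;
    ret = aclrtCreateContext(&ctx, 0);      /* explicit context on device 0 */
    if (ret == ACL_SUCCESS) {
        ret = aclrtSetCurrentContext(ctx);  /* bind it to this thread */
        /* ... create streams, launch work ... */
        (void)aclrtDestroyContext(ctx);     /* release the explicit context first */
    }
    (void)aclrtResetDevice(0);              /* then reset the device */
    return ret;
}
```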
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get target device of current thread | |||||
| * | |||||
| * @param deviceId [OUT] the device id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get target side | |||||
| * | |||||
| * @param runMode [OUT] the run mode | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Wait for compute device to finish | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Set Scheduling TS | |||||
| * | |||||
| * @param tsId [IN] the ts id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get total device number. | |||||
| * | |||||
| * @param count [OUT] the device number | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create event instance | |||||
| * | |||||
| * @param event [OUT] created event | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create event instance with flag | |||||
| * | |||||
| * @param event [OUT] created event | |||||
| * @param flag [IN] event flag | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtCreateEventWithFlag(aclrtEvent *event, uint32_t flag); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief destroy event instance | |||||
| * | |||||
| * @par Function | |||||
| * Only events created through the aclrtCreateEvent interface can be | |||||
| * destroyed; this is a synchronous interface. When destroying an event, | |||||
| * the user must ensure that the tasks involved in the aclrtSynchronizeEvent | |||||
| * interface or the aclrtStreamWaitEvent interface have completed before | |||||
| * the event is destroyed. | |||||
| * | |||||
| * @param event [IN] event to destroy | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Record an Event in the Stream | |||||
| * | |||||
| * @param event [IN] event to record | |||||
| * @param stream [IN] stream handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Reset an event | |||||
| * | |||||
| * @par Function | |||||
| * Users need to make sure to wait for the tasks in the Stream | |||||
| * to complete before resetting the Event | |||||
| * | |||||
| * @param event [IN] event to reset | |||||
| * @param stream [IN] stream handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Queries an event's status | |||||
| * | |||||
| * @param event [IN] event to query | |||||
| * @param status [OUT] event status | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Block host execution until the event completes | |||||
| * | |||||
| * @param event [IN] event to wait | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief computes the elapsed time between events. | |||||
| * | |||||
| * @param ms [OUT] time between start and end in ms | |||||
| * @param start [IN] starting event | |||||
| * @param end [IN] ending event | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end); | |||||
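Together with aclrtRecordEvent, this yields the usual bracket-and-measure pattern. A sketch that times whatever is enqueued between the two records:

```c
#include <stdio.h>
#include "acl/acl_rt.h"

/* Sketch: measure elapsed device time around work enqueued on `stream`. */
aclError TimeRegion(aclrtStream stream) {
    aclrtEvent start = NULL, end = NULL;
    aclError ret = aclrtCreateEvent(&start);
    if (ret != ACL_SUCCESS) return ret;
    ret = aclrtCreateEvent(&end);
    if (ret != ACL_SUCCESS) { (void)aclrtDestroyEvent(start); return ret; }

    (void)aclrtRecordEvent(start, stream);
    /* ... enqueue the tasks to be timed on `stream` ... */
    (void)aclrtRecordEvent(end, stream);
    ret = aclrtSynchronizeEvent(end);  /* wait until `end` has completed */

    if (ret == ACL_SUCCESS) {
        float ms = 0.0f;
        ret = aclrtEventElapsedTime(&ms, start, end);
        if (ret == ACL_SUCCESS) printf("region took %.3f ms\n", ms);
    }
    (void)aclrtDestroyEvent(start);
    (void)aclrtDestroyEvent(end);
    return ret;
}
```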
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief alloc memory on device | |||||
| * | |||||
| * @par Function | |||||
| * allocate a linear memory block of the requested size on the device | |||||
| * and return a pointer to the allocated memory through *devPtr | |||||
| * | |||||
| * @par Restriction | |||||
| * @li The memory requested by the aclrtMalloc interface needs to be released | |||||
| * through the aclrtFree interface. | |||||
| * @li Before calling the media data processing interface, | |||||
| * if you need to apply memory on the device to store input or output data, | |||||
| * you need to call acldvppMalloc to apply for memory. | |||||
| * | |||||
| * @param devPtr [OUT] pointer to pointer to allocated memory on device | |||||
| * @param size [IN] alloc memory size | |||||
| * @param policy [IN] memory alloc policy | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtFree | acldvppMalloc | aclrtMallocCached | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, | |||||
| size_t size, | |||||
| aclrtMemMallocPolicy policy); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief allocate memory on device with cache | |||||
| * | |||||
| * @par Function | |||||
| * allocate a linear memory block of the requested size on the device | |||||
| * and return a pointer to the allocated memory through *devPtr | |||||
| * | |||||
| * @par Restriction | |||||
| * @li The memory requested by the aclrtMallocCached interface needs to be released | |||||
| * through the aclrtFree interface. | |||||
| * | |||||
| * @param devPtr [OUT] pointer to pointer to allocated memory on device | |||||
| * @param size [IN] alloc memory size | |||||
| * @param policy [IN] memory alloc policy | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtFree | aclrtMalloc | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, | |||||
| size_t size, | |||||
| aclrtMemMallocPolicy policy); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief flush cache data to ddr | |||||
| * | |||||
| * @param devPtr [IN] the pointer that flush data to ddr | |||||
| * @param size [IN] flush size | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemFlush(void *devPtr, size_t size); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief invalidate cache data | |||||
| * | |||||
| * @param devPtr [IN] pointer to invalidate cache data | |||||
| * @param size [IN] invalidate size | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemInvalidate(void *devPtr, size_t size); | |||||
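Flush and invalidate pair with aclrtMallocCached: flush after the CPU writes so the device sees the data, invalidate before the CPU reads what the device wrote. A sketch, assuming a run mode where the host CPU can touch this memory directly (e.g. ACL_DEVICE on an SoC-style setup):

```c
#include <string.h>
#include "acl/acl_rt.h"

/* Sketch: CPU fills a cached buffer, flushes it, and later re-reads it. */
aclError CachedRoundTrip(size_t size) {
    void *buf = NULL;
    aclError ret = aclrtMallocCached(&buf, size, ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != ACL_SUCCESS) return ret;

    memset(buf, 0x5A, size);         /* CPU writes go through the cache */
    ret = aclrtMemFlush(buf, size);  /* make the writes visible to the device */

    /* ... device consumes and rewrites buf ... */

    if (ret == ACL_SUCCESS) ret = aclrtMemInvalidate(buf, size); /* drop stale CPU cache lines */
    /* the CPU may now read the device's results from buf */
    (void)aclrtFree(buf);
    return ret;
}
```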
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief free device memory | |||||
| * | |||||
| * @par Function | |||||
| * can only free memory allocated through the aclrtMalloc interface | |||||
| * | |||||
| * @param devPtr [IN] Pointer to memory to be freed | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtMalloc | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief alloc memory on host | |||||
| * | |||||
| * @par Restriction | |||||
| * @li The requested memory cannot be used in the Device | |||||
| * and needs to be explicitly copied to the Device. | |||||
| * @li The memory requested by the aclrtMallocHost interface | |||||
| * needs to be released through the aclrtFreeHost interface. | |||||
| * | |||||
| * @param hostPtr [OUT] pointer to pointer to allocated memory on the host | |||||
| * @param size [IN] alloc memory size | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtFreeHost | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief free host memory | |||||
| * | |||||
| * @par Function | |||||
| * can only free memory allocated through the aclrtMallocHost interface | |||||
| * | |||||
| * @param hostPtr [IN] free memory pointer | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtMallocHost | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief synchronous memory replication between host and device | |||||
| * | |||||
| * @param dst [IN] destination address pointer | |||||
| * @param destMax [IN] Max length of the destination address memory | |||||
| * @param src [IN] source address pointer | |||||
| * @param count [IN] the length of byte to copy | |||||
| * @param kind [IN] memcpy type | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, | |||||
| size_t destMax, | |||||
| const void *src, | |||||
| size_t count, | |||||
| aclrtMemcpyKind kind); | |||||
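The alloc/copy/free interfaces above compose into the standard host-to-device round trip; a minimal sketch:

```c
#include <string.h>
#include "acl/acl_rt.h"

/* Sketch: copy `size` bytes host -> device -> host. */
aclError RoundTrip(size_t size) {
    void *host = NULL, *dev = NULL;
    aclError ret = aclrtMallocHost(&host, size);
    if (ret != ACL_SUCCESS) return ret;
    ret = aclrtMalloc(&dev, size, ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != ACL_SUCCESS) { (void)aclrtFreeHost(host); return ret; }

    memset(host, 0x42, size);  /* stage data on the host side */
    ret = aclrtMemcpy(dev, size, host, size, ACL_MEMCPY_HOST_TO_DEVICE);
    if (ret == ACL_SUCCESS) {
        ret = aclrtMemcpy(host, size, dev, size, ACL_MEMCPY_DEVICE_TO_HOST);
    }
    (void)aclrtFree(dev);
    (void)aclrtFreeHost(host);
    return ret;
}
```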
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Initialize memory and set contents of memory to specified value | |||||
| * | |||||
| * @par Function | |||||
| * The memory to be initialized is on the Host or device side, | |||||
| * and the system determines whether | |||||
| * it is host or device according to the address | |||||
| * | |||||
| * @param devPtr [IN] Starting address of memory | |||||
| * @param maxCount [IN] Max length of destination address memory | |||||
| * @param value [IN] Set value | |||||
| * @param count [IN] The length of memory | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Asynchronous memory replication between Host and Device | |||||
| * | |||||
| * @par Function | |||||
| * After calling this interface, | |||||
| * be sure to call the aclrtSynchronizeStream interface to ensure that | |||||
| * the task of memory replication has been completed | |||||
| * | |||||
| * @par Restriction | |||||
| * @li For on-chip Device-to-Device memory copy, | |||||
| * both the source and destination addresses must be 64-byte aligned | |||||
| * | |||||
| * @param dst [IN] destination address pointer | |||||
| * @param destMax [IN] Max length of destination address memory | |||||
| * @param src [IN] source address pointer | |||||
| * @param count [IN] the number of byte to copy | |||||
| * @param kind [IN] memcpy type | |||||
| * @param stream [IN] asynchronized task stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtSynchronizeStream | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, | |||||
| size_t destMax, | |||||
| const void *src, | |||||
| size_t count, | |||||
| aclrtMemcpyKind kind, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Asynchronously initialize memory | |||||
| * and set the contents of memory to the specified value | |||||
| * | |||||
| * @par Function | |||||
| * The memory to be initialized is on the Host or device side, | |||||
| * and the system determines whether | |||||
| * it is host or device according to the address | |||||
| * | |||||
| * @param devPtr [IN] destination address pointer | |||||
| * @param maxCount [IN] Max length of destination address memory | |||||
| * @param value [IN] set value | |||||
| * @param count [IN] the number of byte to set | |||||
| * @param stream [IN] asynchronized task stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtSynchronizeStream | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, | |||||
| size_t maxCount, | |||||
| int32_t value, | |||||
| size_t count, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create stream instance | |||||
| * | |||||
| * @param stream [OUT] the created stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief destroy stream instance | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy streams created through the aclrtCreateStream interface | |||||
| * | |||||
| * @par Restriction | |||||
| * Before calling the aclrtDestroyStream interface to destroy | |||||
| * the specified Stream, you need to call the aclrtSynchronizeStream interface | |||||
| * to ensure that the tasks in the Stream have been completed. | |||||
| * | |||||
| * @param stream [IN] the stream to destroy | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtCreateStream | aclrtSynchronizeStream | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief block the host until all tasks | |||||
| * in the specified stream have completed | |||||
| * | |||||
| * @param stream [IN] the stream to wait | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream); | |||||
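Asynchronous copies and memsets order themselves on a stream and are guaranteed complete only after aclrtSynchronizeStream. A sketch of the create/enqueue/synchronize/destroy pattern:

```c
#include "acl/acl_rt.h"

/* Sketch: zero a device buffer, then copy host data into it, asynchronously.
 * `host` is assumed to be pinned memory from aclrtMallocHost. */
aclError AsyncFill(void *dev, void *host, size_t size) {
    aclrtStream stream = NULL;
    aclError ret = aclrtCreateStream(&stream);
    if (ret != ACL_SUCCESS) return ret;

    ret = aclrtMemsetAsync(dev, size, 0, size, stream);
    if (ret == ACL_SUCCESS) {
        ret = aclrtMemcpyAsync(dev, size, host, size, ACL_MEMCPY_HOST_TO_DEVICE, stream);
    }
    if (ret == ACL_SUCCESS) {
        ret = aclrtSynchronizeStream(stream);  /* both tasks are done after this */
    }
    (void)aclrtDestroyStream(stream);
    return ret;
}
```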
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Blocks the operation of the specified Stream until | |||||
| * the specified Event is completed. | |||||
| * Support for multiple streams waiting for the same event. | |||||
| * | |||||
| * @param stream [IN] the wait stream. If using the default stream, set NULL | |||||
| * @param event [IN] the event to wait | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief set group | |||||
| * | |||||
| * @par Function | |||||
| * set the task to the corresponding group | |||||
| * | |||||
| * @param groupId [IN] group id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | aclrtGetGroupInfoDetail | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtSetGroup(int32_t groupId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get the number of groups | |||||
| * | |||||
| * @par Function | |||||
| * get the number of groups. If the number of groups is zero, | |||||
| * it means that groups are not supported or no group has been created. | |||||
| * | |||||
| * @param count [OUT] the number of group | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetGroupCount(uint32_t *count); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create group information | |||||
| * | |||||
| * @retval null for failed. | |||||
| * @retval OtherValues success. | |||||
| * | |||||
| * @see aclrtDestroyGroupInfo | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclrtGroupInfo *aclrtCreateGroupInfo(); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief destroy group information | |||||
| * | |||||
| * @param groupInfo [IN] pointer to group information | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtCreateGroupInfo | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDestroyGroupInfo(aclrtGroupInfo *groupInfo); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get all group information | |||||
| * | |||||
| * @param groupInfo [OUT] pointer to group information | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtGetGroupCount | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief get detail information of group | |||||
| * | |||||
| * @param groupInfo [IN] pointer to group information | |||||
| * @param groupIndex [IN] group index value | |||||
| * @param attr [IN] group attribute | |||||
| * @param attrValue [OUT] pointer to attribute value | |||||
| * @param valueLen [IN] length of attribute value | |||||
| * @param paramRetSize [OUT] pointer to real length of attribute value | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, | |||||
| int32_t groupIndex, | |||||
| aclrtGroupAttr attr, | |||||
| void *attrValue, | |||||
| size_t valueLen, | |||||
| size_t *paramRetSize); | |||||
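A sketch of the count → create → fill → query sequence, reading the AI Core count of group 0 (the int-typed attributes follow the aclrtGroupAttr enum above; the placeholder failure code is an assumption):

```c
#include <stdio.h>
#include "acl/acl_rt.h"

/* Sketch: query how many AI Cores group 0 owns. */
aclError PrintGroup0Aicore(void) {
    uint32_t count = 0;
    aclError ret = aclrtGetGroupCount(&count);
    if (ret != ACL_SUCCESS || count == 0) return ret;  /* zero: groups unsupported/absent */

    aclrtGroupInfo *info = aclrtCreateGroupInfo();
    if (info == NULL) return -1;  /* placeholder failure code */
    ret = aclrtGetAllGroupInfo(info);
    if (ret == ACL_SUCCESS) {
        int32_t aicore = 0;
        size_t retSize = 0;
        ret = aclrtGetGroupInfoDetail(info, 0, ACL_GROUP_AICORE_INT,
                                      &aicore, sizeof(aicore), &retSize);
        if (ret == ACL_SUCCESS) printf("group 0 aicore = %d\n", aicore);
    }
    (void)aclrtDestroyGroupInfo(info);
    return ret;
}
```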
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief check whether the current device and the peer device support the p2p feature | |||||
| * | |||||
| * @param canAccessPeer [OUT] pointer to save the checking result | |||||
| * @param deviceId [IN] current device id | |||||
| * @param peerDeviceId [IN] peer device id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtDeviceEnablePeerAccess | aclrtDeviceDisablePeerAccess | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDeviceCanAccessPeer(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief enable the p2p feature for the peer device | |||||
| * | |||||
| * @param peerDeviceId [IN] the peer device id | |||||
| * @param flags [IN] reserved field, now it must be zero | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtDeviceCanAccessPeer | aclrtDeviceDisablePeerAccess | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDeviceEnablePeerAccess(int32_t peerDeviceId, uint32_t flags); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief disable the p2p feature for the peer device | |||||
| * | |||||
| * @param peerDeviceId [IN] the peer device id | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclrtDeviceCanAccessPeer | aclrtDeviceEnablePeerAccess | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId); | |||||
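The three p2p interfaces chain check → enable → (work) → disable; a sketch for devices 0 and 1:

```c
#include "acl/acl_rt.h"

/* Sketch: let device 0 access device 1's memory if the pair supports p2p. */
aclError EnableP2pPair(void) {
    int32_t can = 0;
    aclError ret = aclrtDeviceCanAccessPeer(&can, 0, 1);
    if (ret != ACL_SUCCESS || can == 0) return ret;

    ret = aclrtSetDevice(0);  /* enable access from the current device */
    if (ret != ACL_SUCCESS) return ret;
    ret = aclrtDeviceEnablePeerAccess(1, 0);  /* flags is reserved, must be zero */
    /* ... device-to-device copies between 0 and 1 ... */
    if (ret == ACL_SUCCESS) ret = aclrtDeviceDisablePeerAccess(1);
    return ret;
}
```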
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Obtain the free memory and total memory of the specified attribute. | |||||
| * The specified memory includes normal memory and huge memory. | |||||
| * | |||||
| * @param attr [IN] the memory attribute of specified device | |||||
| * @param free [OUT] the free memory of specified device | |||||
| * @param total [OUT] the total memory of specified device. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total); | |||||
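For example, checking HBM headroom before a large allocation:

```c
#include <stdio.h>
#include "acl/acl_rt.h"

/* Sketch: report free vs. total HBM (normal + huge) on the current device. */
aclError PrintHbmUsage(void) {
    size_t freeBytes = 0, totalBytes = 0;
    aclError ret = aclrtGetMemInfo(ACL_HBM_MEM, &freeBytes, &totalBytes);
    if (ret == ACL_SUCCESS) {
        printf("HBM: %zu free of %zu bytes\n", freeBytes, totalBytes);
    }
    return ret;
}
```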
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_ACL_ACL_RT_H_ | |||||
| @@ -0,0 +1,283 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_EXTERNAL_ACL_ACL_TDT_H_ | |||||
| #define INC_EXTERNAL_ACL_ACL_TDT_H_ | |||||
| #include "acl/acl_base.h" | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| enum acltdtTensorType { | |||||
| ACL_TENSOR_DATA_UNDEFINED = -1, | |||||
| ACL_TENSOR_DATA_TENSOR, | |||||
| ACL_TENSOR_DATA_END_OF_SEQUENCE, | |||||
| ACL_TENSOR_DATA_ABNORMAL | |||||
| }; | |||||
| typedef struct acltdtDataItem acltdtDataItem; | |||||
| typedef struct acltdtDataset acltdtDataset; | |||||
| typedef struct acltdtChannelHandle acltdtChannelHandle; | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get tensor type from item | |||||
| * | |||||
| * @param dataItem [IN] pointer to the data item | |||||
| * | |||||
| * @retval Tensor type. | |||||
| * @retval ACL_TENSOR_DATA_UNDEFINED if dataItem is null | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get data type from item | |||||
| * | |||||
| * @param dataItem [IN] pointer to the data item | |||||
| * | |||||
| * @retval Data type. | |||||
| * @retval ACL_DT_UNDEFINED if dataItem is null | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get data address from item | |||||
| * | |||||
| * @param dataItem [IN] pointer to data item | |||||
| * | |||||
| * @retval null for failed | |||||
| * @retval OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get data size from item | |||||
| * | |||||
| * @param dataItem [IN] pointer to data item | |||||
| * | |||||
| * @retval 0 for failed | |||||
| * @retval OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get the number of dims from item | |||||
| * | |||||
| * @param dataItem [IN] pointer to data item | |||||
| * | |||||
| * @retval 0 for failed | |||||
| * @retval OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get dims from item | |||||
| * | |||||
| * @param dataItem [IN] the struct of data item | |||||
| * @param dims [IN|OUT] pointer to the dims of dataItem | |||||
| * @param dimNum [IN] the size of the dims | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create the struct of data item | |||||
| * | |||||
| * @param tdtType [IN] Tdt tensor type | |||||
| * @param dims [IN] pointer of tdtDataItem's dims | |||||
| * @param dimNum [IN] Dim number | |||||
| * @param dataType [IN] Data type | |||||
| * @param data [IN] Data pointer | |||||
| * @param size [IN] Data size | |||||
| * | |||||
| * @retval null for failed | |||||
| * @retval OtherValues success | |||||
| * | |||||
| * @see acltdtDestroyDataItem | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, | |||||
| const int64_t *dims, | |||||
| size_t dimNum, | |||||
| aclDataType dataType, | |||||
| void *data, | |||||
| size_t size); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy the struct of data item | |||||
| * | |||||
| * @param dataItem [IN] pointer to the data item | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtCreateDataItem | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtDestroyDataItem(acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create the tdt dataset | |||||
| * | |||||
| * @retval null for failed | |||||
| * @retval OtherValues success | |||||
| * | |||||
| * @see acltdtDestroyDataset | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY acltdtDataset *acltdtCreateDataset(); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy the tdt dataset | |||||
| * | |||||
| * @param dataset [IN] pointer to the dataset | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtCreateDataset | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtDestroyDataset(acltdtDataset *dataset); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get the data item | |||||
| * | |||||
| * @param dataset [IN] pointer to the dataset | |||||
| * @param index [IN] index of the dataset | |||||
| * | |||||
| * @retval null for failed | |||||
| * @retval OtherValues success | |||||
| * | |||||
| * @see acltdtAddDataItem | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Add a data item to the dataset | |||||
| * | |||||
| * @param dataset [IN|OUT] pointer to the dataset | |||||
| * @param dataItem [IN] pointer to the data item to add | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtGetDataItem | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Get the size of dataset | |||||
| * | |||||
| * @param dataset [IN] pointer to the dataset | |||||
| * | |||||
| * @retval 0 for failed | |||||
| * @retval OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY size_t acltdtGetDatasetSize(const acltdtDataset *dataset); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Stop the channel | |||||
| * | |||||
| * @param handle [IN] pointer to the channel handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtCreateChannel | acltdtDestroyChannel | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create the channel | |||||
| * | |||||
| * @param deviceId [IN] the device id | |||||
| * @param name [IN] the channel's name | |||||
| * | |||||
| * @retval null for failed | |||||
| * @retval OtherValues success | |||||
| * | |||||
| * @see acltdtStopChannel | acltdtDestroyChannel | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy the channel | |||||
| * | |||||
| * @param handle [IN] pointer to the channel handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtCreateChannel | acltdtStopChannel | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Send tensor to device | |||||
| * | |||||
| * @param handle [IN] pointer to the channel handle | |||||
| * @param dataset [IN] pointer to the dataset | |||||
| * @param timeout [IN] reserved parameter; it must currently be -1 | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtReceiveTensor | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, | |||||
| const acltdtDataset *dataset, | |||||
| int32_t timeout); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Receive tensor from device | |||||
| * | |||||
| * @param handle [IN] pointer to the channel handle | |||||
| * @param dataset [OUT] pointer to the dataset | |||||
| * @param timeout [IN] reserved parameter; it must currently be -1 | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see acltdtSendTensor | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, | |||||
| acltdtDataset *dataset, | |||||
| int32_t timeout); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_ACL_ACL_TDT_H_ | |||||
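The channel API above is used in a fixed create/populate/send/destroy order. Below is a minimal hedged sketch of one host-side send; `ACL_FLOAT16` and `ACL_SUCCESS` are assumed to come from `acl/acl_base.h`, the shape is illustrative, and most null/error checks are elided:

```c
#include "acl/acl_tdt.h"

/* Sketch: push one float16 tensor through a tdt channel. `data`/`size`
 * are assumed to be a host buffer prepared by the caller. */
static aclError SendOneTensor(uint32_t deviceId, const char *name,
                              void *data, size_t size) {
  int64_t dims[2] = {1, 16};  /* illustrative shape, not required by the API */
  acltdtChannelHandle *chan = acltdtCreateChannel(deviceId, name);
  if (chan == NULL) {
    return -1;  /* illustrative failure code only */
  }
  acltdtDataItem *item = acltdtCreateDataItem(ACL_TENSOR_DATA_TENSOR, dims, 2,
                                              ACL_FLOAT16, data, size);
  acltdtDataset *ds = acltdtCreateDataset();
  aclError ret = acltdtAddDataItem(ds, item);
  if (ret == ACL_SUCCESS) {
    ret = acltdtSendTensor(chan, ds, -1);  /* timeout is reserved: must be -1 */
  }
  (void)acltdtDestroyDataset(ds);     /* tear down in reverse creation order */
  (void)acltdtDestroyDataItem(item);  /* items are destroyed separately */
  (void)acltdtDestroyChannel(chan);
  return ret;
}
```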
| @@ -0,0 +1,75 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | |||||
| #define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | |||||
| #if defined(_MSC_VER) | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __declspec(dllexport) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #else | |||||
| #ifdef FUNC_VISIBILITY | |||||
| #define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
| #else | |||||
| #define GE_FUNC_VISIBILITY | |||||
| #endif | |||||
| #endif | |||||
| #include <stdint.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009; | |||||
| static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011; | |||||
| static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012; | |||||
| static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013; | |||||
| static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014; | |||||
| static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; | |||||
| static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; | |||||
| static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; | |||||
| static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; | |||||
| static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; | |||||
| static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020; | |||||
| static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021; | |||||
| static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022; | |||||
| static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; | |||||
| static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; | |||||
| static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; | |||||
| static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005; | |||||
| static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; | |||||
| static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; | |||||
| static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; | |||||
| static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; | |||||
| #ifdef __cplusplus | |||||
| } // extern "C" | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | |||||
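The constants above are banded by their leading digit: 145xxx parameter and execution preconditions, 245xxx memory, 545xxx internal failures. A caller can therefore route on the band without enumerating every code; a small sketch (the `ge/ge_error_codes.h` include path is an assumption):

```c
#include <stdint.h>
#include <stdio.h>
#include "ge/ge_error_codes.h"  /* assumed install path for the header above */

/* Map a GE error code to its band, following the constant values above. */
static const char *GeErrorBand(uint32_t code) {
  switch (code / 100000u) {
    case 1:  return "invalid parameter / exec precondition";  /* 145xxx */
    case 2:  return "memory";                                 /* 245xxx */
    case 5:  return "internal";                                /* 545xxx */
    default: return "unknown band";
  }
}

int main(void) {
  printf("%u -> %s\n", ACL_ERROR_GE_MEMORY_ALLOCATION,
         GeErrorBand(ACL_ERROR_GE_MEMORY_ALLOCATION));  /* 245000 -> memory */
  return 0;
}
```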
| @@ -0,0 +1,102 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
| #define __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
| #include <stdint.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
| static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
| static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
| static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
| static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
| static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
| static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
| static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
| static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
| static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
| static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
| static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
| static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||||
| static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
| static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
| static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
| static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
| static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
| static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
| static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
| static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
| static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
| static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
| static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
| static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
| static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
| static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
| static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
| static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
| static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
| static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
| static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
| static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
| static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
| static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
| static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
| static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
| static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register count exhausted | |||||
| static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
| static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
| static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
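The runtime codes follow the same banding (107xxx invalid input, 207xxx feature/resource, 507xxx execution). Of these, only the resource-exhaustion codes are plausibly transient, which suggests a simple caller-side retry predicate; this is a hedged policy sketch, not part of the API:

```c
#include <stdbool.h>
#include <stdint.h>
/* plus the rt error-code header above */

/* Treat only the 207001..207010 allocation/resource codes as retryable;
 * bad input (107xxx) and execution faults (507xxx) are permanent. */
static bool RtErrorMayRetry(int32_t code) {
  return code >= ACL_ERROR_RT_MEMORY_ALLOCATION &&  /* 207001 */
         code <= ACL_ERROR_RT_NO_MODEL_RESOURCE;    /* 207010 */
}
```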
| @@ -0,0 +1,431 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ | |||||
| #define INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ | |||||
| #include "acl/acl.h" | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| typedef enum aclTransType { | |||||
| ACL_TRANS_N, | |||||
| ACL_TRANS_T, | |||||
| ACL_TRANS_NZ, | |||||
| ACL_TRANS_NZ_T | |||||
| } aclTransType; | |||||
| typedef enum aclComputeType { | |||||
| ACL_COMPUTE_HIGH_PRECISION, | |||||
| ACL_COMPUTE_LOW_PRECISION | |||||
| } aclComputeType; | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief perform the matrix-vector multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param m [IN] number of rows of matrix A | |||||
| * @param n [IN] number of columns of matrix A | |||||
| * @param alpha [IN] pointer to scalar used for multiplication, | |||||
| * of the same type as dataTypeC | |||||
| * @param a [IN] pointer to matrix A | |||||
| * @param lda [IN] leading dimension used to store the matrix A | |||||
| * @param dataTypeA [IN] datatype of matrix A | |||||
| * @param x [IN] pointer to vector x | |||||
| * @param incx [IN] stride between consecutive elements of vector x | |||||
| * @param dataTypeX [IN] datatype of vector x | |||||
| * @param beta [IN] pointer to scalar used for multiplication, | |||||
| * of the same type as dataTypeC. If beta == 0, | |||||
| * then y does not have to be a valid input | |||||
| * @param y [IN|OUT] pointer to vector y | |||||
| * @param incy [IN] stride between consecutive elements of vector y | |||||
| * @param dataTypeY [IN] datatype of vector y | |||||
| * @param type [IN] computation type | |||||
| * @param stream [IN] stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, | |||||
| const void *alpha, const void *a, int lda, aclDataType dataTypeA, | |||||
| const void *x, int incx, aclDataType dataTypeX, | |||||
| const void *beta, void *y, int incy, aclDataType dataTypeY, | |||||
| aclComputeType type, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create a handle for performing the matrix-vector multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param m [IN] number of rows of matrix A | |||||
| * @param n [IN] number of columns of matrix A | |||||
| * @param dataTypeA [IN] datatype of matrix A | |||||
| * @param dataTypeX [IN] datatype of vector x | |||||
| * @param dataTypeY [IN] datatype of vector y | |||||
| * @param type [IN] computation type | |||||
| * @param handle [OUT] pointer to the pointer to the handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, | |||||
| int m, | |||||
| int n, | |||||
| aclDataType dataTypeA, | |||||
| aclDataType dataTypeX, | |||||
| aclDataType dataTypeY, | |||||
| aclComputeType type, | |||||
| aclopHandle **handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief perform the matrix-vector multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param m [IN] number of rows of matrix A | |||||
| * @param n [IN] number of columns of matrix A | |||||
| * @param alpha [IN] pointer to scalar used for multiplication | |||||
| * @param a [IN] pointer to matrix A | |||||
| * @param lda [IN] leading dimension used to store the matrix A | |||||
| * @param x [IN] pointer to vector x | |||||
| * @param incx [IN] stride between consecutive elements of vector x | |||||
| * @param beta [IN] pointer to scalar used for multiplication. | |||||
| * If beta value == 0, | |||||
| * then y does not have to be a valid input | |||||
| * @param y [IN|OUT] pointer to vector y | |||||
| * @param incy [IN] stride between consecutive elements of vector y | |||||
| * @param type [IN] computation type | |||||
| * @param stream [IN] stream | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, | |||||
| int m, | |||||
| int n, | |||||
| const aclFloat16 *alpha, | |||||
| const aclFloat16 *a, | |||||
| int lda, | |||||
| const aclFloat16 *x, | |||||
| int incx, | |||||
| const aclFloat16 *beta, | |||||
| aclFloat16 *y, | |||||
| int incy, | |||||
| aclComputeType type, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create a handle for performing the matrix-vector multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param m [IN] number of rows of matrix A | |||||
| * @param n [IN] number of columns of matrix A | |||||
| * @param type [IN] computation type | |||||
| * @param handle [OUT] pointer to the pointer to the handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, | |||||
| int m, | |||||
| int n, | |||||
| aclComputeType type, | |||||
| aclopHandle **handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief perform the matrix-vector multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param m [IN] number of rows of matrix A | |||||
| * @param n [IN] number of columns of matrix A | |||||
| * @param alpha [IN] pointer to scalar used for multiplication | |||||
| * @param a [IN] pointer to matrix A | |||||
| * @param lda [IN] leading dimension used to store the matrix A | |||||
| * @param x [IN] pointer to vector x | |||||
| * @param incx [IN] stride between consecutive elements of vector x | |||||
| * @param beta [IN] pointer to scalar used for multiplication. | |||||
| * If beta value == 0, | |||||
| * then y does not have to be a valid input | |||||
| * @param y [IN|OUT] pointer to vector y | |||||
| * @param incy [IN] stride between consecutive elements of vector y | |||||
| * @param type [IN] computation type | |||||
| * @param stream [IN] stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, | |||||
| int m, | |||||
| int n, | |||||
| const int32_t *alpha, | |||||
| const int8_t *a, | |||||
| int lda, | |||||
| const int8_t *x, | |||||
| int incx, | |||||
| const int32_t *beta, | |||||
| int32_t *y, | |||||
| int incy, | |||||
| aclComputeType type, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create a handle for performing the matrix-vector multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param m [IN] number of rows of matrix A | |||||
| * @param n [IN] number of columns of matrix A | |||||
| * @param type [IN] computation type | |||||
| * @param handle [OUT] pointer to the pointer to the handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, | |||||
| int m, | |||||
| int n, | |||||
| aclComputeType type, | |||||
| aclopHandle **handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief perform the matrix-matrix multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param transB [IN] transpose type of matrix B | |||||
| * @param transC [IN] transpose type of matrix C | |||||
| * @param m [IN] number of rows of matrix A and matrix C | |||||
| * @param n [IN] number of columns of matrix B and matrix C | |||||
| * @param k [IN] number of columns of matrix A and rows of matrix B | |||||
| * @param alpha [IN] pointer to scalar used for multiplication, of the same type as dataTypeC | |||||
| * @param matrixA [IN] pointer to matrix A | |||||
| * @param lda [IN] leading dimension used to store matrix A | |||||
| * @param dataTypeA [IN] datatype of matrix A | |||||
| * @param matrixB [IN] pointer to matrix B | |||||
| * @param ldb [IN] leading dimension used to store matrix B | |||||
| * @param dataTypeB [IN] datatype of matrix B | |||||
| * @param beta [IN] pointer to scalar used for multiplication, | |||||
| * of the same type as dataTypeC. If beta == 0, | |||||
| * then matrixC does not have to be a valid input | |||||
| * @param matrixC [IN|OUT] pointer to matrix C | |||||
| * @param ldc [IN] leading dimension used to store matrix C | |||||
| * @param dataTypeC [IN] datatype of matrix C | |||||
| * @param type [IN] computation type | |||||
| * @param stream [IN] stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, | |||||
| aclTransType transB, | |||||
| aclTransType transC, | |||||
| int m, | |||||
| int n, | |||||
| int k, | |||||
| const void *alpha, | |||||
| const void *matrixA, | |||||
| int lda, | |||||
| aclDataType dataTypeA, | |||||
| const void *matrixB, | |||||
| int ldb, | |||||
| aclDataType dataTypeB, | |||||
| const void *beta, | |||||
| void *matrixC, | |||||
| int ldc, | |||||
| aclDataType dataTypeC, | |||||
| aclComputeType type, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create a handle for performing the matrix-matrix multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param transB [IN] transpose type of matrix B | |||||
| * @param transC [IN] transpose type of matrix C | |||||
| * @param m [IN] number of rows of matrix A and matrix C | |||||
| * @param n [IN] number of columns of matrix B and matrix C | |||||
| * @param k [IN] number of columns of matrix A and rows of matrix B | |||||
| * @param dataTypeA [IN] datatype of matrix A | |||||
| * @param dataTypeB [IN] datatype of matrix B | |||||
| * @param dataTypeC [IN] datatype of matrix C | |||||
| * @param type [IN] computation type | |||||
| * @param handle [OUT] pointer to the pointer to the handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, | |||||
| aclTransType transB, | |||||
| aclTransType transC, | |||||
| int m, | |||||
| int n, | |||||
| int k, | |||||
| aclDataType dataTypeA, | |||||
| aclDataType dataTypeB, | |||||
| aclDataType dataTypeC, | |||||
| aclComputeType type, | |||||
| aclopHandle **handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief perform the matrix-matrix multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param transB [IN] transpose type of matrix B | |||||
| * @param transC [IN] transpose type of matrix C | |||||
| * @param m [IN] number of rows of matrix A and matrix C | |||||
| * @param n [IN] number of columns of matrix B and matrix C | |||||
| * @param k [IN] number of columns of matrix A and rows of matrix B | |||||
| * @param alpha [IN] pointer to scalar used for multiplication | |||||
| * @param matrixA [IN] pointer to matrix A | |||||
| * @param lda [IN] leading dimension used to store the matrix A | |||||
| * @param matrixB [IN] pointer to matrix B | |||||
| * @param ldb [IN] leading dimension used to store the matrix B | |||||
| * @param beta [IN] pointer to scalar used for multiplication. | |||||
| * If beta value == 0, | |||||
| * then matrixC does not have to be a valid input | |||||
| * @param matrixC [IN|OUT] pointer to matrix C | |||||
| * @param ldc [IN] leading dimension used to store the matrix C | |||||
| * @param type [IN] computation type | |||||
| * @param stream [IN] stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, | |||||
| aclTransType transB, | |||||
| aclTransType transC, | |||||
| int m, | |||||
| int n, | |||||
| int k, | |||||
| const aclFloat16 *alpha, | |||||
| const aclFloat16 *matrixA, | |||||
| int lda, | |||||
| const aclFloat16 *matrixB, | |||||
| int ldb, | |||||
| const aclFloat16 *beta, | |||||
| aclFloat16 *matrixC, | |||||
| int ldc, | |||||
| aclComputeType type, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create a handle for performing the matrix-matrix multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param transB [IN] transpose type of matrix B | |||||
| * @param transC [IN] transpose type of matrix C | |||||
| * @param m [IN] number of rows of matrix A and matrix C | |||||
| * @param n [IN] number of columns of matrix B and matrix C | |||||
| * @param k [IN] number of columns of matrix A and rows of matrix B | |||||
| * @param type [IN] computation type | |||||
| * @param handle [OUT] pointer to the pointer to the handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, | |||||
| aclTransType transB, | |||||
| aclTransType transC, | |||||
| int m, | |||||
| int n, | |||||
| int k, | |||||
| aclComputeType type, | |||||
| aclopHandle **handle); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief perform the matrix-matrix multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param transB [IN] transpose type of matrix B | |||||
| * @param transC [IN] transpose type of matrix C | |||||
| * @param m [IN] number of rows of matrix A and matrix C | |||||
| * @param n [IN] number of columns of matrix B and matrix C | |||||
| * @param k [IN] number of columns of matrix A and rows of matrix B | |||||
| * @param alpha [IN] pointer to scalar used for multiplication | |||||
| * @param matrixA [IN] pointer to matrix A | |||||
| * @param lda [IN] leading dimension used to store the matrix A | |||||
| * @param matrixB [IN] pointer to matrix B | |||||
| * @param ldb [IN] leading dimension used to store the matrix B | |||||
| * @param beta [IN] pointer to scalar used for multiplication. | |||||
| * If beta value == 0, | |||||
| * then matrixC does not have to be a valid input | |||||
| * @param matrixC [IN|OUT] pointer to matrix C | |||||
| * @param ldc [IN] leading dimension used to store the matrix C | |||||
| * @param type [IN] computation type | |||||
| * @param stream [IN] stream | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, | |||||
| aclTransType transB, | |||||
| aclTransType transC, | |||||
| int m, | |||||
| int n, | |||||
| int k, | |||||
| const int32_t *alpha, | |||||
| const int8_t *matrixA, | |||||
| int lda, | |||||
| const int8_t *matrixB, | |||||
| int ldb, | |||||
| const int32_t *beta, | |||||
| int32_t *matrixC, | |||||
| int ldc, | |||||
| aclComputeType type, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief create a handle for performing the matrix-matrix multiplication | |||||
| * | |||||
| * @param transA [IN] transpose type of matrix A | |||||
| * @param transB [IN] transpose type of matrix B | |||||
| * @param transC [IN] transpose type of matrix C | |||||
| * @param m [IN] number of rows of matrix A and matrix C | |||||
| * @param n [IN] number of columns of matrix B and matrix C | |||||
| * @param k [IN] number of columns of matrix A and rows of matrix B | |||||
| * @param type [IN] computation type | |||||
| * @param handle [OUT] pointer to the pointer to the handle | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, | |||||
| aclTransType transB, | |||||
| aclTransType transC, | |||||
| int m, | |||||
| int n, | |||||
| int k, | |||||
| aclComputeType type, | |||||
| aclopHandle **handle); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ | |||||
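A shape-only sketch of the half-precision GEMM declared above. The `aclFloatToFloat16` conversion helper is assumed from `acl_base.h`, the buffers are assumed to already live in device memory, and the leading dimensions below assume row-major storage (check the library's storage convention before relying on this):

```c
#include "acl/ops/acl_cblas.h"

/* C = alpha*A*B + beta*C in fp16, no transposition. devA/devB/devC are
 * assumed to be device buffers populated by the caller. */
static aclError RunHgemm(aclFloat16 *devA, aclFloat16 *devB, aclFloat16 *devC,
                         int m, int n, int k, aclrtStream stream) {
  aclFloat16 alpha = aclFloatToFloat16(1.0f);  /* helper assumed from acl_base.h */
  aclFloat16 beta  = aclFloatToFloat16(0.0f);  /* beta == 0: C need not be valid input */
  return aclblasHgemm(ACL_TRANS_N, ACL_TRANS_N, ACL_TRANS_N,
                      m, n, k,
                      &alpha, devA, k,  /* A is m x k, lda = k under row-major */
                      devB, n,          /* B is k x n, ldb = n */
                      &beta, devC, n,   /* C is m x n, ldc = n */
                      ACL_COMPUTE_HIGH_PRECISION, stream);
}
```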
| @@ -0,0 +1,351 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||||
| #define INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||||
| #include "acl/acl.h" | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| typedef struct aclfvInitPara aclfvInitPara; | |||||
| typedef struct aclfvFeatureInfo aclfvFeatureInfo; | |||||
| typedef struct aclfvRepoRange aclfvRepoRange; | |||||
| typedef struct aclfvQueryTable aclfvQueryTable; | |||||
| typedef struct aclfvSearchInput aclfvSearchInput; | |||||
| typedef struct aclfvSearchResult aclfvSearchResult; | |||||
| // search operation type | |||||
| enum aclfvSearchType { | |||||
| SEARCH_1_N, // 1:N operation type | |||||
| SEARCH_N_M // N:M operation type | |||||
| }; | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create fv init param. | |||||
| * | |||||
| * @param fsNum [IN] The feature num | |||||
| * | |||||
| * @retval null for failed. | |||||
| * @retval OtherValues success. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclfvInitPara *aclfvCreateInitPara(uint64_t fsNum); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy fv init param. | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy fv init param information created | |||||
| * through aclfvCreateInitPara interface. | |||||
| * | |||||
| * @param initPara [IN] fv init param. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclfvCreateInitPara | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDestroyInitPara(aclfvInitPara *initPara); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Set the maxTopNumFor1N value in the fv init param. | |||||
| * | |||||
| * @param initPara [IN|OUT] fv init param. | |||||
| * @param maxTopNumFor1N [IN] maxTopNumFor1N value for init param. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvSet1NTopNum(aclfvInitPara *initPara, uint32_t maxTopNumFor1N); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Set the maxTopNumForNM value in the fv init param. | |||||
| * | |||||
| * @param initPara [IN|OUT] fv init param. | |||||
| * @param maxTopNumForNM [IN] maxTopNumForNM value for init param. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t maxTopNumForNM); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create fv feature info. | |||||
| * | |||||
| * @param id0 [IN] The first level library id0 | |||||
| * @param id1 [IN] Secondary library id1 | |||||
| * @param offset [IN] The offset of the first feature in the library | |||||
| * @param featureLen [IN] Single feature length | |||||
| * @param featureCount [IN] Single feature count | |||||
| * @param featureData [IN] Feature value list | |||||
| * @param featureDataLen [IN] Feature value list length | |||||
| * | |||||
| * @retval null for failed. | |||||
| * @retval OtherValues success. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset, | |||||
| uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy fv feature info. | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy fv feature info information created | |||||
| * through aclfvCreateFeatureInfo interface. | |||||
| * | |||||
| * @param featureInfo [IN] fv feature info. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclfvCreateFeatureInfo | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDestroyFeatureInfo(aclfvFeatureInfo *featureInfo); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create fv repo range. | |||||
| * | |||||
| * @param id0Min [IN] id0 start value | |||||
| * @param id0Max [IN] id0 max value | |||||
| * @param id1Min [IN] id1 start value | |||||
| * @param id1Max [IN] id1 max value | |||||
| * | |||||
| * @retval null for failed. OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclfvRepoRange *aclfvCreateRepoRange(uint32_t id0Min, uint32_t id0Max, uint32_t id1Min, | |||||
| uint32_t id1Max); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy fv repo range. | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy fv repo range information created | |||||
| * through aclfvCreateRepoRange interface. | |||||
| * | |||||
| * @param repoRange [IN] fv repo range. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclfvCreateRepoRange | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDestroyRepoRange(aclfvRepoRange *repoRange); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create query table. | |||||
| * | |||||
| * @param queryCnt [IN] Number of tables, the maximum number is 6 | |||||
| * @param tableLen [IN] Single table length; each table is 32KB | |||||
| * @param tableData [IN] Feature value list | |||||
| * @param tableDataLen [IN] The length of memory requested by the tableData pointer | |||||
| * | |||||
| * @retval null for failed. OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclfvQueryTable *aclfvCreateQueryTable(uint32_t queryCnt, uint32_t tableLen, uint8_t *tableData, | |||||
| uint32_t tableDataLen); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy query table. | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy query table information created | |||||
| * through aclfvCreateQueryTable interface. | |||||
| * | |||||
| * @param queryTable [IN] query table. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclfvCreateQueryTable | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDestroyQueryTable(aclfvQueryTable *queryTable); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create search input. | |||||
| * | |||||
| * @param queryTable [IN] query table | |||||
| * @param repoRange [IN] query repo range | |||||
| * @param topk [IN] query topk | |||||
| * | |||||
| * @retval null for failed. OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclfvSearchInput *aclfvCreateSearchInput(aclfvQueryTable *queryTable, aclfvRepoRange *repoRange, | |||||
| uint32_t topk); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy search input. | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy search input information created | |||||
| * through aclfvCreateSearchInput interface. | |||||
| * | |||||
| * @param searchInput [IN] search input. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclfvCreateSearchInput | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInput); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Create search result. | |||||
| * | |||||
| * @param queryCnt [IN] Number of features to retrieve | |||||
| * @param resultNum [IN] Array of per-feature search result counts; the array length is queryCnt | |||||
| * @param resultNumDataLen [IN] resultNum memory length | |||||
| * @param id0 [IN] Level 1 library id0 | |||||
| * @param id1 [IN] Secondary library id1 | |||||
| * @param resultOffset [IN] The offset of the bottom library corresponding | |||||
| * to each feature retrieval result, total length topK * queryCnt | |||||
| * @param resultDistance [IN] Distance, total length topK * queryCnt | |||||
| * @param dataLen [IN] The memory size requested by | |||||
| * id0/id1/resultOffset/resultDistance | |||||
| * | |||||
| * @retval null for failed. OtherValues success | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum, | |||||
| uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance, | |||||
| uint32_t dataLen); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief Destroy search result. | |||||
| * | |||||
| * @par Function | |||||
| * Can only destroy search result information created | |||||
| * through aclfvCreateSearchResult interface. | |||||
| * | |||||
| * @param searchResult [IN] search result. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure | |||||
| * | |||||
| * @see aclfvCreateSearchResult | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDestroySearchResult(aclfvSearchResult *searchResult); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief fv IP initialize. | |||||
| * | |||||
| * @param initPara [IN] fv init param. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvInit(aclfvInitPara *initPara); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief release fv resources. | |||||
| * | |||||
| * @par Function | |||||
| * Can only release fv resources created | |||||
| * through aclfvInit interface. | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| * | |||||
| * @see aclfvInit | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvRelease(); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief fv repo add. | |||||
| * | |||||
| * @param type [IN] repo add type | |||||
| * @param featureInfo [IN] add feature information | |||||
| * @param stream [IN] stream on which the task executes | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvRepoAdd(aclfvSearchType type, aclfvFeatureInfo *featureInfo, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief fv repo del. | |||||
| * | |||||
| * @param type [IN] repo delete type | |||||
| * @param repoRange [IN] repo range information | |||||
| * @param stream [IN] stream on which the task executes | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvRepoDel(aclfvSearchType type, aclfvRepoRange *repoRange, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief fv accurate del. | |||||
| * | |||||
| * @param featureInfo [IN] accurate delete feature information | |||||
| * @param stream [IN] stream on which the task executes | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvDel(aclfvFeatureInfo *featureInfo, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief fv accurate modify. | |||||
| * | |||||
| * @param featureInfo [IN] accurate modify feature information | |||||
| * @param stream [IN] stream on which the task executes | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvModify(aclfvFeatureInfo *featureInfo, aclrtStream stream); | |||||
| /** | |||||
| * @ingroup AscendCL | |||||
| * @brief fv search. | |||||
| * | |||||
| * @param type [IN] search type | |||||
| * @param searchInput [IN] search input | |||||
| * @param searchRst [OUT] search result | |||||
| * @param stream [IN] stream on which the task executes | |||||
| * | |||||
| * @retval ACL_SUCCESS The function is successfully executed. | |||||
| * @retval OtherValues Failure. | |||||
| */ | |||||
| ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput *searchInput, | |||||
| aclfvSearchResult *searchRst, aclrtStream stream); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||||
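The retrieval objects above also follow a strict create/use/destroy pattern. A minimal 1:N search sketch, assuming aclfvInit and aclfvRepoAdd have already run, that `result` was built with aclfvCreateSearchResult, and with repo-range bounds and topk chosen purely for illustration:

```c
#include "acl/ops/acl_retr.h"

/* Run one 1:N search over a single 32KB query table; null checks elided. */
static aclError SearchTop5(uint8_t *table, aclfvSearchResult *result,
                           aclrtStream stream) {
  const uint32_t kTableLen = 32U * 1024U;  /* single-table length per the doc above */
  aclfvQueryTable *qt = aclfvCreateQueryTable(1, kTableLen, table, kTableLen);
  aclfvRepoRange *range = aclfvCreateRepoRange(0, 1023, 0, 1023);  /* illustrative */
  aclfvSearchInput *input = aclfvCreateSearchInput(qt, range, 5);  /* topk = 5 */
  aclError ret = aclfvSearch(SEARCH_1_N, input, result, stream);
  (void)aclfvDestroySearchInput(input);  /* reverse creation order */
  (void)aclfvDestroyRepoRange(range);
  (void)aclfvDestroyQueryTable(qt);
  return ret;
}
```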
| @@ -0,0 +1,133 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| /** | |||||
| * @file hccl.h | |||||
| * @brief HCCL API | |||||
| */ | |||||
| #ifndef HCCL_H_ | |||||
| #define HCCL_H_ | |||||
| #include <hccl/hccl_types.h> | |||||
| #include <acl/acl.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif // __cplusplus | |||||
| /** | |||||
| * @brief Initialize HCCL. | |||||
| * | |||||
| * @param clusterInfo A string identifying the cluster info file path, including the file name. | |||||
| * @param rank An integer identifying the id of the rank. | |||||
| * @param comm A pointer identifying the initialized communication resource. | |||||
| * @return HcclResult | |||||
| * @see HcclCommDestroy() | |||||
| */ | |||||
| extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); | |||||
| /** | |||||
| * @brief Get hccl root info. | |||||
| * | |||||
| * @param rootInfo A pointer identifying the hccl root info. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); | |||||
| /** | |||||
| * @brief Initialize HCCL with root info. | |||||
| * | |||||
| * @param nRanks An integer identifying the rank size of the cluster. | |||||
| * @param rootInfo A struct identifying the hccl root info. | |||||
| * @param rank An integer identifying the id of the rank. | |||||
| * @param comm A pointer identifying the initialized communication resource. | |||||
| * @return HcclResult | |||||
| * @see HcclCommDestroy() | |||||
| */ | |||||
| extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); | |||||
| /** | |||||
| * @brief AllReduce operator. | |||||
| * | |||||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||||
| * @param count An integer(u64) identifying the number of the output data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32. | |||||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||||
| * @param comm A pointer identifying the communication resource the operator is based on. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, | |||||
| HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
| /** | |||||
| * @brief Broadcast operator. | |||||
| * | |||||
| * @param buf A pointer identifying the data address of the operator. | |||||
| * @param count An integer(u64) identifying the number of the data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param root An integer(u32) identifying the root rank in the operator. | |||||
| * @param comm A pointer identifying the communication resource the operator is based on. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @brief ReduceScatter operator. | |||||
| * | |||||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||||
| * @param recvCount An integer(u64) identifying the number of the output data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||||
| * @param comm A pointer identifying the communication resource the operator is based on. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||||
| HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
| /** | |||||
| * @brief AllGather operator. | |||||
| * | |||||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||||
| * @param sendCount An integer(u64) identifying the number of the input data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param comm A pointer identifying the communication resource the operator is based on. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, | |||||
| HcclComm comm, aclrtStream stream); | |||||
| /** | |||||
| * @brief Destroy HCCL comm | |||||
| * | |||||
| * @param comm A pointer identifying the communication resource to destroy. | |||||
| * @return HcclResult | |||||
| * @see HcclCommInitClusterInfo() | |||||
| */ | |||||
| extern HcclResult HcclCommDestroy(HcclComm comm); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif // __cplusplus | |||||
| #endif // HCCL_H_ | |||||
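An end-to-end sketch of the cluster-info initialization path followed by one sum AllReduce. The `HCCL_SUCCESS`, `HCCL_DATA_TYPE_FP32`, and `HCCL_REDUCE_SUM` enumerators are assumed to come from `hccl_types.h`, `ranktable.json` is a placeholder path, and the buffers are assumed to be device memory:

```c
#include <hccl/hccl.h>

/* One float32 sum AllReduce over an existing stream. devSend/devRecv are
 * assumed to hold `count` floats each in device memory. */
static HcclResult AllReduceOnce(uint32_t rank, void *devSend, void *devRecv,
                                uint64_t count, aclrtStream stream) {
  HcclComm comm = NULL;
  HcclResult ret = HcclCommInitClusterInfo("ranktable.json", rank, &comm);
  if (ret != HCCL_SUCCESS) {
    return ret;
  }
  ret = HcclAllReduce(devSend, devRecv, count, HCCL_DATA_TYPE_FP32,
                      HCCL_REDUCE_SUM, comm, stream);
  (void)HcclCommDestroy(comm);  /* pairs with HcclCommInitClusterInfo */
  return ret;
}
```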
| @@ -0,0 +1,102 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
| #define __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
| #include <stdint.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
| static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
| static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
| static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
| static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
| static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
| static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
| static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
| static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
| static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
| static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
| static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
| static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
| static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
| static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
| static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
| static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore overflow | |||||
| static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
| static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
| static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
| static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
| static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
| static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
| static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
| static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
| static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
| static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
| static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
| static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
| static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
| static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
| static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
| static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
| static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
| static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
| static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
| static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
| static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
| static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
| static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
| static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
| static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
| static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
| static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
| static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
| static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
| static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
| static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
| static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
| static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
| static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
| static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
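Client code typically funnels every runtime call through one check against these constants. A small sketch, assuming nothing beyond the header above; the CheckRt helper name and include path are made up for illustration:

```cpp
#include <cstdint>
#include <cstdio>

#include "rt_error_codes.h"  // the header above; adjust the path to your tree

// Hypothetical helper: classify a runtime return code using the
// numeric bands of the constants declared above.
static bool CheckRt(int32_t ret, const char *what) {
  if (ret == ACL_RT_SUCCESS) {
    return true;
  }
  // The leading digits encode the class: 107xxx parameter/usage errors,
  // 207xxx feature/resource errors, 507xxx execution or internal errors.
  if (ret >= ACL_ERROR_RT_INTERNAL_ERROR) {
    std::fprintf(stderr, "%s: execution error %d\n", what, ret);
  } else if (ret >= ACL_ERROR_RT_FEATURE_NOT_SUPPORT) {
    std::fprintf(stderr, "%s: feature/resource error %d\n", what, ret);
  } else {
    std::fprintf(stderr, "%s: invalid parameter %d\n", what, ret);
  }
  return false;
}
```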
| @@ -271,13 +271,14 @@ class FusionEndTaskInfo : public TaskInfo { | |||||
| class HcclTaskInfo : public TaskInfo { | class HcclTaskInfo : public TaskInfo { | ||||
| public: | public: | ||||
| HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr, | HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr, | ||||
| void *output_data_addr, int64_t workspace_size, int64_t hccl_stream_num, | |||||
| void *output_data_addr, void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, | |||||
| const std::vector<uint8_t> &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, | const std::vector<uint8_t> &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, | ||||
| int64_t op_type, int64_t data_type, const std::string &group, bool dump_flag) | int64_t op_type, int64_t data_type, const std::string &group, bool dump_flag) | ||||
| : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag), | : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag), | ||||
| hccl_type_(hccl_type), | hccl_type_(hccl_type), | ||||
| input_data_addr_(input_data_addr), | input_data_addr_(input_data_addr), | ||||
| output_data_addr_(output_data_addr), | output_data_addr_(output_data_addr), | ||||
| workspace_addr_(workspace_addr), | |||||
| workspace_size_(workspace_size), | workspace_size_(workspace_size), | ||||
| hccl_stream_num_(hccl_stream_num), | hccl_stream_num_(hccl_stream_num), | ||||
| private_def_(private_def), | private_def_(private_def), | ||||
| @@ -292,6 +293,7 @@ class HcclTaskInfo : public TaskInfo { | |||||
| const std::string &hccl_type() const { return hccl_type_; } | const std::string &hccl_type() const { return hccl_type_; } | ||||
| void *input_data_addr() const { return input_data_addr_; } | void *input_data_addr() const { return input_data_addr_; } | ||||
| void *output_data_addr() const { return output_data_addr_; } | void *output_data_addr() const { return output_data_addr_; } | ||||
| void *workspace_addr() const { return workspace_addr_; } | |||||
| int64_t workspace_size() const { return workspace_size_; } | int64_t workspace_size() const { return workspace_size_; } | ||||
| int64_t hccl_stream_num() const { return hccl_stream_num_; } | int64_t hccl_stream_num() const { return hccl_stream_num_; } | ||||
| const std::vector<uint8_t> &private_def() const { return private_def_; } | const std::vector<uint8_t> &private_def() const { return private_def_; } | ||||
| @@ -306,6 +308,7 @@ class HcclTaskInfo : public TaskInfo { | |||||
| std::string hccl_type_; | std::string hccl_type_; | ||||
| void *input_data_addr_; | void *input_data_addr_; | ||||
| void *output_data_addr_; | void *output_data_addr_; | ||||
| void *workspace_addr_; | |||||
| int64_t workspace_size_; | int64_t workspace_size_; | ||||
| int64_t hccl_stream_num_; | int64_t hccl_stream_num_; | ||||
| std::vector<uint8_t> private_def_; | std::vector<uint8_t> private_def_; | ||||
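The new workspace_addr parameter suggests that the workspace device buffer is now supplied by the caller alongside its size, rather than being derived from the size alone. A minimal construction sketch, assuming the ge::model_runner namespace this header conventionally lives in; every value and op name below is a placeholder:

```cpp
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

using namespace ge::model_runner;  // assumed from the surrounding header

std::shared_ptr<HcclTaskInfo> MakeHcclTask(void *in, void *out, void *workspace,
                                           int64_t workspace_size) {
  std::vector<uint8_t> private_def;  // serialized HCCL private definition
  return std::make_shared<HcclTaskInfo>(
      "allreduce_op",        // op_name
      0U,                    // stream_id
      "HcomAllReduce",       // hccl_type
      in, out,               // input/output device addresses
      workspace,             // NEW: caller-supplied workspace address
      workspace_size,        // workspace size in bytes
      1,                     // hccl_stream_num
      private_def,
      nullptr,               // ops_kernel_store
      1,                     // count
      0,                     // root_id
      0,                     // op_type
      0,                     // data_type
      "hccl_world_group",    // group
      false);                // dump_flag
}
```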
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -65,6 +65,8 @@ in aipp config file, framework will auto add one input node to graph at last. \n | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with the TensorFlow operator AippData. | *Compatible with the TensorFlow operator AippData. | ||||
| *@par Restrictions: | |||||
| *Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. | |||||
| */ | */ | ||||
| REG_OP(AippData) | REG_OP(AippData) | ||||
| .INPUT(data, TensorType::ALL()) | .INPUT(data, TensorType::ALL()) | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -39,6 +39,7 @@ | |||||
| #include "image_ops.h" | #include "image_ops.h" | ||||
| #include "internal_ops.h" | #include "internal_ops.h" | ||||
| #include "linalg_ops.h" | #include "linalg_ops.h" | ||||
| #include "list_ops.h" | |||||
| #include "logging_ops.h" | #include "logging_ops.h" | ||||
| #include "lookup_ops.h" | #include "lookup_ops.h" | ||||
| #include "math_ops.h" | #include "math_ops.h" | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1153,6 +1153,79 @@ REG_OP(EditDistance) | |||||
| .OUTPUT(output, TensorType({DT_FLOAT})) | .OUTPUT(output, TensorType({DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(EditDistance) | .OP_END_FACTORY_REG(EditDistance) | ||||
| /** | |||||
| * @brief Sorts a tensor along the specified axis. | |||||
| * @par Inputs: | |||||
| * @li x: An ND tensor of type float16, float32 or double. | |||||
| * @par Attributes: | |||||
| * @li axis: An optional int. The dimension to sort along. Defaults to -1 (the last dimension). | |||||
| * @li descending: An optional bool. Controls whether the sort order is descending. Defaults to False. | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the same type as "x". | |||||
| * @attention Constraints: | |||||
| * @li "axis" must select the last dimension. | |||||
| * @li This TBE operator is recommended when the data to sort holds fewer than 150K elements; | |||||
| descending sorts perform better than ascending ones. | |||||
| * @li The upper limit of data on Ascend910 is 2000K elements. | |||||
| */ | |||||
| REG_OP(SortV2) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .ATTR(axis, Int, -1) | |||||
| .ATTR(descending, Bool, false) | |||||
| .OP_END_FACTORY_REG(SortV2) | |||||
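For reference, a registration like this is normally consumed through the generated IR operator class. A sketch assuming the usual REG_OP-generated accessors (set_input_x, set_attr_axis, set_attr_descending); the node name is a placeholder:

```cpp
#include "array_ops.h"  // the header this registration lives in

// Build a SortV2 node that sorts the last axis in descending order.
ge::op::SortV2 MakeSort(ge::Operator &producer) {
  ge::op::SortV2 sort_op("sort_v2_node");
  sort_op.set_input_x(producer);       // wire the producer's output into x
  sort_op.set_attr_axis(-1);           // only the last dimension is supported
  sort_op.set_attr_descending(true);   // descending is the faster path
  return sort_op;
}
```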
| /** | |||||
| * @brief Expand the input tensor to a compatible shape. \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li x: A Tensor. Must be one of the following types: | |||||
| * float16, float32, int32, int8, uint8. \n | |||||
| * @li shape: A Tensor specifying the shape that the input tensor is expanded to. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "x" and the shape specified by "shape". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the ONNX operator Expand. | |||||
| */ | |||||
| REG_OP(Expand) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
| .INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
| .OP_END_FACTORY_REG(Expand) | |||||
| /** | |||||
| * @brief Expand the input tensor to a compatible shape. \n | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li x: A Tensor. Must be one of the following types: | |||||
| * float16, float32, int32, int8, uint8. \n | |||||
| * @par Attributes: | |||||
| * @li shape: A required list of ints specifying the shape that the input tensor is expanded to. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "x" and the shape specified by the "shape" attribute. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the ONNX operator Expand. | |||||
| */ | |||||
| REG_OP(ExpandD) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
| .REQUIRED_ATTR(shape, ListInt) | |||||
| .OP_END_FACTORY_REG(ExpandD) | |||||
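The pair above follows the usual convention in these headers: Expand takes the target shape as a runtime tensor input, while ExpandD (the compile-time "D" variant) fixes it as a ListInt attribute. A sketch of the difference, assuming the generated IR classes and their standard setters; names and shape values are placeholders:

```cpp
#include <vector>
#include "array_ops.h"

// Runtime-shape variant: the target shape arrives as a tensor input.
ge::op::Expand MakeExpand(ge::Operator &x, ge::Operator &shape) {
  ge::op::Expand expand_op("expand_node");
  expand_op.set_input_x(x);
  expand_op.set_input_shape(shape);   // shape known only at execution time
  return expand_op;
}

// Compile-time variant: the target shape is baked in as an attribute.
ge::op::ExpandD MakeExpandD(ge::Operator &x) {
  ge::op::ExpandD expand_op("expand_d_node");
  expand_op.set_input_x(x);
  expand_op.set_attr_shape({2, 3, 4});  // fixed at graph-build time
  return expand_op;
}
```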
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -0,0 +1,58 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| /*! | |||||
| * \file avg_pool_1d_ops.h | |||||
| * \brief | |||||
| */ | |||||
| #ifndef OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_ | |||||
| #define OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_ | |||||
| #include "graph/operator_reg.h" | |||||
| namespace ge { | |||||
| /** | |||||
| *@brief Generates an auxiliary matrix of averaging factors for 1-D average pooling. \n | |||||
| *@par Inputs: | |||||
| * @li x: A tensor. Must be one of the following types: uint8, int8, int16, int32, | |||||
| int64, float16, float, double. The format must be NHWC, NCHW or NC1HWC0. | |||||
| *@par Attributes: | |||||
| *@li ksize: A required int. The kernel size. | |||||
| *@li strides: A required int. The stride of the pooling window. | |||||
| *@li pads: A required list of ints. The padding sizes. | |||||
| *@li ceil_mode: An optional bool. Defaults to false. | |||||
| *@li count_include_pad: An optional bool. Defaults to false. \n | |||||
| *@par Outputs: | |||||
| *y: A tensor with the same type as "x". \n | |||||
| */ | |||||
| REG_OP(AvgPool1DAvgMatrix) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8, | |||||
| DT_INT32, DT_INT64, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8, | |||||
| DT_INT32, DT_INT64, DT_DOUBLE})) | |||||
| .REQUIRED_ATTR(ksize, Int) | |||||
| .REQUIRED_ATTR(strides, Int) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .ATTR(count_include_pad, Bool, false) | |||||
| .OP_END_FACTORY_REG(AvgPool1DAvgMatrix) | |||||
| } | |||||
| #endif | |||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -107,11 +107,13 @@ across multiple sessions . \n | |||||
| REG_OP(Unbatch) | REG_OP(Unbatch) | ||||
| .INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(index, TensorType({DT_INT64})) | .INPUT(index, TensorType({DT_INT64})) | ||||
| .INPUT(id, TensorType({DT_INT64})) | .INPUT(id, TensorType({DT_INT64})) | ||||
| .OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .REQUIRED_ATTR(timeout_micros, Int) | .REQUIRED_ATTR(timeout_micros, Int) | ||||
| .ATTR(container, String, "") | .ATTR(container, String, "") | ||||
| .ATTR(shared_name, String, "") | .ATTR(shared_name, String, "") | ||||
| @@ -146,13 +148,16 @@ across multiple sessions . \n | |||||
| REG_OP(UnbatchGrad) | REG_OP(UnbatchGrad) | ||||
| .INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(index, TensorType({DT_INT64})) | .INPUT(index, TensorType({DT_INT64})) | ||||
| .INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(id, TensorType({DT_INT64})) | .INPUT(id, TensorType({DT_INT64})) | ||||
| .OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | .OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | ||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||||
| DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .ATTR(container, String, "") | .ATTR(container, String, "") | ||||
| .ATTR(shared_name, String, "") | .ATTR(shared_name, String, "") | ||||
| .OP_END_FACTORY_REG(UnbatchGrad) | .OP_END_FACTORY_REG(UnbatchGrad) | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -908,7 +908,7 @@ REG_OP(TensorArray) | |||||
| .OUTPUT(handle, TensorType({DT_RESOURCE})) | .OUTPUT(handle, TensorType({DT_RESOURCE})) | ||||
| .OUTPUT(flow, TensorType({DT_FLOAT})) | .OUTPUT(flow, TensorType({DT_FLOAT})) | ||||
| .REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
| .ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE) | |||||
| .ATTR(element_shape, ListInt, ge::UNKNOWN_RANK) | |||||
| .ATTR(dynamic_size, Bool, false) | .ATTR(dynamic_size, Bool, false) | ||||
| .ATTR(clear_after_read, Bool, true) | .ATTR(clear_after_read, Bool, true) | ||||
| .ATTR(identical_element_shapes, Bool, false) | .ATTR(identical_element_shapes, Bool, false) | ||||
| @@ -963,7 +963,7 @@ REG_OP(TensorArrayConcat) | |||||
| DT_QUINT8, DT_QINT32})) | DT_QUINT8, DT_QINT32})) | ||||
| .OUTPUT(lengths, TensorType({DT_INT64})) | .OUTPUT(lengths, TensorType({DT_INT64})) | ||||
| .REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
| .ATTR(element_shape_except0, ListInt, ge::UNKNOWN_SHAPE) | |||||
| .ATTR(element_shape_except0, ListInt, ge::UNKNOWN_RANK) | |||||
| .OP_END_FACTORY_REG(TensorArrayConcat) | .OP_END_FACTORY_REG(TensorArrayConcat) | ||||
| /** | /** | ||||
| @@ -999,7 +999,7 @@ REG_OP(TensorArrayGather) | |||||
| DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, | DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, | ||||
| DT_QUINT8, DT_QINT32})) | DT_QUINT8, DT_QINT32})) | ||||
| .REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
| .ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE) | |||||
| .ATTR(element_shape, ListInt, ge::UNKNOWN_RANK) | |||||
| .OP_END_FACTORY_REG(TensorArrayGather) | .OP_END_FACTORY_REG(TensorArrayGather) | ||||
| /** | /** | ||||
| @@ -1430,6 +1430,24 @@ REG_OP(OrderedMapClear) | |||||
| .ATTR(shared_name, String, "") | .ATTR(shared_name, String, "") | ||||
| .OP_END_FACTORY_REG(OrderedMapClear) | .OP_END_FACTORY_REG(OrderedMapClear) | ||||
| /** | |||||
| *@brief FakeQueue, which supports the TensorFlow API FixedLengthRecordReader. \n | |||||
| *@par Inputs: | |||||
| *Including: | |||||
| * @li resource: A Tensor of type DT_RESOURCE. | |||||
| *@par Outputs: | |||||
| *handle: A Tensor of type DT_STRING ref. \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the TensorFlow operator FakeQueue. | |||||
| */ | |||||
| REG_OP(FakeQueue) | |||||
| .INPUT(resource, TensorType({DT_RESOURCE})) | |||||
| .OUTPUT(handle, TensorType({DT_STRING})) | |||||
| .OP_END_FACTORY_REG(FakeQueue) | |||||
| /** | /** | ||||
| *@brief Returns the number of incomplete elements in the underlying container. \n | *@brief Returns the number of incomplete elements in the underlying container. \n | ||||
| @@ -2258,6 +2276,7 @@ REG_OP(LruCache) | |||||
| .ATTR(shared_name, String, "LruCache") | .ATTR(shared_name, String, "LruCache") | ||||
| .ATTR(cache_size, Int, 100000) | .ATTR(cache_size, Int, 100000) | ||||
| .ATTR(load_factor, Float, 1) | .ATTR(load_factor, Float, 1) | ||||
| .REQUIRED_ATTR(dtype, Type) | |||||
| .OP_END_FACTORY_REG(LruCache) | .OP_END_FACTORY_REG(LruCache) | ||||
| /** | /** | ||||
| @@ -2277,9 +2296,9 @@ REG_OP(CacheAdd) | |||||
| .INPUT(cache, TensorType({DT_RESOURCE})) | .INPUT(cache, TensorType({DT_RESOURCE})) | ||||
| .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | ||||
| .OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | .OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | ||||
| .OUTPUT(swap_in_idx, TensorType({DT_INT64})) | |||||
| .OUTPUT(swap_in_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||||
| .OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | .OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | ||||
| .OUTPUT(swap_out_idx, TensorType({DT_INT64})) | |||||
| .OUTPUT(swap_out_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||||
| .OP_END_FACTORY_REG(CacheAdd) | .OP_END_FACTORY_REG(CacheAdd) | ||||
| /** | /** | ||||
| @@ -2295,9 +2314,31 @@ REG_OP(CacheAdd) | |||||
| REG_OP(CacheRemoteIndexToLocal) | REG_OP(CacheRemoteIndexToLocal) | ||||
| .INPUT(cache, TensorType({DT_RESOURCE})) | .INPUT(cache, TensorType({DT_RESOURCE})) | ||||
| .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | ||||
| .OUTPUT(local_idx, TensorType({DT_INT64})) | |||||
| .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||||
| .OP_END_FACTORY_REG(CacheRemoteIndexToLocal) | .OP_END_FACTORY_REG(CacheRemoteIndexToLocal) | ||||
| /** | |||||
| *@brief CacheAllIndexToLocal, get all ids in cache | |||||
| *@par Inputs: | |||||
| *cache: resource data | |||||
| *@par Outputs: | |||||
| *local_idx: ids in cache. | |||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | |||||
| REG_OP(CacheAllIndexToLocal) | |||||
| .INPUT(cache, TensorType({DT_RESOURCE})) | |||||
| .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||||
| .REQUIRED_ATTR(dtype, Type) | |||||
| .OP_END_FACTORY_REG(CacheAllIndexToLocal) | |||||
| REG_OP(DynamicGetNext) | |||||
| .INPUT(x, TensorType::ALL()) | |||||
| .DYNAMIC_OUTPUT(y, TensorType::ALL()) | |||||
| .ATTR(output_types, ListType, {}) | |||||
| .ATTR(output_shapes, ListListInt, {{}, {}}) | |||||
| .ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile") | |||||
| .ATTR(_getnext_inputs_shape_range, String, "") | |||||
| .OP_END_FACTORY_REG(DynamicGetNext) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -32,6 +32,9 @@ namespace ge { | |||||
| * float16, float32, double, int32, uint8, int16, int8, complex64, int64, | * float16, float32, double, int32, uint8, int16, int8, complex64, int64, | ||||
| * qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n | * qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n | ||||
| *@par Attributes: | |||||
| *N: A required attribute of type int32, specifying the number of inputs. \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor. Has the same shape and type as the elements of "x". \n | *y: A Tensor. Has the same shape and type as the elements of "x". \n | ||||
| @@ -122,7 +125,8 @@ REG_OP(MinimumGrad) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *One input: | *One input: | ||||
| *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, | *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, | ||||
| int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n | |||||
| int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | |||||
| For float32 type, the actual calculation on the chip is based on float16. \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *dst_type: An required attribute of type int32, specifying the dst data type. \n | *dst_type: An required attribute of type int32, specifying the dst data type. \n | ||||
| @@ -611,6 +615,15 @@ REG_OP(Log1p) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor. Has the same type as "x1". | *y: A Tensor. Has the same type as "x1". | ||||
| *@attention Constraints: | |||||
| *@li x2: The input data must not contain 0. | |||||
| *@li When NUM exceeds 2048, the accuracy of the operator cannot meet the | |||||
| *requirement of double thousandths in the mini form. | |||||
| *@li Due to architecture differences, the calculation results of this operator | |||||
| *on the NPU and the CPU may be inconsistent. | |||||
| *@li If the shape is expressed as (D1,D2...,Dn), then D1*D2...*Dn<=1000000 and n<=8. | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with the TensorFlow operator Mod. | *Compatible with the TensorFlow operator Mod. | ||||
| */ | */ | ||||
| @@ -2042,6 +2055,15 @@ REG_OP(FloorDiv) | |||||
| * | * | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: Result remainder. | *y: Result remainder. | ||||
| *@attention Constraints: | |||||
| *@li x2: The input data must not contain 0. | |||||
| *@li When NUM exceeds 2048, the accuracy of the operator cannot meet the | |||||
| *requirement of double thousandths in the mini form. | |||||
| *@li Due to architecture differences, the calculation results of this operator | |||||
| *on the NPU and the CPU may be inconsistent. | |||||
| *@li If the shape is expressed as (D1,D2...,Dn), then D1*D2...*Dn<=1000000 and n<=8. | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with the TensorFlow operator FloorMod. | * Compatible with the TensorFlow operator FloorMod. | ||||
| */ | */ | ||||
| @@ -2168,6 +2190,14 @@ REG_OP(Tan) | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor. Has the same type as "x1". \n | *y: A Tensor. Has the same type as "x1". \n | ||||
| *@attention Constraints: | |||||
| *@li x2: The input data must not contain 0. | |||||
| *@li When NUM exceeds 2048, the accuracy of the operator cannot meet the | |||||
| *requirement of double thousandths in the mini form. | |||||
| *@li Due to architecture differences, the calculation results of this operator | |||||
| *on the NPU and the CPU may be inconsistent. | |||||
| *@li If the shape is expressed as (D1,D2...,Dn), then D1*D2...*Dn<=1000000 and n<=8. | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *@li Compatible with the TensorFlow operator TruncateMod. | *@li Compatible with the TensorFlow operator TruncateMod. | ||||
| */ | */ | ||||
| @@ -2424,6 +2454,25 @@ REG_OP(Eltwise) | |||||
| .ATTR(coeff, ListFloat, {}) | .ATTR(coeff, ListFloat, {}) | ||||
| .OP_END_FACTORY_REG(Eltwise) | .OP_END_FACTORY_REG(Eltwise) | ||||
| /** | |||||
| *@brief Computes the inverse error function of each element of input. \n | |||||
| *@par Inputs: | |||||
| *One inputs, including: | |||||
| * @li input_x: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| *@par Outputs: | |||||
| *y: A Tensor with the same type and shape of input_x's. \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator Erfinv. \n | |||||
| */ | |||||
| REG_OP(Erfinv) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OP_END_FACTORY_REG(Erfinv) | |||||
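As a numerical cross-check of what the registration promises, namely y such that erf(y) = x, here is a small standalone Newton-iteration reference. This is only a sketch of the defining equation, not the TBE kernel:

```cpp
#include <cmath>

// Reference inverse error function: solves erf(y) = x by Newton's method.
// d/dy erf(y) = 2/sqrt(pi) * exp(-y*y), so each step is
//   y -= (erf(y) - x) * sqrt(pi)/2 * exp(y*y).
double ErfinvRef(double x) {  // valid for x in (-1, 1)
  const double half_sqrt_pi = std::sqrt(std::acos(-1.0)) / 2.0;
  double y = 0.0;             // erfinv(0) = 0 is a safe starting point
  for (int i = 0; i < 50; ++i) {
    double err = std::erf(y) - x;
    if (std::fabs(err) < 1e-12) break;
    y -= err * half_sqrt_pi * std::exp(y * y);
  }
  return y;
}
```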
| /** | /** | ||||
| *@brief Computes element-wise population count. \n | *@brief Computes element-wise population count. \n | ||||
| @@ -2829,9 +2878,9 @@ REG_OP(AdamApplyOneAssign) | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
| */ | */ | ||||
| REG_OP(LambApplyOptimizerAssign) | REG_OP(LambApplyOptimizerAssign) | ||||
| .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -2842,6 +2891,8 @@ REG_OP(LambApplyOptimizerAssign) | |||||
| .INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) | .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .OUTPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(LambApplyOptimizerAssign) | .OP_END_FACTORY_REG(LambApplyOptimizerAssign) | ||||
| /** | /** | ||||
| @@ -2873,7 +2924,8 @@ REG_OP(LambApplyWeightAssign) | |||||
| .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| .INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(LambApplyWeightAssign) | .OP_END_FACTORY_REG(LambApplyWeightAssign) | ||||
| /** | /** | ||||
| @@ -3329,8 +3381,451 @@ REG_OP(TensorRedirect) | |||||
| .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, | .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, | ||||
| DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) | DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) | ||||
| .OP_END_FACTORY_REG(TensorRedirect) | .OP_END_FACTORY_REG(TensorRedirect) | ||||
| } // namespace ge | |||||
| /** | |||||
| * @brief Performs the element-wise division of tensor x1 by tensor x2, | |||||
| * multiplies the result by the scalar value and adds it to tensor input_data. | |||||
| * @par Inputs: | |||||
| * Four inputs, including: | |||||
| * @li input_data: A mutable input Tensor. Must be one of the following types: | |||||
| * float16, float32. | |||||
| * @li x1: A mutable input Tensor of the same type as input_data. | |||||
| * @li x2: A mutable input Tensor of the same type as input_data. | |||||
| * @li value: A mutable input Tensor. Must be one of the following types: | |||||
| * float16, float32, int32. \n | |||||
| * @par Outputs: | |||||
| * @li y: A mutable Tensor. Has the same type as "input_data". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Addcdiv. | |||||
| */ | |||||
| REG_OP(Addcdiv) | |||||
| .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32 })) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(Addcdiv) | |||||
| /** | |||||
| * @brief Performs the element-wise multiplication of tensor x1 by tensor x2, | |||||
| * multiplies the result by the scalar value and adds it to tensor input_data. | |||||
| * @par Inputs: | |||||
| * Four inputs, including: | |||||
| * @li input_data: A mutable input Tensor. Must be one of the following types: | |||||
| * float16, float32, int8, int32, uint8. | |||||
| * @li x1: A mutable input Tensor of the same type as input_data. | |||||
| * @li x2: A mutable input Tensor of the same type as input_data. | |||||
| * @li value: A tensor holding a single element of the same type as input_data. \n | |||||
| * @par Outputs: | |||||
| * @li y: A mutable output Tensor. Has the same type as "input_data". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Addcmul. | |||||
| */ | |||||
| REG_OP(Addcmul) | |||||
| .INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
| .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
| .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
| .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||||
| .OP_END_FACTORY_REG(Addcmul) | |||||
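Element by element, both registrations compute input_data + value * f(x1, x2), with f being division for Addcdiv and multiplication for Addcmul. A scalar reference sketch, with illustrative function names:

```cpp
#include <cstddef>

// y[i] = input_data[i] + value * (x1[i] / x2[i])
void AddcdivRef(const float *input_data, const float *x1, const float *x2,
                float value, float *y, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    y[i] = input_data[i] + value * (x1[i] / x2[i]);  // x2 must not contain 0
  }
}

// y[i] = input_data[i] + value * (x1[i] * x2[i])
void AddcmulRef(const float *input_data, const float *x1, const float *x2,
                float value, float *y, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    y[i] = input_data[i] + value * (x1[i] * x2[i]);
  }
}
```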
| /** | |||||
| * @brief Computes the result of x2 * alpha + x1. | |||||
| * @par Inputs: | |||||
| * @li x1: An ND tensor of type float16, float32, int32. | |||||
| * @li x2: An ND tensor of type float16, float32, int32. | |||||
| * @li alpha: A scalar tensor of type float16, float32. \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the same shape and type as "x1". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Axpy. | |||||
| */ | |||||
| REG_OP(AxpyV2) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OP_END_FACTORY_REG(AxpyV2) | |||||
| /** | |||||
| * @brief Computes the result of x1 + x2. | |||||
| * @par Inputs: | |||||
| * @li x1: An ND tensor of type float16, float, int32. | |||||
| * @li x2: An ND tensor of type float16, float, int32. \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the same type as "x1". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Add. | |||||
| */ | |||||
| REG_OP(PtAdd) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OP_END_FACTORY_REG(PtAdd) | |||||
| /** | |||||
| * @brief Computes the result of x1 * x2. | |||||
| * @par Inputs: | |||||
| * @li x1: An ND tensor of type float16, float32, int32. | |||||
| * @li x2: An ND tensor of type float16, float32, int32. \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the same type as "x1" and the shape of the larger of "x1" and "x2". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator muls. | |||||
| */ | |||||
| REG_OP(PtMuls) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OP_END_FACTORY_REG(PtMuls) | |||||
| /** | |||||
| * @brief Computes the result of x1 - x2. | |||||
| * @par Inputs: | |||||
| * @li x1: An ND tensor of type float16, float, int32. | |||||
| * @li x2: An ND tensor of type float16, float, int32. \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the same type as "x1". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Sub. | |||||
| */ | |||||
| REG_OP(PtSub) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OP_END_FACTORY_REG(PtSub) | |||||
| /** | |||||
| * @brief Add the partial values of two tensors in format NC1HWC0. | |||||
| * @par Inputs: | |||||
| * @li x1: A Tensor in 5HD, and must be one of the following types: float16, | |||||
| * float32. \n | |||||
| * @li x2: A Tensor of the same type as "x1", and the same shape as "x1", | |||||
| * except for the C1 value. \n | |||||
| * @par Attributes: | |||||
| * @li x1_c1_offset: A required int. Offset value of C1 in "x1". \n | |||||
| * @li x2_c1_offset: A required int. Offset value of C1 in "x2". \n | |||||
| * @li c1_len: A required int. The C1 length of "y". The value must be less than | |||||
| * the difference between C1 and the offset in "x1" and "x2". \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor of the same type as "x1", and the same shape as "x1", | |||||
| * except for the C1 value. Record the result after adding. \n | |||||
| */ | |||||
| REG_OP(StrideAdd) | |||||
| .INPUT(x1, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .INPUT(x2, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .REQUIRED_ATTR(x1_c1_offset, Int) | |||||
| .REQUIRED_ATTR(x2_c1_offset, Int) | |||||
| .REQUIRED_ATTR(c1_len, Int) | |||||
| .OP_END_FACTORY_REG(StrideAdd) | |||||
| /** | |||||
| * @brief Compares whether two tensors are exactly equal, producing a single bool value. | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li input_x: A Tensor. the first tensor. \n | |||||
| * @li input_y: A Tensor. the second tensor. \n | |||||
| * @par Outputs: | |||||
| * @li output_z: A Tensor. Bool type, compare result of the two inputs. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch equal operator. \n | |||||
| */ | |||||
| REG_OP(TensorEqual) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
| .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||||
| .OUTPUT(output_z, TensorType({DT_BOOL})) | |||||
| .OP_END_FACTORY_REG(TensorEqual) | |||||
| /** | |||||
| * @brief Element-wise max of the input tensors (with Numpy-style broadcasting support). | |||||
| * All inputs and outputs must have the same data type. This operator supports multidirectional | |||||
| * (i.e., Numpy-style) broadcasting. | |||||
| * | |||||
| * @par Inputs: | |||||
| * One dynamic input, including: | |||||
| * @li x: A dynamic-input Tensor. Must be one of the following types: float32, float16, double, int32, int64 | |||||
| * | |||||
| * @par Outputs: | |||||
| * One output, including: | |||||
| * @li y: A Tensor of the same type as x | |||||
| * | |||||
| */ | |||||
| REG_OP(MaxN) | |||||
| .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) | |||||
| .OP_END_FACTORY_REG(MaxN) | |||||
| /** | |||||
| * @brief Calculates x * mask * value. | |||||
| * | |||||
| * @par Inputs: | |||||
| * @li x: A tensor of type float16 or float32, specifying the input to the data layer. | |||||
| * @li mask: A tensor of type int8, float16 or float32, with the same shape as x. \n | |||||
| * | |||||
| * @par Attributes: | |||||
| * value: A required float. \n | |||||
| * | |||||
| * @par Outputs: | |||||
| * y: A tensor of type float16 or float32, with the same type and shape as x. | |||||
| * | |||||
| */ | |||||
| REG_OP(MaskedScale) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32})) | |||||
| .INPUT(mask, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) | |||||
| .REQUIRED_ATTR(value, Float) | |||||
| .OP_END_FACTORY_REG(MaskedScale) | |||||
| /** | |||||
| * @brief Calculate the lerp function. \n | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li start: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li end: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li weight: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Outputs: | |||||
| * y: A Tensor with the same type and shape as "start". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Lerp. \n | |||||
| */ | |||||
| REG_OP(Lerp) | |||||
| .INPUT(start, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(end, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(Lerp) | |||||
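The registration mirrors PyTorch's lerp: y = start + weight * (end - start), so weight 0 returns start and weight 1 returns end. A scalar reference sketch:

```cpp
#include <cstddef>

// y[i] = start[i] + weight[i] * (end[i] - start[i])
void LerpRef(const float *start, const float *end, const float *weight,
             float *y, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    y[i] = start[i] + weight[i] * (end[i] - start[i]);
  }
}
```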
| /** | |||||
| *@brief Returns the number of elements satisfying abs(x1-x2) > atol+rtol*abs(x2), evaluated element-wise. \n | |||||
| * | |||||
| *@par Inputs: | |||||
| *@li x1: A tensor. Must be one of the following types: float32, int32, uint8, int8, float16 | |||||
| *@li x2: A tensor of the same type as "x1". | |||||
| * | |||||
| *@par Attributes: | |||||
| * atol: Defaults to "1e-05". | |||||
| * rtol: Defaults to "1e-03". | |||||
| * | |||||
| *@par Outputs: | |||||
| * num: A tensor of type float32. | |||||
| * | |||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| * | |||||
| */ | |||||
| REG_OP(DataCompare) | |||||
| .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 })) | |||||
| .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 })) | |||||
| .OUTPUT(num, TensorType({DT_FLOAT})) | |||||
| .ATTR(atol, Float, 1e-5) | |||||
| .ATTR(rtol, Float, 1e-3) | |||||
| .OP_END_FACTORY_REG(DataCompare) | |||||
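Read as a reference, the op counts the elements whose difference exceeds the mixed absolute/relative tolerance and emits the count as a float. A sketch under that reading of the brief:

```cpp
#include <cmath>
#include <cstddef>

// Counts elements where |x1 - x2| > atol + rtol * |x2|.
float DataCompareRef(const float *x1, const float *x2, std::size_t n,
                     float atol = 1e-5f, float rtol = 1e-3f) {
  std::size_t num = 0;
  for (std::size_t i = 0; i < n; ++i) {
    if (std::fabs(x1[i] - x2[i]) > atol + rtol * std::fabs(x2[i])) {
      ++num;
    }
  }
  return static_cast<float>(num);  // the op emits the count as a float tensor
}
```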
| /** | |||||
| *@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value | |||||
| *along the specified axis, otherwise 0. The input does not need to explicitly be a 2D vector. | |||||
| *The "axis" attribute indicates the dimension along which Hardmax will be performed. | |||||
| *The output tensor has the same shape as the input and contains the Hardmax values of the | |||||
| *corresponding input. | |||||
| * | |||||
| *@par Inputs: | |||||
| *One input, including: | |||||
| *@li x: An input Tensor. Must be one of the following types: float32, float16 | |||||
| * | |||||
| *@par Attributes: | |||||
| *@li axis: An optional int attribute that decides which dimension is used to compute the hardmax. Defaults to -1. | |||||
| * | |||||
| *@par Outputs: | |||||
| *One output, including: | |||||
| *@li y: A Tensor of the same type as x | |||||
| * | |||||
| */ | |||||
| REG_OP(HardMax) | |||||
| .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(axis, Int, -1) | |||||
| .OP_END_FACTORY_REG(HardMax) | |||||
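A reference for the common row-wise case (axis = -1 on a 2-D input): exactly one 1 per row, at the first occurrence of the maximum. A sketch:

```cpp
#include <cstddef>

// Hardmax over the last axis of a row-major [rows x cols] matrix:
// y[r][c] = 1 for the first c maximizing x[r][c], else 0.
void HardMaxRef(const float *x, float *y, std::size_t rows, std::size_t cols) {
  for (std::size_t r = 0; r < rows; ++r) {
    std::size_t best = 0;
    for (std::size_t c = 1; c < cols; ++c) {
      if (x[r * cols + c] > x[r * cols + best]) {
        best = c;  // strict '>' keeps the FIRST maximum on ties
      }
    }
    for (std::size_t c = 0; c < cols; ++c) {
      y[r * cols + c] = (c == best) ? 1.0f : 0.0f;
    }
  }
}
```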
| /** | |||||
| * @brief Computes the dot product (inner product) of two tensors. This function does not broadcast. | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li input_x: A Tensor. the first tensor must be 1d. \n | |||||
| * @li input_y: A Tensor. the second tensor must be 1d. \n | |||||
| * @par Outputs: | |||||
| * @li output: A Tensor. The dot-product result of the two inputs; must be 1d. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch dot operator. \n | |||||
| */ | |||||
| REG_OP(Dot) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||||
| .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||||
| .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||||
| .OP_END_FACTORY_REG(Dot) | |||||
| /** | |||||
| *@brief Returns a new tensor with boolean elements representing \n | |||||
| *if each element of input is “close” to the corresponding element of other \n | |||||
| *@par Inputs: | |||||
| *Two inputs, including: | |||||
| * @li x1: A tensor. Must be one of the following types: | |||||
| * float16, float32, int32. \n | |||||
| * @li x2: A tensor with the same type and shape as x1. \n | |||||
| *@par Attributes: | |||||
| *@li rtol: An optional float. Defaults to 1e-05. \n | |||||
| *@li atol: An optional float. Defaults to 1e-08. \n | |||||
| *@li equal_nan: An optional bool. Defaults to false. \n | |||||
| *@par Outputs: | |||||
| *y: A bool Tensor with the same shape as x1. \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator isclose. \n | |||||
| */ | |||||
| REG_OP(IsClose) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_BOOL})) | |||||
| .ATTR(rtol, Float, 1e-05) | |||||
| .ATTR(atol, Float, 1e-08) | |||||
| .ATTR(equal_nan, Bool, false) | |||||
| .OP_END_FACTORY_REG(IsClose) | |||||
| /** | |||||
| * @brief Computes the backward of the ArgMax operator by scattering "updates" back to the arg-max positions. \n | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * var: A Tensor of type float16, float32, int32 or int8. \n | |||||
| * indices: A Tensor of type int32. \n | |||||
| * updates: A Tensor of type float16, float32, int32 or int8. \n | |||||
| * @par Attributes: | |||||
| * @li dimension: An int, specifying the axis along which the maximum-value index was taken.\n | |||||
| * @par Outputs: | |||||
| * y: A Tensor of type float16, float32, int32 or int8. \n | |||||
| * | |||||
| *@attention Constraints: | |||||
| *@li indices: only int32 is supported, and the shape must be the same as that of "updates" | |||||
| *@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". | |||||
| *@li y: A Tensor, with the same type and shape as "var" \n | |||||
| *@par Third-party framework compatibility | |||||
| * Does not support every scenario of the PyTorch operator scatter. | |||||
| * For example: | |||||
| * if var.shape=[2,3,4,5] and dim=2, the shape of indices and updates should be [2,3,5]; | |||||
| * a shape of [2,3,2,5] for indices and updates, as in the PyTorch operator scatter, is not supported. \n | |||||
| */ | |||||
| REG_OP(ArgMaxGrad) | |||||
| .INPUT(var, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
| .INPUT(indices, TensorType({DT_INT32})) | |||||
| .INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
| .REQUIRED_ATTR(dimension, Int) | |||||
| .OP_END_FACTORY_REG(ArgMaxGrad) | |||||
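The constraint reads more easily in reference form: for each position of the reduced shape, the gradient is scattered back to the arg-max index along "dimension", and everything else is zero. A 2-D sketch with dimension = 1:

```cpp
#include <cstddef>
#include <cstdint>

// var: [rows x cols]; indices, updates: [rows] (var's shape with dimension 1 removed).
// y is zero except y[r][indices[r]] = updates[r], the scatter that undoes ArgMax.
void ArgMaxGradRef(const float *var, const std::int32_t *indices,
                   const float *updates, float *y,
                   std::size_t rows, std::size_t cols) {
  for (std::size_t i = 0; i < rows * cols; ++i) {
    y[i] = 0.0f;
  }
  for (std::size_t r = 0; r < rows; ++r) {
    y[r * cols + static_cast<std::size_t>(indices[r])] = updates[r];
  }
  (void)var;  // var contributes only its shape in this reference
}
```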
| /** | |||||
| * @brief Computes the backward of the ArgMax operator by scattering "updates" back to the arg-max positions. \n | |||||
| * @par Inputs: | |||||
| * Four inputs, including: | |||||
| * var: A Tensor of type float16, float32, int32 or int8. \n | |||||
| * indices: A Tensor of type int32. \n | |||||
| * updates: A Tensor of type float16, float32, int32 or int8. \n | |||||
| * assist: A Tensor of type int32, an assist matrix whose shape must match the shape of var \n | |||||
| * @par Attributes: | |||||
| * @li dimension: An int, specifying the axis along which the maximum-value index was taken.\n | |||||
| * @par Outputs: | |||||
| * y: A Tensor of type float16, float32, int32 or int8. \n | |||||
| *@attention Constraints: | |||||
| *@li indices: only int32 is supported, and the shape must be the same as that of "updates" | |||||
| *@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". | |||||
| *@li y: A Tensor, with the same type and shape as "var" \n | |||||
| *@par Third-party framework compatibility | |||||
| * Does not support every scenario of the PyTorch operator scatter. | |||||
| * For example: | |||||
| * if var.shape=[2,3,4,5] and dim=2, the shape of indices and updates should be [2,3,5]; | |||||
| * a shape of [2,3,2,5] for indices and updates, as in the PyTorch operator scatter, is not supported. \n | |||||
| */ | |||||
| REG_OP(ArgMaxGradD) | |||||
| .INPUT(var, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
| .INPUT(indices, TensorType({DT_INT32})) | |||||
| .INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
| .INPUT(assist, TensorType({DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) | |||||
| .REQUIRED_ATTR(dimension, Int) | |||||
| .OP_END_FACTORY_REG(ArgMaxGradD) | |||||
| /** | |||||
| *@brief Returns the cosine similarity between x1 and x2, computed along dim. \n | |||||
| *@par Inputs: | |||||
| *Two inputs, including: | |||||
| * @li input_x1: A tensor. Must be of the following type: | |||||
| * float32. \n | |||||
| * @li input_x2: A tensor. Must be of the following type: | |||||
| * float32. \n | |||||
| *@par Outputs: | |||||
| *@li output_y: A Tensor with the same type as input_x1. \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator CosineSimilarity. \n | |||||
| */ | |||||
| REG_OP(CosineSimilarity) | |||||
| .INPUT(input_x1, TensorType({DT_FLOAT})) /* "First operand." */ | |||||
| .INPUT(input_x2, TensorType({DT_FLOAT})) /* "Second operand." */ | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT})) /* "Result, has same element type as two inputs" */ | |||||
| .ATTR(dim, Int, 1) | |||||
| .ATTR(eps, Float, 1e-8) | |||||
| .OP_END_FACTORY_REG(CosineSimilarity) | |||||
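For the 1-D case the registration reduces to the familiar formula cos = dot(x1, x2) / max(||x1|| * ||x2||, eps), with eps playing the same clamping role as the attribute. A sketch:

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>

// Cosine similarity of two float vectors, clamped by eps as in the attribute.
float CosineSimilarityRef(const float *x1, const float *x2, std::size_t n,
                          float eps = 1e-8f) {
  float dot = 0.0f, n1 = 0.0f, n2 = 0.0f;
  for (std::size_t i = 0; i < n; ++i) {
    dot += x1[i] * x2[i];
    n1 += x1[i] * x1[i];
    n2 += x2[i] * x2[i];
  }
  return dot / std::max(std::sqrt(n1) * std::sqrt(n2), eps);
}
```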
| } // namespace ge | |||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -45,8 +45,6 @@ REG_OP(HcomAllGather) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) | .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) | ||||
| .REQUIRED_ATTR(rank_size, Int) | .REQUIRED_ATTR(rank_size, Int) | ||||
| .REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
| .ATTR(alpha, Float, 1.0) | |||||
| .ATTR(beta, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(HcomAllGather) | .OP_END_FACTORY_REG(HcomAllGather) | ||||
| /** | /** | ||||
| @@ -77,8 +75,6 @@ REG_OP(HcomAllReduce) | |||||
| .REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
| .ATTR(fusion, Int, 1) | .ATTR(fusion, Int, 1) | ||||
| .ATTR(fusion_id, Int, -1) | .ATTR(fusion_id, Int, -1) | ||||
| .ATTR(alpha, Float, 1.0) | |||||
| .ATTR(beta, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(HcomAllReduce) | .OP_END_FACTORY_REG(HcomAllReduce) | ||||
| /** | /** | ||||
| @@ -91,7 +87,7 @@ REG_OP(HcomAllReduce) | |||||
| input of this rank will be broadcast to other ranks. | input of this rank will be broadcast to other ranks. | ||||
* @li fusion: A required integer identifying whether the op needs fusion; the
default value means no fusion.
| * @li fusion: A required integer identifying the fusion id if para fusion | |||||
| * @li fusion_id: A required integer identifying the fusion id if para fusion | |||||
| is set. | is set. | ||||
| * @li group: A required string identifying the group name of ranks | * @li group: A required string identifying the group name of ranks | ||||
| participating in the op. | participating in the op. | ||||
| @@ -109,10 +105,39 @@ REG_OP(HcomBroadcast) | |||||
| .REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
| .ATTR(fusion, Int, 0) | .ATTR(fusion, Int, 0) | ||||
| .ATTR(fusion_id, Int, -1) | .ATTR(fusion_id, Int, -1) | ||||
| .ATTR(alpha, Float, 1.0) | |||||
| .ATTR(beta, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(HcomBroadcast) | .OP_END_FACTORY_REG(HcomBroadcast) | ||||
| /** | |||||
* @brief Performs a reduction from other ranks to the root rank; the result is
delivered on the root rank.
* @par Inputs:
* x: A tensor. Must be one of the following types: int8, int16, int32, float16,
float32.
* @par Attributes:
* @li root_rank: A required integer identifying the root rank of the op.
* @li reduction: A required string identifying the reduction operation to
perform. The supported operations are: "sum", "max", "min", "prod".
* @li group: A required string identifying the group name of ranks
participating in the op.
* @li fusion: An optional integer identifying the fusion flag of the op.
0 (default): no fusion; 1: fusion; 2: fuse the ops by fusion id.
* @li fusion_id: An optional integer identifying the fusion id of the op.
* The HcomReduce ops with the same fusion id will be fused.
* @par Outputs:
* y: A Tensor. Has the same type as "x".
* @attention Constraints:
* "group" is limited to 128 characters. Use "hccl_world_group"
as the name of a world group.
| */ | |||||
| REG_OP(HcomReduce) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) | |||||
| .REQUIRED_ATTR(root_rank, Int) | |||||
| .REQUIRED_ATTR(reduction, String) | |||||
| .REQUIRED_ATTR(group, String) | |||||
| .ATTR(fusion, Int, 0) | |||||
| .ATTR(fusion_id, Int, -1) | |||||
| .OP_END_FACTORY_REG(HcomReduce) | |||||
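/* Host-side model of HcomReduce with reduction="sum" (a sketch of the
 * documented semantics, not the HCCL implementation): only the root rank
 * receives the element-wise reduction of every rank's input. */
#include <vector>

inline std::vector<float> HcomReduceSumModel(
    const std::vector<std::vector<float>> &input_per_rank) {
  std::vector<float> y(input_per_rank[0].size(), 0.0f);
  for (const auto &x : input_per_rank) {  // one buffer per participating rank
    for (size_t i = 0; i < x.size(); ++i) {
      y[i] += x[i];                       // "sum"; "max"/"min"/"prod" are analogous
    }
  }
  return y;                               // meaningful on root_rank only
}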
| /** | /** | ||||
| * @brief Performs reduction across all input tensors, scattering in equal | * @brief Performs reduction across all input tensors, scattering in equal | ||||
| blocks among ranks, each rank getting a chunk of data based on its rank | blocks among ranks, each rank getting a chunk of data based on its rank | ||||
| @@ -139,8 +164,6 @@ REG_OP(HcomReduceScatter) | |||||
| .REQUIRED_ATTR(reduction, String) | .REQUIRED_ATTR(reduction, String) | ||||
| .REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
| .REQUIRED_ATTR(rank_size, Int) | .REQUIRED_ATTR(rank_size, Int) | ||||
| .ATTR(alpha, Float, 1.0) | |||||
| .ATTR(beta, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(HcomReduceScatter) | .OP_END_FACTORY_REG(HcomReduceScatter) | ||||
| /** | /** | ||||
| @@ -167,8 +190,6 @@ REG_OP(HcomSend) | |||||
| .REQUIRED_ATTR(group, String) | .REQUIRED_ATTR(group, String) | ||||
| .REQUIRED_ATTR(sr_tag, Int) | .REQUIRED_ATTR(sr_tag, Int) | ||||
| .REQUIRED_ATTR(dest_rank, Int) | .REQUIRED_ATTR(dest_rank, Int) | ||||
| .ATTR(alpha, Float, 1.0) | |||||
| .ATTR(beta, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(HcomSend) | .OP_END_FACTORY_REG(HcomSend) | ||||
| /** | /** | ||||
| @@ -202,8 +223,6 @@ REG_OP(HcomReceive) | |||||
| .REQUIRED_ATTR(src_rank, Int) | .REQUIRED_ATTR(src_rank, Int) | ||||
| .REQUIRED_ATTR(shape, ListInt) | .REQUIRED_ATTR(shape, ListInt) | ||||
| .REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
| .ATTR(alpha, Float, 1.0) | |||||
| .ATTR(beta, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(HcomReceive) | .OP_END_FACTORY_REG(HcomReceive) | ||||
| /** | /** | ||||
| @@ -219,6 +238,15 @@ REG_OP(HcomRemoteRead) | |||||
| .REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
| .OP_END_FACTORY_REG(HcomRemoteRead) | .OP_END_FACTORY_REG(HcomRemoteRead) | ||||
| /** | |||||
* @brief Performs a remote reference read of input tensors.
* @par Inputs:
* @li remote: A tensor describing the remote memory addresses to read; each entry is a triple of u64 remoteId, u64 addrRemote, u64 length.
* @li cache_var: The local base address.
* @li local_offset: The skip step length.
* @par Outputs:
* cache_var: The local base address.
| */ | |||||
| REG_OP(HcomRemoteRefRead) | REG_OP(HcomRemoteRefRead) | ||||
| .INPUT(remote, TensorType({DT_UINT64})) | .INPUT(remote, TensorType({DT_UINT64})) | ||||
| .INPUT(cache_var, TensorType({DT_UINT64})) | .INPUT(cache_var, TensorType({DT_UINT64})) | ||||
| @@ -239,6 +267,13 @@ REG_OP(HcomRemoteWrite) | |||||
| .INPUT(local, TensorType::ALL()) | .INPUT(local, TensorType::ALL()) | ||||
| .OP_END_FACTORY_REG(HcomRemoteWrite) | .OP_END_FACTORY_REG(HcomRemoteWrite) | ||||
| /** | |||||
* @brief Performs a remote scatter write of input tensors.
* @par Inputs:
* @li remote: A tensor describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length.
* @li local: A Tensor whose value is length / size_of(Type).
| */ | |||||
| REG_OP(HcomRemoteScatterWrite) | REG_OP(HcomRemoteScatterWrite) | ||||
| .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) | .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) | ||||
| .INPUT(local, TensorType::ALL()) | .INPUT(local, TensorType::ALL()) | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -31,11 +31,12 @@ namespace ge { | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
interpreted as channels, and must be three. Inputs include:
| *@li images:A Tensor of type float. Images to adjust. At least 3-D. | |||||
| *@li images:A Tensor of type float. Images to adjust. At least 3-D. The format | |||||
| must be NHWC. | |||||
| *@li delta:A Tensor of type float. A float delta to add to the hue . \n | *@li delta:A Tensor of type float. A float delta to add to the hue . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A Tensor of type float . \n | |||||
| *y:A Tensor of type float. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
| @@ -57,11 +58,12 @@ REG_OP(AdjustHue) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
interpreted as channels, and must be three. Inputs include:
| *@li images:A Tensor of type float. Images to adjust. At least 3-D. | |||||
| *@li images:A Tensor of type float. Images to adjust. At least 3-D. The format | |||||
| must be NHWC. | |||||
| *@li scale:A Tensor of type float. A float scale to add to the saturation . \n | *@li scale:A Tensor of type float. A float scale to add to the saturation . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A Tensor of type float . \n | |||||
| *y:A Tensor of type float. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
| @@ -83,11 +85,12 @@ REG_OP(AdjustSaturation) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images is a tensor of at least 3 dimensions. The last 3 dimensions are | *Input images is a tensor of at least 3 dimensions. The last 3 dimensions are | ||||
| interpreted as '[height, width, channels]'. Inputs include: | interpreted as '[height, width, channels]'. Inputs include: | ||||
| *@li images:A Tensor of type float. Images to adjust. At least 3-D. | |||||
| *@li images:A Tensor of type float. Images to adjust. At least 3-D. The format | |||||
| must be NHWC. | |||||
| *@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n | *@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A Tensor of type float . \n | |||||
| *y:A Tensor of type float. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images is a tensor of at least 3 dimensions. The last dimension is | *Input images is a tensor of at least 3 dimensions. The last dimension is | ||||
| @@ -112,7 +115,7 @@ nearest neighbor sampling to a common output size specified by crop_size . \n | |||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, | *@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, | ||||
| int16, int32, int64, float16, float, double. A 4-D tensor of shape | int16, int32, int64, float16, float, double. A 4-D tensor of shape | ||||
| [batch, image_height, image_width, depth]. | |||||
| [batch, image_height, image_width, depth]. The format must be NHWC. | |||||
| *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. | *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. | ||||
| *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with | *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with | ||||
| int32 values in [0, batch). | int32 values in [0, batch). | ||||
| @@ -127,7 +130,7 @@ extrapolation, when applicable. | |||||
| NearestNeighbor . \n | NearestNeighbor . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A Tensor of type float . \n | |||||
| *y:A Tensor of type float. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images must be a 4-D tensor . \n | *Input images must be a 4-D tensor . \n | ||||
| @@ -193,7 +196,9 @@ boxes tensor . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images and grads must be a 4-D tensor. Inputs include: | *Input images and grads must be a 4-D tensor. Inputs include: | ||||
| *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | ||||
| The format must be NHWC. | |||||
| *@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. | *@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. | ||||
| The format must be NHWC. | |||||
| Both image_height and image_width need to be positive. | Both image_height and image_width need to be positive. | ||||
| *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | ||||
| specifies the coordinates of a box in the box_ind[i] image and is specified in | specifies the coordinates of a box in the box_ind[i] image and is specified in | ||||
| @@ -233,6 +238,7 @@ images tensor . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input grads must be a 4-D tensor. Inputs include: | *Input grads must be a 4-D tensor. Inputs include: | ||||
| *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. | ||||
| The format must be NHWC. | |||||
| *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor | ||||
| specifies the coordinates of a box in the box_ind[i] image and is specified | specifies the coordinates of a box in the box_ind[i] image and is specified | ||||
| in normalized coordinates [y1, x1, y2, x2]. | in normalized coordinates [y1, x1, y2, x2]. | ||||
| @@ -248,7 +254,8 @@ method: A string specifying the interpolation method. Only 'bilinear' is | |||||
| supported for now . \n | supported for now . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A 4-D tensor of shape [batch, image_height, image_width, depth] . \n | |||||
| *y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format | |||||
| must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input grads must be a 4-D tensor . \n | *Input grads must be a 4-D tensor . \n | ||||
| @@ -273,6 +280,7 @@ REG_OP(CropAndResizeGradImage) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input x must be a 4-D tensor. Inputs include: | *Input x must be a 4-D tensor. Inputs include: | ||||
| *@li x: A 4-D float tensor of shape [batch_size, height, width, channels]. | *@li x: A 4-D float tensor of shape [batch_size, height, width, channels]. | ||||
| The format must be NHWC. | |||||
| *@li size: A 1-D tensor of 2 elements containing the size of the glimpses to | *@li size: A 1-D tensor of 2 elements containing the size of the glimpses to | ||||
| extract. The glimpse height must be specified first, following by the glimpse | extract. The glimpse height must be specified first, following by the glimpse | ||||
| width. | width. | ||||
| @@ -293,7 +301,7 @@ uniform_noise . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A tensor representing the glimpses [batch_size, glimpse_height, | *y:A tensor representing the glimpses [batch_size, glimpse_height, | ||||
| glimpse_width, channels] . \n | |||||
| glimpse_width, channels]. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input x must be a 4-D tensor . \n | *Input x must be a 4-D tensor . \n | ||||
| @@ -340,7 +348,8 @@ REG_OP(HSVToRGB) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li images: 4-D with shape [batch, height, width, channels]. | |||||
| *@li images: 4-D with shape [batch, height, width, channels]. The format must | |||||
| be NHWC. | |||||
| *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | ||||
| size for the images. | size for the images. | ||||
| *@li min: A Tensor of type float. | *@li min: A Tensor of type float. | ||||
| @@ -354,6 +363,7 @@ the values at the corner pixels. Defaults to false. | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. | *@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. | ||||
| The format must be NHWC. | |||||
| *@li y_min: A Tensor of type float. | *@li y_min: A Tensor of type float. | ||||
| *@li y_max: A Tensor of type float . \n | *@li y_max: A Tensor of type float . \n | ||||
| @@ -381,7 +391,8 @@ REG_OP(QuantizedResizeBilinear) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li images: 4-D with shape [batch, height, width, channels]. | |||||
| *@li images: 4-D with shape [batch, height, width, channels]. The format must | |||||
| be NHWC. | |||||
| *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | ||||
| The new size for the images . \n | The new size for the images . \n | ||||
| @@ -391,7 +402,8 @@ output tensors are aligned, preserving the values at the corner pixels. | |||||
| Defaults to false . \n | Defaults to false . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||||
| *y: 4-D with shape [batch, new_height, new_width, channels]. The format must | |||||
| be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images can be of different types but output images are always float . \n | *Input images can be of different types but output images are always float . \n | ||||
| @@ -414,10 +426,10 @@ REG_OP(ResizeArea) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input grads must be a 4-D tensor. Inputs include: | *Input grads must be a 4-D tensor. Inputs include: | ||||
| *@li grads: A Tensor of type float. 4-D with shape [batch, height, width, | *@li grads: A Tensor of type float. 4-D with shape [batch, height, width, | ||||
| channels]. | |||||
| channels]. The format must be NHWC. | |||||
| *@li original_image: A Tensor. Must be one of the following types: float, | *@li original_image: A Tensor. Must be one of the following types: float, | ||||
| double. 4-D with shape [batch, orig_height, orig_width, channels], The image | double. 4-D with shape [batch, orig_height, orig_width, channels], The image | ||||
| tensor that was resized . \n | |||||
| tensor that was resized. The format must be NHWC. \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li align_corners: An optional bool. Defaults to False. If true, the centers | *@li align_corners: An optional bool. Defaults to False. If true, the centers | ||||
| @@ -426,10 +438,10 @@ false. | |||||
| *@li half_pixel_centers: An optional bool. Defaults to False . \n | *@li half_pixel_centers: An optional bool. Defaults to False . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: A Tensor. Has the same type as original_image . \n | |||||
| *y: A Tensor. Has the same type as original_image. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images can be of different types but output images are always float . \n | |||||
| *Input images can be of different types but output images are always float . | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with tensorflow ResizeBicubicGrad operator. | *Compatible with tensorflow ResizeBicubicGrad operator. | ||||
| @@ -448,7 +460,8 @@ REG_OP(ResizeBicubicGrad) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li images: 4-D with shape [batch, height, width, channels]. | |||||
| *@li images: 4-D with shape [batch, height, width, channels]. The format | |||||
| must be NHWC. | |||||
| *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | ||||
| size for the images . \n | size for the images . \n | ||||
| @@ -459,10 +472,11 @@ Defaults to false. | |||||
| *@li half_pixel_centers: An optional bool. Defaults to False . \n | *@li half_pixel_centers: An optional bool. Defaults to False . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||||
| *y: 4-D with shape [batch, new_height, new_width, channels]. The format | |||||
| must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images can be of different types but output images are always float . \n | |||||
| *Input images can be of different types but output images are always float . | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with tensorflow ResizeBicubic operator. | *Compatible with tensorflow ResizeBicubic operator. | ||||
| @@ -483,7 +497,7 @@ REG_OP(ResizeBicubic) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input grads must be a 4-D tensor. Inputs include: | *Input grads must be a 4-D tensor. Inputs include: | ||||
| *@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, | *@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, | ||||
| float16, float, double. 4-D with shape [batch, height, width, channels]. | |||||
float16, float, double. Must set the format; the supported formats are "NCHW" and "NHWC".
| *@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. | *@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. | ||||
| The original input size . \n | The original input size . \n | ||||
| @@ -550,9 +564,8 @@ REG_OP(ResizeNearestNeighborV2GradD) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input grads must be a 4-D tensor. Inputs include: | *Input grads must be a 4-D tensor. Inputs include: | ||||
| *@li grads: A Tensor of type float32. 4-D with shape [batch, height, width, | |||||
| channels]. | |||||
| *@li original_image: A Tensor. 4-D with shape [batch, orig_height, orig_width, | |||||
*@li grads: A Tensor of type float32. Must set the format; the supported formats are "NCHW" and "NHWC".
*@li original_image: A Tensor. Must set the format; the supported formats are "NCHW" and "NHWC". 4-D with shape [batch, orig_height, orig_width,
| channels], The image tensor that was resized . \n | channels], The image tensor that was resized . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -583,7 +596,7 @@ REG_OP(ResizeBilinearV2Grad) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li x: 4-D with shape [batch, height, width, channels]. | |||||
*@li x: A 4-D tensor. Must set the format; the supported formats are "NCHW" and "NHWC".
| *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new | ||||
| size for the images . \n | size for the images . \n | ||||
| @@ -639,6 +652,62 @@ REG_OP(RGBToHSV) | |||||
| /** | /** | ||||
| *@brief Generate a single randomly distorted bounding box for an image . \n | *@brief Generate a single randomly distorted bounding box for an image . \n | ||||
| *@par Inputs: | |||||
*Inputs include:
| *@li image_size: 1-D, containing [height, width, channels]. | |||||
| *@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding | |||||
| boxes associated with the image. \n | |||||
| *@par Attributes: | |||||
*@li seed: If either seed or seed2 is set to non-zero, the random number
generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
| *@li seed2: A second seed to avoid seed collision. | |||||
| *@li min_object_covered: The cropped area of the image must contain at least | |||||
| this fraction of any bounding box supplied. The value of this parameter should | |||||
| be non-negative. In the case of 0, the cropped area does not need to overlap | |||||
| any of the bounding boxes supplied . | |||||
| *@li aspect_ratio_range: The cropped area of the image must have an aspect | |||||
| ratio = width / height within this range. | |||||
| *@li max_attempts: Number of attempts at generating a cropped region of the | |||||
| image of the specified constraints. After max_attempts failures, return the | |||||
| entire image. | |||||
| *@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes | |||||
| supplied. If true, assume an implicit bounding box covering the whole input. | |||||
| If false, raise an error . \n | |||||
| *@par Outputs: | |||||
| *@li begin: 1-D, containing [offset_height, offset_width, 0]. | |||||
| *@li size: 1-D, containing [target_height, target_width, -1]. | |||||
| *@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n | |||||
| *@attention Constraints: | |||||
*The outputs "begin" and "size" have the same type as "image_size". \n
| *@par Third-party framework compatibility | |||||
| *Compatible with tensorflow SampleDistortedBoundingBox operator. | |||||
| */ | |||||
| REG_OP(SampleDistortedBoundingBox) | |||||
| .INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||||
| DT_INT32, DT_INT64 })) | |||||
| .INPUT(bounding_boxes, TensorType({ DT_FLOAT })) | |||||
| .OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||||
| DT_INT32, DT_INT64 })) | |||||
| .OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||||
| DT_INT32, DT_INT64 })) | |||||
| .OUTPUT(bboxes, TensorType({ DT_FLOAT })) | |||||
| .ATTR(seed, Int, 0) | |||||
| .ATTR(seed2, Int, 0) | |||||
| .ATTR(min_object_covered, Float, 0.1f) | |||||
| .ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f }) | |||||
| .ATTR(area_range, ListFloat, { 0.05f, 1.0f }) | |||||
| .ATTR(max_attempts, Int, 100) | |||||
| .ATTR(use_image_if_no_bounding_boxes, Bool, false) | |||||
| .OP_END_FACTORY_REG(SampleDistortedBoundingBox) | |||||
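/* Sketch of how the aspect_ratio_range / area_range attributes above constrain
 * a candidate crop (a model of the documented behavior; the helper name and
 * signature are hypothetical). */
inline bool CropCandidateOk(float crop_h, float crop_w,
                            float image_h, float image_w,
                            float ar_min, float ar_max,        // aspect_ratio_range
                            float area_min, float area_max) {  // area_range
  const float aspect = crop_w / crop_h;  // aspect ratio = width / height
  const float area_frac = (crop_h * crop_w) / (image_h * image_w);
  return aspect >= ar_min && aspect <= ar_max &&
         area_frac >= area_min && area_frac <= area_max;
}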
| /** | |||||
| *@brief Generate a single randomly distorted bounding box for an image . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li image_size: 1-D, containing [height, width, channels]. | *@li image_size: 1-D, containing [height, width, channels]. | ||||
| @@ -697,7 +766,7 @@ REG_OP(SampleDistortedBoundingBoxExt2) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input x must be a 4-D tensor. Inputs include: | *Input x must be a 4-D tensor. Inputs include: | ||||
| *@li x: 4-D with shape [batch, height, width, channels]. | |||||
*@li x: A 4-D tensor. Must set the format; the supported formats are "NCHW" and "NHWC".
| *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. | ||||
| The new size for the images . \n | The new size for the images . \n | ||||
| @@ -729,12 +798,12 @@ REG_OP(ResizeNearestNeighborV2) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *Input images must be a 4-D tensor. Inputs include: | *Input images must be a 4-D tensor. Inputs include: | ||||
| *@li images: A Tensor. Must be one of the following types: float. 4-D with | *@li images: A Tensor. Must be one of the following types: float. 4-D with | ||||
| shape [batch, height, width, depth]. A batch of images. | |||||
| shape [batch, height, width, depth]. A batch of images. The format must be NHWC. | |||||
| *@li boxes: A Tensor of type float32. 3-D with shape [batch, | *@li boxes: A Tensor of type float32. 3-D with shape [batch, | ||||
| num_bounding_boxes, 4] containing bounding boxes . \n | num_bounding_boxes, 4] containing bounding boxes . \n | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *A Tensor. Has the same type as images . \n | |||||
| *A Tensor. Has the same type as images. The format must be NHWC. \n | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *Input images must be a 4-D tensor . \n | *Input images must be a 4-D tensor . \n | ||||
| @@ -1342,6 +1411,295 @@ REG_OP(SpatialTransformerD) | |||||
| .ATTR(use_default_theta, ListBool, {}) | .ATTR(use_default_theta, ListBool, {}) | ||||
| .OP_END_FACTORY_REG(SpatialTransformerD) | .OP_END_FACTORY_REG(SpatialTransformerD) | ||||
| } // namespace ge | |||||
| /** | |||||
* @brief Resize the input tensor. \n
Currently, only resizing image tensors using nearest-neighbor and linear interpolation is supported.
* @par Inputs:
* Input x must be a 4-D tensor. Inputs include: \n
* @li x: A Tensor. Must be one of the following types: uint8, int8, int16, \n
int32, int64, float16, float, double. 4-D with shape [batch, height, width, channels] \n
or shape [batch, channels, height, width].
* @li roi: A 1-D float Tensor. Only takes effect when attr coordinate_transformation_mode \n
is "tf_crop_and_resize".
* @li scales: A 1-D float Tensor, the scale array along each dimension. Only one of \n
'scales' and 'sizes' can be specified.
* @li sizes: A 1-D int64 Tensor, the size of the output tensor. Only one of \n
'scales' and 'sizes' can be specified. If 'sizes' is specified, then set 'scales' \n
to empty data (zero shape) in this operator's input list.
| * @par Attributes: | |||||
* @li coordinate_transformation_mode: String. Defaults to half_pixel. Specifies how to transform \n
the coordinate in the resized tensor to the coordinate in the original tensor. \n
Other options: pytorch_half_pixel, align_corners, asymmetric, tf_half_pixel_for_nn, \n
tf_crop_and_resize.
* @li cubic_coeff_a: Float. Defaults to -0.75, only used in cubic interpolation. \n
Other option: -0.5.
| * @li exclude_outside: Int. Defaults to 0, If set to 1, the weight of sampling \n | |||||
| locations outside the tensor will be set to 0 and the weight will be renormalized \n | |||||
| so that their sum is 1.0. | |||||
| * @li extrapolation_value: Float. Defaults to 0.0f. When coordinate_transformation_mode \n | |||||
| is "tf_crop_and_resize" and x_original is outside the range [0, length_original - 1], \n | |||||
| this value is used as the corresponding output value. | |||||
| * @li mode: String. Defaults to nearest. Three interpolation modes: nearest (default), \n | |||||
| linear and cubic. | |||||
| * @li nearest_mode: String. Defaults to round_prefer_floor. Four modes: round_prefer_floor, \n | |||||
| round_prefer_ceil, floor, ceil. Only used by nearest interpolation. | |||||
| * @par Outputs: | |||||
| * y: A Tensor. Has the same type as x. | |||||
| * @attention Constraints: \n | |||||
| * Input x must be a 4-D tensor. | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with tensorflow ResizeNearestNeighborV2 operator. | |||||
| */ | |||||
| REG_OP(Resize) | |||||
| .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, | |||||
| DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(scales, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(sizes, TensorType({DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, | |||||
| DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||||
| .ATTR(coordinate_transformation_mode, String, "half_pixel") | |||||
| .ATTR(cubic_coeff_a, Float, -0.75) | |||||
| .ATTR(exclude_outside, Int, 0) | |||||
| .ATTR(extrapolation_value, Float, 0) | |||||
| .ATTR(mode, String, "nearest") | |||||
| .ATTR(nearest_mode, String, "round_prefer_floor") | |||||
| .OP_END_FACTORY_REG(Resize) | |||||
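/* Coordinate-transformation sketch for the default "half_pixel" mode above (an
 * assumption matching the common definition of this mode): each output index is
 * mapped back to a sampling point in the original tensor as below, with
 * scale = resized_length / original_length along the axis. */
#include <cstdint>

inline float HalfPixelSourceCoord(int64_t x_resized, float scale) {
  // Pixel centers sit at half-integer positions, hence the +/- 0.5 shifts.
  return (static_cast<float>(x_resized) + 0.5f) / scale - 0.5f;
}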
| /** | |||||
*@brief Parses a JPEG-encoded image from a string into a uint8 tensor. \n
| *@par Inputs: | |||||
| *@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n | |||||
| *@par Attributes: | |||||
| *@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. | |||||
| *@li ratio: An optional int. Defaults to 1. Downscaling ratio. | |||||
| *@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower but nicer upscaling of the chroma planes | |||||
| *@li try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input. | |||||
| *@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted. | |||||
| *@li dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression. \n | |||||
| *@par Outputs: | |||||
*image: A Tensor of type uint8.
| */ | |||||
| REG_OP(DecodeJpeg) | |||||
| .INPUT(contents, TensorType({DT_STRING})) | |||||
| .OUTPUT(image, TensorType({DT_UINT8})) | |||||
| .ATTR(channels, Int, 0) | |||||
| .ATTR(ratio, Int, 1) | |||||
| .ATTR(fancy_upscaling, Bool, true) | |||||
| .ATTR(try_recover_truncated, Bool, false) | |||||
| .ATTR(acceptable_fraction, Float, 1.0) | |||||
| .ATTR(dct_method, String, "") | |||||
| .OP_END_FACTORY_REG(DecodeJpeg) | |||||
| /** | |||||
| *@brief Image warping using per-pixel flow vectors. \n | |||||
| *@par Inputs: | |||||
*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
| *@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n | |||||
| *@par Outputs: | |||||
| *y: Returns 4-D with the same shape and dtype as `images`. \n | |||||
| */ | |||||
| REG_OP(DenseImageWarp) | |||||
| .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OP_END_FACTORY_REG(DenseImageWarp) | |||||
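/* Semantics sketch for DenseImageWarp (an assumption based on the usual
 * dense_image_warp definition): the output at (h, w) bilinearly samples the
 * input image at that location minus the flow vector, i.e.
 * y[b, h, w, c] = bilinear(image[b], h - flow[b, h, w, 0], w - flow[b, h, w, 1], c).
 * The helper below only computes the query point; the name is illustrative. */
#include <utility>

inline std::pair<float, float> WarpQueryPoint(int h, int w,
                                              float flow_y, float flow_x) {
  // Out-of-range query points are left to the interpolation's boundary handling.
  return {static_cast<float>(h) - flow_y, static_cast<float>(w) - flow_x};
}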
| /** | |||||
| *@brief Calculate the resize_d function. \n | |||||
| *@par Inputs: | |||||
*One input, including:
* @li x: A tensor. Must be one of the following types:
* float16, float32. \n
*@par Attributes:
*@li sizes: A required listInt. \n
| *@li scales: An optional listFloat. | |||||
| Defaults to none. \n | |||||
| *@li roi: An optional listInt. | |||||
| Defaults to none. \n | |||||
| *@li coordinate_transformation_mode: An optional String. | |||||
| Defaults to "half_pixel". \n | |||||
| *@li cubic_coeff_a: An optional float. | |||||
| Defaults to -0.75. \n | |||||
| *@li exclude_outside: An optional int. | |||||
| Defaults to 0. \n | |||||
| *@li extrapolation_value: An optional float. | |||||
| Defaults to 0.0. \n | |||||
| *@li mode: An optional String. | |||||
| Defaults to "nearest". \n | |||||
| *@li nearest_mode: An optional String. | |||||
| Defaults to "round_prefer_floor". \n | |||||
| *@par Outputs: | |||||
*y: A Tensor with the same type as x;
the shape depends on x and sizes. \n
| */ | |||||
| REG_OP(ResizeD) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .REQUIRED_ATTR(sizes, ListInt) | |||||
| .ATTR(scales, ListFloat, {}) | |||||
| .ATTR(roi, ListInt, {}) | |||||
| .ATTR(coordinate_transformation_mode, String, "half_pixel") | |||||
| .ATTR(cubic_coeff_a, Float, -0.75) | |||||
| .ATTR(exclude_outside, Int, 0) | |||||
| .ATTR(extrapolation_value, Float, 0.0) | |||||
| .ATTR(mode, String, "nearest") | |||||
| .ATTR(nearest_mode, String, "round_prefer_floor") | |||||
| .OP_END_FACTORY_REG(ResizeD) | |||||
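/* Shape sketch for ResizeD: the required "sizes" attribute gives the output
 * spatial shape directly; when thinking in terms of "scales", the equivalent
 * sizes would be floor(in * scale) per axis (an assumption matching the usual
 * Resize size computation; the helper name is hypothetical). */
#include <cmath>
#include <cstdint>
#include <vector>

inline std::vector<int64_t> ScalesToSizes(const std::vector<int64_t> &in_shape,
                                          const std::vector<float> &scales) {
  std::vector<int64_t> out(in_shape.size());
  for (size_t i = 0; i < in_shape.size(); ++i) {
    out[i] = static_cast<int64_t>(
        std::floor(static_cast<float>(in_shape[i]) * scales[i]));
  }
  return out;
}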
| /** | |||||
| *@brief Calculate the resize_grad_d function. \n | |||||
| *@par Inputs: | |||||
*One input, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32. \n
*@par Attributes:
*@li original_size: A required listInt. \n
| *@li roi: An optional listInt. | |||||
| Defaults to none. \n | |||||
| *@li scales: An optional listFloat. | |||||
| Defaults to none. \n | |||||
| *@li coordinate_transformation_mode: An optional String. | |||||
| Defaults to "half_pixel". \n | |||||
| *@li cubic_coeff_a: An optional float. | |||||
| Defaults to -0.75. \n | |||||
| *@li exclude_outside: An optional int. | |||||
| Defaults to 0. \n | |||||
| *@li extrapolation_value: An optional float. | |||||
| Defaults to 0.0. \n | |||||
| *@li mode: An optional String. | |||||
| Defaults to "nearest". \n | |||||
| *@li nearest_mode: An optional String. | |||||
| Defaults to "round_prefer_floor". \n | |||||
| *@par Outputs: | |||||
*y: A Tensor with the same type as grads;
the shape depends on grads and original_size. \n
| */ | |||||
| REG_OP(ResizeGradD) | |||||
| .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .REQUIRED_ATTR(original_size, ListInt) | |||||
| .ATTR(roi, ListInt, {}) | |||||
| .ATTR(scales, ListFloat, {}) | |||||
| .ATTR(coordinate_transformation_mode, String, "half_pixel") | |||||
| .ATTR(cubic_coeff_a, Float, -0.75) | |||||
| .ATTR(exclude_outside, Int, 0) | |||||
| .ATTR(extrapolation_value, Float, 0.0) | |||||
| .ATTR(mode, String, "nearest") | |||||
| .ATTR(nearest_mode, String, "round_prefer_floor") | |||||
| .OP_END_FACTORY_REG(ResizeGradD) | |||||
| /** | |||||
| *@brief Computes the gradients of DenseImageWarp with respect to image and flow. \n | |||||
| *@par Inputs: | |||||
*@li grad: Gradients with respect to the DenseImageWarp output.
*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n
*@par Outputs:
*@li grad_image: Returns 4-D with the same shape and dtype as `image`.
*@li grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n
| */ | |||||
| REG_OP(DenseImageWarpGrad) | |||||
| .INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(grad_image, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(grad_flow, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OP_END_FACTORY_REG(DenseImageWarpGrad) | |||||
| /** | |||||
*@brief This operation samples the input x by using interpolation based on a flow field grid,
which is usually generated by affine_grid. The grid of shape [N, H, W, 2] is the concatenation of
(x, y) coordinates with shape [N, H, W] each, where x indexes the 4th dimension (the width dimension) of
the input data x and y indexes the 3rd dimension (the height dimension); the result is
the interpolation value of the 4 nearest corner points. The output tensor shape will be [N, C, H, W].
| *@par Inputs: | |||||
| *@li x: 4-D Tensor with shape `[batch, channels, height, width]`. | |||||
| *@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`. | |||||
| *@par Attributes: | |||||
| *@li interpolation_mode: An optional string specifying the interpolation method. Only 'bilinear' is | |||||
| supported for now . | |||||
| *@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . | |||||
| *@li align_corners: An optional bool. If "true", the centers of the corner | |||||
| pixels of the input and output tensors are aligned. Defaults to "false" . | |||||
| *@par Outputs: | |||||
| *y: Returns 4-D Tensor with the same dtype as `X`. | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with pytorch GridSampler2D operator. | |||||
| */ | |||||
| REG_OP(GridSampler2D) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(interpolation_mode, String, "bilinear") | |||||
| .ATTR(padding_mode, String, "zeros") | |||||
| .ATTR(align_corners, Bool, false) | |||||
| .OP_END_FACTORY_REG(GridSampler2D) | |||||
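/* Unnormalization sketch for GridSampler2D (an assumption based on the PyTorch
 * grid_sample convention it is declared compatible with): grid values in
 * [-1, 1] map to pixel coordinates, with align_corners choosing whether -1/1
 * refer to corner-pixel centers or to the outer edges of the corner pixels. */
#include <cstdint>

inline float UnnormalizeGridCoord(float coord, int64_t size, bool align_corners) {
  if (align_corners) {
    // -1 and 1 land on the centers of the first and last pixels.
    return (coord + 1.0f) / 2.0f * static_cast<float>(size - 1);
  }
  // -1 and 1 land on the outer edges of the first and last pixels.
  return ((coord + 1.0f) * static_cast<float>(size) - 1.0f) / 2.0f;
}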
| /** | |||||
*@brief This operation unnormalizes the input grid, which is usually generated by affine_grid.
| *@par Inputs: | |||||
| *@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`. | |||||
*@li assist: Assist matrix, a 4-D tensor of type float16 or float32.
| *@par Attributes: | |||||
| *@li align_corners: An optional bool. If "true", the centers of the corner | |||||
| pixels of the input and output tensors are aligned. Defaults to "false" . | |||||
| *@par Outputs: | |||||
| *diff: Returns 4-D Tensor with the same shape and dtype as `grid`. | |||||
| *position: Returns 4-D Tensor with the same shape as `grid`. | |||||
| */ | |||||
| REG_OP(GridUnnormal) | |||||
| .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(assist, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(diff, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(position, TensorType({DT_INT32})) | |||||
| .ATTR(align_corners, Bool, false) | |||||
| .OP_END_FACTORY_REG(GridUnnormal) | |||||
| /** | |||||
*@brief This operation unfolds the input x based on the unnormalized grid generated by GridUnnormal.
| *@par Inputs: | |||||
| *@li x: 4-D Tensor with shape `[batch, channels, height, width]`. | |||||
| *@li position: 4-D Tensor with shape `[batch, output_height, output_width, 2]`. | |||||
| *@par Attributes: | |||||
| *@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . | |||||
| *@par Outputs: | |||||
| *y: Returns 4-D Tensor with the same dtype as `x`. | |||||
| */ | |||||
| REG_OP(ImageUnfold) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(position, TensorType({DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(padding_mode, String, "zeros") | |||||
| .OP_END_FACTORY_REG(ImageUnfold) | |||||
| } // namespace ge | |||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -61,8 +61,8 @@ REG_OP(CholeskyGrad) | |||||
| *@par Inputs: | *@par Inputs: | ||||
*The input x has to be symmetric and positive definite. Inputs include:
| *x:A Tensor. Must be one of the following types: double, float32. Shape | |||||
| is [..., M, M] . \n | |||||
| *x:A Tensor. Must be one of the following types: double, float32, float16, | |||||
| complex64, complex128. Shape is [..., M, M] . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A Tensor. Has the same type as x . \n | *y:A Tensor. Has the same type as x . \n | ||||
| @@ -76,8 +76,10 @@ form square matrices. | |||||
| */ | */ | ||||
| REG_OP(Cholesky) | REG_OP(Cholesky) | ||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, \ | |||||
| DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, \ | |||||
| DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(Cholesky) | .OP_END_FACTORY_REG(Cholesky) | ||||
| /** | /** | ||||
| @@ -87,8 +89,8 @@ of one or more square matrices . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | ||||
| form square matrices. Inputs include: | form square matrices. Inputs include: | ||||
| *x:A Tensor. Must be one of the following types: double, float32. Shape is | |||||
| [..., M, M] . \n | |||||
| *x:A Tensor. Must be one of the following types: double, float32, | |||||
| complex64, complex128. Shape is [..., M, M] . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *@li y:A Tensor. Has the same type as x. | *@li y:A Tensor. Has the same type as x. | ||||
| @@ -103,9 +105,9 @@ form square matrices. \n | |||||
| */ | */ | ||||
| REG_OP(LogMatrixDeterminant) | REG_OP(LogMatrixDeterminant) | ||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(LogMatrixDeterminant) | .OP_END_FACTORY_REG(LogMatrixDeterminant) | ||||
| /** | /** | ||||
| @@ -114,8 +116,8 @@ REG_OP(LogMatrixDeterminant) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | ||||
| form square matrices. Inputs include: | form square matrices. Inputs include: | ||||
| *x:A Tensor. Must be one of the following types: double, float32. Shape is | |||||
| [..., M, M] . \n | |||||
| *x:A Tensor. Must be one of the following types: double, float32, complex64, | |||||
| complex128. Shape is [..., M, M] . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:A Tensor. Has the same type as x . \n | *y:A Tensor. Has the same type as x . \n | ||||
| @@ -129,8 +131,8 @@ form square matrices. | |||||
| */ | */ | ||||
| REG_OP(MatrixDeterminant) | REG_OP(MatrixDeterminant) | ||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(MatrixDeterminant) | .OP_END_FACTORY_REG(MatrixDeterminant) | ||||
| /** | /** | ||||
| @@ -140,8 +142,7 @@ their adjoints (conjugate transposes) . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions | *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions | ||||
| form square matrices. Inputs include: | form square matrices. Inputs include: | ||||
| *x:A Tensor. Must be one of the following types: double, float. Shape is | |||||
| [..., M, M] . \n | |||||
| *x:A Tensor of input. Shape is [..., M, M] . \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *adjoint:An optional bool. Defaults to False.Boolean indicating whether to | *adjoint:An optional bool. Defaults to False.Boolean indicating whether to | ||||
| @@ -159,8 +160,8 @@ form square matrices. \n | |||||
| */ | */ | ||||
| REG_OP(MatrixInverse) | REG_OP(MatrixInverse) | ||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .ATTR(adjoint, Bool, false) | .ATTR(adjoint, Bool, false) | ||||
| .OP_END_FACTORY_REG(MatrixInverse) | .OP_END_FACTORY_REG(MatrixInverse) | ||||
| @@ -169,8 +170,7 @@ REG_OP(MatrixInverse) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input rhs must have the same type as matrix. Inputs include: | *The input rhs must have the same type as matrix. Inputs include: | ||||
| *@li matrix:A Tensor. Must be one of the following types: double, float. | |||||
| Shape is [..., M, M]. | |||||
| *@li matrix:A Tensor of input. Shape is [..., M, M]. | |||||
| *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -189,9 +189,9 @@ dimensions form square matrices. \n | |||||
| */ | */ | ||||
| REG_OP(MatrixSolve) | REG_OP(MatrixSolve) | ||||
| .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .ATTR(adjoint, Bool, false) | .ATTR(adjoint, Bool, false) | ||||
| .OP_END_FACTORY_REG(MatrixSolve) | .OP_END_FACTORY_REG(MatrixSolve) | ||||
| @@ -221,8 +221,8 @@ dimensions form square matrices. \n | |||||
| */ | */ | ||||
| REG_OP(MatrixSolveLs) | REG_OP(MatrixSolveLs) | ||||
| .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(l2, TensorType({DT_DOUBLE})) | .INPUT(l2, TensorType({DT_DOUBLE})) | ||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | ||||
| .ATTR(fast, Bool, true) | .ATTR(fast, Bool, true) | ||||
| @@ -234,8 +234,7 @@ matrices by backsubstitution . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input rhs must have the same type as matrix. Inputs include: | *The input rhs must have the same type as matrix. Inputs include: | ||||
| *@li matrix: A Tensor. Must be one of the following types: double, float. | |||||
| Shape is [..., M, M]. | |||||
| *@li matrix: A Tensor. Shape is [..., M, M]. | |||||
| *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -256,9 +255,9 @@ dimensions form square matrices. \n | |||||
| */ | */ | ||||
| REG_OP(MatrixTriangularSolve) | REG_OP(MatrixTriangularSolve) | ||||
| .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .ATTR(lower, Bool, true) | .ATTR(lower, Bool, true) | ||||
| .ATTR(adjoint, Bool, false) | .ATTR(adjoint, Bool, false) | ||||
| .OP_END_FACTORY_REG(MatrixTriangularSolve) | .OP_END_FACTORY_REG(MatrixTriangularSolve) | ||||
| @@ -268,8 +267,7 @@ REG_OP(MatrixTriangularSolve) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *The input shape of x must be [..., M, N]. Inputs include: | *The input shape of x must be [..., M, N]. Inputs include: | ||||
| *x:A Tensor whose shape is [..., M, N]. Must be one of the following types: | |||||
| double, float . \n | |||||
| *x:A Tensor whose shape is [..., M, N]. \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *full_matrices: An optional bool. Defaults to False. If true, compute | *full_matrices: An optional bool. Defaults to False. If true, compute | ||||
| @@ -289,9 +287,12 @@ dimensions form matrices of size [M, N]. \n | |||||
| */ | */ | ||||
| REG_OP(Qr) | REG_OP(Qr) | ||||
| .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||||
| .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||||
| .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||||
| .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128 })) | |||||
| .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128 })) | |||||
| .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ | |||||
| DT_COMPLEX64, DT_COMPLEX128 })) | |||||
| .ATTR(full_matrices, Bool, false) | .ATTR(full_matrices, Bool, false) | ||||
| .OP_END_FACTORY_REG(Qr) | .OP_END_FACTORY_REG(Qr) | ||||
| @@ -384,8 +385,8 @@ of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n | |||||
| */ | */ | ||||
| REG_OP(Lu) | REG_OP(Lu) | ||||
| .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(p, TensorType({DT_INT32, DT_INT64})) | .OUTPUT(p, TensorType({DT_INT32, DT_INT64})) | ||||
| .REQUIRED_ATTR(output_idx_type, Type) | .REQUIRED_ATTR(output_idx_type, Type) | ||||
| .OP_END_FACTORY_REG(Lu) | .OP_END_FACTORY_REG(Lu) | ||||
| @@ -404,8 +405,8 @@ y: Shape is `[..., M, M]` . \n | |||||
| */ | */ | ||||
| REG_OP(MatrixSquareRoot) | REG_OP(MatrixSquareRoot) | ||||
| .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(MatrixSquareRoot) | .OP_END_FACTORY_REG(MatrixSquareRoot) | ||||
| /** | /** | ||||
| @@ -424,9 +425,9 @@ y: Tensor of shape `[..., M, K]` containing the solutions \n | |||||
| */ | */ | ||||
| REG_OP(TridiagonalSolve) | REG_OP(TridiagonalSolve) | ||||
| .INPUT(diagonals, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(diagonals, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .ATTR(partial_pivoting, Bool, true) | .ATTR(partial_pivoting, Bool, true) | ||||
| .OP_END_FACTORY_REG(TridiagonalSolve) | .OP_END_FACTORY_REG(TridiagonalSolve) | ||||
| @@ -0,0 +1,504 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| /*! | |||||
| * \file list_ops.h | |||||
| * \brief | |||||
| */ | |||||
| #ifndef OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ | |||||
| #define OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ | |||||
| #include <algorithm> | |||||
| #include "graph/operator_reg.h" | |||||
| #include "graph/operator.h" | |||||
| namespace ge { | |||||
| /** | |||||
| *@brief Creates and returns an empty tensor list. \n | |||||
| *@par Inputs: | |||||
| *@li element_shape: A shape compatible with that of elements in the list. | |||||
| *@li max_num_elements: The maximum number of elements. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li handle: An empty tensor list . \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow EmptyTensorList operator. | |||||
| */ | |||||
| REG_OP(EmptyTensorList) | |||||
| .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
| .INPUT(max_num_elements, TensorType({DT_INT32})) | |||||
| .OUTPUT(handle, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(EmptyTensorList) | |||||
| /** | |||||
| *@brief Returns a list which has the passed-in `Tensor` as last element | |||||
| and the other elements of the given list in `input_handle`. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The old list. | |||||
| *@li tensor: The tensor to put on the list. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li output_handle:A list with the elements of old list followed by tensor. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListPushBack operator. | |||||
| */ | |||||
| REG_OP(TensorListPushBack) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListPushBack) | |||||
| /** | |||||
| *@brief Returns the last element of the input list as well as a | |||||
| list with all but that element. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input list. | |||||
| *@li element_shape: A shape compatible with that of elements in the list. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li output_handle: A list with all but the last element of the old list. | |||||
| *@li tensor: The removed last element of the list. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListPopBack operator. | |||||
| */ | |||||
| REG_OP(TensorListPopBack) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(element_shape, TensorType({DT_INT32})) | |||||
| .OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
| .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListPopBack) | |||||
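Taken together, EmptyTensorList, TensorListPushBack and TensorListPopBack act as a stack over DT_VARIANT handles. Below is a graph-construction sketch, not a definitive usage: it assumes the `set_input_*`/`set_attr_*` accessors that REG_OP generates, a `ge::op::Data` placeholder from array_ops.h, and it only builds against the Ascend GE headers.

```cpp
#include "graph/graph.h"
#include "array_ops.h"   // ge::op::Data (assumed location)
#include "list_ops.h"    // this header

ge::Graph BuildPushPopGraph() {
  auto shape = ge::op::Data("element_shape");        // e.g. a DT_INT32 shape tensor
  auto max_elems = ge::op::Data("max_num_elements");
  auto tensor = ge::op::Data("tensor");
  auto empty = ge::op::EmptyTensorList("empty")
                   .set_input_element_shape(shape)
                   .set_input_max_num_elements(max_elems)
                   .set_attr_element_dtype(ge::DT_FLOAT);
  auto push = ge::op::TensorListPushBack("push")     // list' = list ++ [tensor]
                  .set_input_input_handle(empty)
                  .set_input_tensor(tensor)
                  .set_attr_element_dtype(ge::DT_FLOAT);
  auto pop = ge::op::TensorListPopBack("pop")        // yields (shorter list, last tensor)
                 .set_input_input_handle(push)
                 .set_input_element_shape(shape)
                 .set_attr_element_dtype(ge::DT_FLOAT);
  ge::Graph graph("tensor_list_demo");
  graph.SetInputs({shape, max_elems, tensor}).SetOutputs({pop});
  return graph;
}
```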
| /** | |||||
| *@brief The number of tensors in the input tensor list. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input list. \n | |||||
| *@par Outputs: | |||||
| *@li length:The number of tensors in the list. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListLength operator. | |||||
| */ | |||||
| REG_OP(TensorListLength) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .OUTPUT(length, TensorType({DT_INT32})) | |||||
| .OP_END_FACTORY_REG(TensorListLength) | |||||
| /** | |||||
| *@brief The shape of elements in the input tensor list. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input list. \n | |||||
| *@par Attributes: | |||||
| *@li shape_type: The type of shape in the list. \n | |||||
| *@par Outputs: | |||||
| *@li element_shape:A shape compatible with that of elements in the list. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListElementShape operator. | |||||
| */ | |||||
| REG_OP(TensorListElementShape) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .OUTPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
| .ATTR(shape_type, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListElementShape) | |||||
| /** | |||||
| *@brief List of the given size with empty elements. \n | |||||
| *@par Inputs: | |||||
| *@li element_shape: A shape compatible with that of elements in the list. | |||||
| *@li num_elements: The number of elements to reserve. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. | |||||
| *@li shape_type: The type of shape in the list. \n | |||||
| *@par Outputs: | |||||
| *@li handle: An output tensor list . \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListReserve operator. | |||||
| */ | |||||
| REG_OP(TensorListReserve) | |||||
| .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
| .INPUT(num_elements, TensorType({DT_INT32})) | |||||
| .OUTPUT(handle, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .ATTR(shape_type, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListReserve) | |||||
| /** | |||||
| *@brief Returns the element at the given index position in the input tensor list. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input list. | |||||
| *@li index: A tensor of position. | |||||
| *@li element_shape: A shape compatible with that of elements in the list. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li item: The output tensor at the given index position . \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListGetItem operator. | |||||
| */ | |||||
| REG_OP(TensorListGetItem) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(index, TensorType({DT_INT32})) | |||||
| .INPUT(element_shape, TensorType({DT_INT32})) | |||||
| .OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListGetItem) | |||||
| /** | |||||
| *@brief Sets the index-th position of the list to contain the given tensor. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input list. | |||||
| *@li index: The position in the list to which the tensor will be assigned. | |||||
| *@li item: The element to be assigned to that position. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li output_handle: An output tensor list . \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListSetItem operator. | |||||
| */ | |||||
| REG_OP(TensorListSetItem) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(index, TensorType({DT_INT32})) | |||||
| .INPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListSetItem) | |||||
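Reserve/SetItem/GetItem behave like preallocating a vector and then writing and reading a slot by index. A runnable host-side analogy in plain C++, illustrative only (the real ops operate on DT_VARIANT list handles inside a graph):

```cpp
#include <cassert>
#include <vector>

int main() {
  // TensorListReserve(num_elements=4): preallocate 4 empty slots.
  std::vector<std::vector<float>> list(4);
  // TensorListSetItem(index=2, item): write a tensor into slot 2.
  list[2] = {1.0f, 2.0f, 3.0f};
  // TensorListGetItem(index=2): read it back.
  assert(list[2].size() == 3);
  return 0;
}
```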
| /** | |||||
| *@brief Pushes a tensor onto each list in a batch of tensor lists. \n | |||||
| *@par Inputs: | |||||
| *@li input_handles: The input tensor lists. | |||||
| *@li tensor: The tensor to push onto each list. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li output_handles: The output tensor lists. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListPushBackBatch operator. | |||||
| */ | |||||
| REG_OP(TensorListPushBackBatch) | |||||
| .INPUT(input_handles, TensorType({DT_VARIANT})) | |||||
| .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OUTPUT(output_handles, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListPushBackBatch) | |||||
| /** | |||||
| *@brief Stacks all tensors in the list. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input tensor list. | |||||
| *@li element_shape: A shape compatible with that of elements in the tensor. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. | |||||
| *@li num_elements: The number of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li tensor: The stacked tensor. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListStack operator. | |||||
| */ | |||||
| REG_OP(TensorListStack) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(element_shape, TensorType({DT_INT32})) | |||||
| .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .ATTR(num_elements, Int, -1) | |||||
| .OP_END_FACTORY_REG(TensorListStack) | |||||
| /** | |||||
| *@brief Concatenates all tensors in the list along the 0th dimension. | |||||
| Requires that all tensors have the same shape except the first dimension. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input list. | |||||
| *@li element_shape: The shape of the uninitialized elements in the list. | |||||
| If the first dimension is not -1, it is assumed that all list elements have | |||||
| the same leading dim. | |||||
| *@li leading_dims: The list of leading dims of uninitialized list elements. Used if | |||||
| the leading dim of input_handle.element_shape or the element_shape input arg | |||||
| is not already set. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li tensor: The concatenated result. | |||||
| *@li lengths: Output tensor containing sizes of the 0th dimension of tensors | |||||
| in the list, used for computing the gradient. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListConcatV2 operator. | |||||
| */ | |||||
| REG_OP(TensorListConcatV2) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
| .INPUT(leading_dims, TensorType({DT_INT64})) | |||||
| .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OUTPUT(lengths, TensorType({DT_INT64})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListConcatV2) | |||||
| /** | |||||
| *@brief Splits a tensor into a list. \n | |||||
| *@par Inputs: | |||||
| *@li tensor: The input tensor. | |||||
| *@li element_shape: A shape compatible with that of elements in the tensor. | |||||
| *@li lengths: Vector of sizes of the 0th dimension of tensors in the list. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li output_handle: The list. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListSplit operator. | |||||
| */ | |||||
| REG_OP(TensorListSplit) | |||||
| .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
| .INPUT(lengths, TensorType({DT_INT64})) | |||||
| .OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListSplit) | |||||
| /** | |||||
| *@brief Creates a TensorList which, when stacked, has the value of `tensor`. \n | |||||
| *@par Inputs: | |||||
| *@li tensor: The input tensor. | |||||
| *@li element_shape: The shape of elements in the list. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li output_handle: An output tensor list . \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListFromTensor operator. | |||||
| */ | |||||
| REG_OP(TensorListFromTensor) | |||||
| .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
| .OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListFromTensor) | |||||
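TensorListFromTensor and TensorListStack are inverses: the first splits a tensor along dim 0 into list elements, the second stacks the elements back. A runnable host-side analogy of that round trip:

```cpp
#include <cassert>
#include <vector>

int main() {
  // TensorListFromTensor: a [3, 2] tensor becomes a list of three [2] tensors.
  std::vector<float> tensor = {1, 2, 3, 4, 5, 6};
  std::vector<std::vector<float>> list;
  for (int i = 0; i < 3; ++i)
    list.push_back({tensor[2 * i], tensor[2 * i + 1]});
  // TensorListStack: stacking the list restores the original [3, 2] tensor.
  std::vector<float> restacked;
  for (const auto &e : list)
    restacked.insert(restacked.end(), e.begin(), e.end());
  assert(restacked == tensor);
  return 0;
}
```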
| /** | |||||
| *@brief Resizes the list. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input tensor list. | |||||
| *@li size: The size of the output list. \n | |||||
| *@par Outputs: | |||||
| *@li output_handle: The output tensor list. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListResize operator. | |||||
| */ | |||||
| REG_OP(TensorListResize) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(size, TensorType({DT_INT32})) | |||||
| .OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
| .OP_END_FACTORY_REG(TensorListResize) | |||||
| /** | |||||
| *@brief Creates a Tensor by indexing into the TensorList. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input tensor list. | |||||
| *@li indices: The indices used to index into the list. | |||||
| *@li element_shape: The shape of elements in the list. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li values: The tensor. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListGather operator. | |||||
| */ | |||||
| REG_OP(TensorListGather) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(indices, TensorType({DT_INT32})) | |||||
| .INPUT(element_shape, TensorType({DT_INT32})) | |||||
| .OUTPUT(values, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListGather) | |||||
| /** | |||||
| *@brief Creates a TensorList by indexing into a Tensor. \n | |||||
| *@par Inputs: | |||||
| *@li tensor: The input tensor. | |||||
| *@li indices: The indices used to index into the list. | |||||
| *@li element_shape: The shape of the elements in the list (can be less specified than | |||||
| the shape of the tensor). | |||||
| *@li num_elements: The size of the output list. Must be large enough to accommodate | |||||
| the largest index in indices. If -1, the list is just large enough to include | |||||
| the largest index in indices. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li output_handle: The TensorList. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListScatterV2 operator. | |||||
| */ | |||||
| REG_OP(TensorListScatterV2) | |||||
| .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .INPUT(indices, TensorType({DT_INT32})) | |||||
| .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) | |||||
| .INPUT(num_elements, TensorType({DT_INT32})) | |||||
| .OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListScatterV2) | |||||
| /** | |||||
| *@brief Scatters tensor at indices in an input list. \n | |||||
| *@par Inputs: | |||||
| *@li input_handle: The input tensor list. | |||||
| *@li tensor: The input tensor. | |||||
| *@li indices: The indices used to index into the list. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li output_handle: The TensorList. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListScatterIntoExistingList operator. | |||||
| */ | |||||
| REG_OP(TensorListScatterIntoExistingList) | |||||
| .INPUT(input_handle, TensorType({DT_VARIANT})) | |||||
| .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||||
| DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||||
| DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||||
| DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .INPUT(indices, TensorType({DT_INT32})) | |||||
| .OUTPUT(output_handle, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListScatterIntoExistingList) | |||||
| /** | |||||
| *@brief Concat two tensor lists to a new tensor list. \n | |||||
| *@par Inputs: | |||||
| *@li input_a: The input tensor list A. | |||||
| *@li input_b: The input tensor list B. \n | |||||
| *@par Attributes: | |||||
| *@li element_dtype: The type of elements in the list. \n | |||||
| *@par Outputs: | |||||
| *@li output: The output list. \n | |||||
| *@par Third-party framework compatibility. | |||||
| *Compatible with tensorflow TensorListConcatLists operator. | |||||
| */ | |||||
| REG_OP(TensorListConcatLists) | |||||
| .INPUT(input_a, TensorType({DT_VARIANT})) | |||||
| .INPUT(input_b, TensorType({DT_VARIANT})) | |||||
| .OUTPUT(output, TensorType({DT_VARIANT})) | |||||
| .ATTR(element_dtype, Type, DT_INT32) | |||||
| .OP_END_FACTORY_REG(TensorListConcatLists) | |||||
| } // namespace ge | |||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ | |||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -222,6 +222,24 @@ REG_OP(Bucketize) | |||||
| .REQUIRED_ATTR(boundaries, ListFloat) | .REQUIRED_ATTR(boundaries, ListFloat) | ||||
| .OP_END_FACTORY_REG(Bucketize) | .OP_END_FACTORY_REG(Bucketize) | ||||
| /** | |||||
| *@brief Returns a new tensor with the truncated integer values of the elements of input. \n | |||||
| *@par Inputs: | |||||
| *One input, including: | |||||
| * @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n | |||||
| *@par Outputs: | |||||
| *output_y: A tensor with the same type and shape as input_x \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator Trunc. \n | |||||
| */ | |||||
| REG_OP(Trunc) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) | |||||
| .OP_END_FACTORY_REG(Trunc) | |||||
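Trunc rounds toward zero, which matches std::trunc; in particular it differs from floor on negative values. A minimal runnable check:

```cpp
#include <cassert>
#include <cmath>

int main() {
  // Trunc keeps the integer part and drops the fraction (rounds toward zero).
  assert(std::trunc(2.7f) == 2.0f);
  assert(std::trunc(-2.7f) == -2.0f);  // not -3: truncation, not floor
  return 0;
}
```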
| /** | /** | ||||
| *@brief Computes the sum along sparse segments of a tensor . \n | *@brief Computes the sum along sparse segments of a tensor . \n | ||||
| @@ -365,6 +383,27 @@ REG_OP(GetNext) | |||||
| .ATTR(channel_name, String, "") | .ATTR(channel_name, String, "") | ||||
| .OP_END_FACTORY_REG(GetNext) | .OP_END_FACTORY_REG(GetNext) | ||||
| /** | |||||
| *@brief Get dynamic dims after GetNext. \n | |||||
| *@par Inputs: | |||||
| *input: A nested structure of Tensor objects, from GetNext's output. \n | |||||
| *@par Attributes: | |||||
| *@li shape_info: GE shape_info for each input; -1 means an unknown dim. | |||||
| *@li N: The number of inputs. \n | |||||
| *@par Outputs: | |||||
| *dims: GE unknown dims, a vector of int64. \n | |||||
| */ | |||||
| REG_OP(GetDynamicDims) | |||||
| .DYNAMIC_INPUT(input, TensorType({DT_INT32, DT_INT64})) | |||||
| .OUTPUT(dims, TensorType({DT_INT32, DT_INT64})) | |||||
| .REQUIRED_ATTR(shape_info, ListInt) | |||||
| .REQUIRED_ATTR(N, Int) | |||||
| .OP_END_FACTORY_REG(GetDynamicDims) | |||||
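A sketch of the assumed semantics: walk shape_info against the runtime shapes and emit the value of every dim that shape_info marks as -1, in order. The real attribute is a flat ListInt; it is modeled here as one vector per input for readability, and the helper name is hypothetical.

```cpp
#include <cstdint>
#include <vector>

// For each input, shape_info carries one entry per dim; -1 marks a dim that is
// unknown at compile time. The op reads the actual values off the runtime
// shapes and returns them in order.
std::vector<int64_t> GetDynamicDimsRef(
    const std::vector<std::vector<int64_t>> &runtime_shapes,
    const std::vector<std::vector<int64_t>> &shape_info) {
  std::vector<int64_t> dims;
  for (size_t i = 0; i < shape_info.size(); ++i)
    for (size_t d = 0; d < shape_info[i].size(); ++d)
      if (shape_info[i][d] == -1) dims.push_back(runtime_shapes[i][d]);
  return dims;
}
```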
| /** | /** | ||||
| *@brief End of sequence . \n | *@brief End of sequence . \n | ||||
| @@ -624,6 +663,7 @@ REG_OP(NLLLoss) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT})) | ||||
| .OUTPUT(total_weight, TensorType({DT_FLOAT})) | .OUTPUT(total_weight, TensorType({DT_FLOAT})) | ||||
| .ATTR(reduction, String, "mean") | .ATTR(reduction, String, "mean") | ||||
| .ATTR(ignore_index, Int, -100) | |||||
| .OP_END_FACTORY_REG(NLLLoss) | .OP_END_FACTORY_REG(NLLLoss) | ||||
| /** | /** | ||||
| @@ -653,6 +693,7 @@ REG_OP(NLLLossGrad) | |||||
| .INPUT(total_weight, TensorType({DT_FLOAT})) | .INPUT(total_weight, TensorType({DT_FLOAT})) | ||||
| .OUTPUT(x_grad, TensorType({DT_FLOAT})) | .OUTPUT(x_grad, TensorType({DT_FLOAT})) | ||||
| .ATTR(reduction, String, "mean") | .ATTR(reduction, String, "mean") | ||||
| .ATTR(ignore_index, Int, -100) | |||||
| .OP_END_FACTORY_REG(NLLLossGrad) | .OP_END_FACTORY_REG(NLLLossGrad) | ||||
| /** | /** | ||||
| @@ -710,6 +751,9 @@ REG_OP(IFMR) | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with mindspore | *Compatible with mindspore | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(WtsARQ) | REG_OP(WtsARQ) | ||||
| @@ -741,6 +785,9 @@ REG_OP(WtsARQ) | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with mindspore | *Compatible with mindspore | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ActsULQ) | REG_OP(ActsULQ) | ||||
| @@ -768,6 +815,9 @@ REG_OP(ActsULQ) | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with mindspore | *Compatible with mindspore | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ActsULQInputGrad) | REG_OP(ActsULQInputGrad) | ||||
| @@ -790,6 +840,9 @@ REG_OP(ActsULQInputGrad) | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with mindspore | *Compatible with mindspore | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ActULQClampMaxGrad) | REG_OP(ActULQClampMaxGrad) | ||||
| @@ -812,6 +865,9 @@ REG_OP(ActULQClampMaxGrad) | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| *Compatible with mindspore | *Compatible with mindspore | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(ActULQClampMinGrad) | REG_OP(ActULQClampMinGrad) | ||||
| @@ -821,6 +877,170 @@ REG_OP(ActULQClampMinGrad) | |||||
| .OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(ActULQClampMinGrad) | .OP_END_FACTORY_REG(ActULQClampMinGrad) | ||||
| /** | |||||
| * @brief Computes Lp norm. | |||||
| * @par Inputs: | |||||
| * @li x: An ND tensor of type float16, float32. \n | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li p: Int, or "inf" / "-inf". Defaults to 2. | |||||
| * @li axes: ListInt, {} means all axes will be computed. | |||||
| * @li keepdim: Bool, default is false. | |||||
| * @li epsilon: Float, default is 1e-12. \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor of type float16, float32. The shape of y depends | |||||
| * on axes and keepdim. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator LpNorm. | |||||
| */ | |||||
| REG_OP(LpNorm) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(p, Int, 2) | |||||
| .ATTR(axes, ListInt, {}) | |||||
| .ATTR(keepdim, Bool, false) | |||||
| .ATTR(epsilon, Float, 1e-12) | |||||
| .OP_END_FACTORY_REG(LpNorm) | |||||
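For the default axes = {} (all axes reduced), the result is the scalar (sum_i |x_i|^p)^(1/p). A reference sketch; how epsilon enters is an assumption here (a lower bound on the result, as in common norm implementations), and the helper name is hypothetical:

```cpp
#include <algorithm>
#include <cmath>
#include <vector>

// y = (sum_i |x_i|^p)^(1/p), clamped from below by epsilon (assumed behavior).
float LpNormAll(const std::vector<float> &x, int p, float epsilon = 1e-12f) {
  double acc = 0.0;
  for (float v : x) acc += std::pow(std::fabs(v), p);
  return std::max(static_cast<float>(std::pow(acc, 1.0 / p)), epsilon);
}
// LpNormAll({3.0f, 4.0f}, 2) == 5.0f
```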
| /** | |||||
| * @brief Constructs a complex tensor from its real and imaginary parts. | |||||
| * @par Inputs: | |||||
| * @li real: An ND tensor of type float32 or double. | |||||
| * @li imag: An ND tensor of type float32 or double. \n | |||||
| * | |||||
| * @par Outputs: | |||||
| * @li out: An ND tensor of type complex64, complex128 \n | |||||
| */ | |||||
| REG_OP(Complex) | |||||
| .INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .INPUT(imag, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .OUTPUT(out, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .ATTR(Tout, Type, DT_COMPLEX64) | |||||
| .OP_END_FACTORY_REG(Complex) | |||||
| /** | |||||
| * @brief Returns the imaginary part of a complex tensor. | |||||
| * @par Inputs: | |||||
| * @li input: An ND tensor of type complex64 or complex128. \n | |||||
| * | |||||
| * @par Outputs: | |||||
| * @li output: An ND tensor of type float32 or double. \n | |||||
| */ | |||||
| REG_OP(Imag) | |||||
| .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .ATTR(Tout, Type, DT_FLOAT) | |||||
| .OP_END_FACTORY_REG(Imag) | |||||
| /** | |||||
| * @brief Returns the argument (element-wise angle) of a complex tensor. | |||||
| * @par Inputs: | |||||
| * @li input: An ND tensor of type complex64 or complex128. \n | |||||
| * | |||||
| * @par Outputs: | |||||
| * @li output: An ND tensor of type float32 or double. \n | |||||
| */ | |||||
| REG_OP(Angle) | |||||
| .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
| .ATTR(Tout, Type, DT_FLOAT) | |||||
| .OP_END_FACTORY_REG(Angle) | |||||
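Complex, Imag and Angle map directly onto std::complex: construction from parts, std::imag, and std::arg (i.e. atan2(imag, real)). A runnable check:

```cpp
#include <cassert>
#include <cmath>
#include <complex>

int main() {
  // Complex(real=3, imag=4) builds 3 + 4i; Imag and Angle read it back.
  std::complex<float> z(3.0f, 4.0f);
  assert(std::imag(z) == 4.0f);                            // Imag
  float angle = std::arg(z);                               // Angle
  assert(std::fabs(angle - std::atan2(4.0f, 3.0f)) < 1e-6f);
  return 0;
}
```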
| /** | |||||
| *@brief Computes the gradient of SoftMarginLoss. \n | |||||
| *@par Inputs: | |||||
| *Three inputs, including: | |||||
| * @li predict: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li label: A tensor with the same shape as predict. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li dout: A tensor with the same shape as predict. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| *@par Attributes: | |||||
| * @li reduction: Specifies the reduction to apply to the output: | |||||
| * 'none' | 'mean' | 'sum'. Default: 'mean'. \n | |||||
| *@par Outputs: | |||||
| * gradient: A Tensor with the same type of predict. \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator SoftMarginLoss Backward. \n | |||||
| */ | |||||
| REG_OP(SoftMarginLossGrad) | |||||
| .INPUT(predict, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(label, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(dout, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(gradient, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(SoftMarginLossGrad) | |||||
| /** | |||||
| *@brief Computes batched the p-norm distance between each pair of | |||||
| *the two collections of row vectors. \n | |||||
| *@par Inputs: | |||||
| *Two inputs, including: | |||||
| * @li x1: A tensor with shape BxPxM. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li x2: A tensor with shape BxRxM. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| *@par Attributes: | |||||
| * @li p: An optional float >= 0 or inf. Defaults to 2.0. \n | |||||
| *@par Outputs: | |||||
| * y: A Tensor with the same type as x1, with shape BxPxR. \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator Cdist. \n | |||||
| */ | |||||
| REG_OP(Cdist) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(p, Float, 2.0) | |||||
| .OP_END_FACTORY_REG(Cdist) | |||||
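The computed quantity, per batch, is y[i][j] = (sum_m |x1[i][m] - x2[j][m]|^p)^(1/p). A reference sketch for a single batch (B = 1); the helper name is hypothetical:

```cpp
#include <cmath>
#include <vector>

// Pairwise p-norm distance between the rows of x1 (PxM) and x2 (RxM).
std::vector<std::vector<float>> CdistRef(
    const std::vector<std::vector<float>> &x1,
    const std::vector<std::vector<float>> &x2, float p = 2.0f) {
  std::vector<std::vector<float>> y(x1.size(), std::vector<float>(x2.size()));
  for (size_t i = 0; i < x1.size(); ++i)
    for (size_t j = 0; j < x2.size(); ++j) {
      double acc = 0.0;
      for (size_t m = 0; m < x1[i].size(); ++m)
        acc += std::pow(std::fabs(x1[i][m] - x2[j][m]), p);
      y[i][j] = static_cast<float>(std::pow(acc, 1.0 / p));
    }
  return y;  // CdistRef({{0, 0}}, {{3, 4}})[0][0] == 5 for p = 2
}
```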
| /** | |||||
| *@brief Computes the grad of x1 in cdist. \n | |||||
| *@par Inputs: | |||||
| *Four inputs, including: | |||||
| * @li grad: Grad with shape BxPxR. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li x1: A tensor with shape BxPxM. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li x2: A tensor with shape BxRxM. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li cdist: Output tensor of cdist forward with shape BxPxR. | |||||
| * Must be one of the following types: float16, float32. \n | |||||
| *@par Attributes: | |||||
| * @li p: An optional float >= 0 or inf. Defaults to 2.0. \n | |||||
| *@par Outputs: | |||||
| * y: A Tensor with the same type and shape as x1. \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator Cdist Backward. \n | |||||
| */ | |||||
| REG_OP(CdistGrad) | |||||
| .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(cdist, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .ATTR(p, Float, 2.0) | |||||
| .OP_END_FACTORY_REG(CdistGrad) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -38,8 +38,8 @@ namespace ge { | |||||
| * float32, int32. Has format [ND, NHWC] . \n | * float32, int32. Has format [ND, NHWC] . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
| *@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
| *@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
| *@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | ||||
| @@ -70,8 +70,8 @@ REG_OP(MatMul) | |||||
| * float32, int32. Has format [ND, NHWC] . \n | * float32, int32. Has format [ND, NHWC] . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
| *@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
| *@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. | |||||
| *@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | ||||
| @@ -156,8 +156,8 @@ REG_OP(GEMM) | |||||
| * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | |||||
| *@li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n | |||||
| *@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | |||||
| *@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | *y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | ||||
| @@ -175,6 +175,41 @@ REG_OP(BatchMatMul) | |||||
| .ATTR(adj_x2, Bool, false) | .ATTR(adj_x2, Bool, false) | ||||
| .OP_END_FACTORY_REG(BatchMatMul) | .OP_END_FACTORY_REG(BatchMatMul) | ||||
| /** | |||||
| * @brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li x1: A matrix Tensor. Must be one of the following types: float16, | |||||
| * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. | |||||
| * @li x2: A matrix Tensor. Must be one of the following types: float16, | |||||
| * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||||
| * @li bias: A matrix Tensor. Must be one of the following types: float16, | |||||
| * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||||
| * @par Attributes: | |||||
| * @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | |||||
| * @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n | |||||
| * @par Outputs: | |||||
| * y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | |||||
| * float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator BatchMatmul. | |||||
| */ | |||||
| REG_OP(BatchMatMulV2) | |||||
| .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
| .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
| .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
| .ATTR(adj_x1, Bool, false) | |||||
| .ATTR(adj_x2, Bool, false) | |||||
| .OP_END_FACTORY_REG(BatchMatMulV2) | |||||
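A construction sketch for BatchMatMulV2, assuming the `set_input_*`/`set_attr_*` accessors that REG_OP generates (builds only against the Ascend GE headers). With adj_x2 = true each batch multiplies against the transposed second operand:

```cpp
#include "matrix_calculation_ops.h"  // this header (Ascend SDK required)

// x1: [B, M, K], x2: [B, N, K]; with adj_x2 = true the op computes
// y[b] = x1[b] * transpose(x2[b]), so y is [B, M, N].
ge::op::BatchMatMulV2 BuildBatchMatMul(ge::Operator &x1, ge::Operator &x2) {
  return ge::op::BatchMatMulV2("bmm")
      .set_input_x1(x1)
      .set_input_x2(x2)
      .set_attr_adj_x1(false)
      .set_attr_adj_x2(true);
}
```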
| /** | /** | ||||
| *@brief Computes half the L2 norm of a tensor without the sqrt . \n | *@brief Computes half the L2 norm of a tensor without the sqrt . \n | ||||
| @@ -979,6 +1014,88 @@ REG_OP(MatrixDiagV2) | |||||
| .OUTPUT(output, TensorType::BasicType()) | .OUTPUT(output, TensorType::BasicType()) | ||||
| .OP_END_FACTORY_REG(MatrixDiagV2) | .OP_END_FACTORY_REG(MatrixDiagV2) | ||||
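IndexAdd below is registered without a doc comment. A sketch of one, inferred from the operator's signature and its apparent Pytorch counterpart index_add (the accumulation formula is an assumption):

| /** | |||||
| *@brief Accumulates "updates" into "var" along dimension "axis", at the | |||||
| positions given by "indices": var[..., indices[i], ...] += updates[..., i, ...] . \n | |||||

| *@par Inputs: | |||||
| *@li var: A Tensor. Must be one of the following types: int32, int8, uint8, float32, float16. | |||||
| *@li indices: A 1D Tensor of type int32. | |||||
| *@li updates: A Tensor. Has the same type as "var". \n | |||||

| *@par Attributes: | |||||
| *@li axis: An optional int. The dimension along which to index. Defaults to 0. \n | |||||

| *@par Outputs: | |||||
| *var_out: A Tensor. Has the same type as "var". \n | |||||

| *@par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator index_add. | |||||
| */ | |||||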
| REG_OP(IndexAdd) | |||||
| .INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
| .INPUT(indices, TensorType({DT_INT32})) | |||||
| .INPUT(updates, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
| .OUTPUT(var_out, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
| .ATTR(axis, Int, 0) | |||||
| .OP_END_FACTORY_REG(IndexAdd) | |||||
| /** | |||||
| *@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n | |||||
| *@par Inputs: | |||||
| * One input, including: | |||||
| *@li x: A Tensor. Must be one of the following types: | |||||
| * float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||||
| * qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n | |||||
| *@par Attributes: | |||||
| *@li diagonal: An optional int. The diagonal to consider. Defaults to 0. \n | |||||
| *@par Outputs: | |||||
| *y: A Tensor. Has the same type as "x" . \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Triu. | |||||
| */ | |||||
| REG_OP(Triu) | |||||
| .INPUT(x, TensorType::BasicType()) | |||||
| .ATTR(diagonal, Int, 0) | |||||
| .OUTPUT(y, TensorType::BasicType()) | |||||
| .OP_END_FACTORY_REG(Triu) | |||||
| /** | |||||
| *@brief: Returns the lower triangular part of a matrix (2-D tensor) or batch of matrices input \n | |||||
| *@par Inputs: | |||||
| * One input, including: | |||||
| *@li x: A Tensor. Must be one of the following types: | |||||
| * float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||||
| * qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n | |||||
| *@par Attributes: | |||||
| *@li diagonal: An optional int. The diagonal to consider. Defaults to 0. \n | |||||
| *@par Outputs: | |||||
| *y: A Tensor. Has the same type as "x" . \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Tril. | |||||
| */ | |||||
| REG_OP(Tril) | |||||
| .INPUT(x, TensorType::BasicType()) | |||||
| .ATTR(diagonal, Int, 0) | |||||
| .OUTPUT(y, TensorType::BasicType()) | |||||
| .OP_END_FACTORY_REG(Tril) | |||||
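Both ops reduce to one masking rule on each trailing MxN matrix: Triu keeps entries with col - row >= diagonal, Tril keeps entries with col - row <= diagonal. A reference sketch for a single row-major matrix (helper name hypothetical):

```cpp
#include <vector>

// Upper-triangular mask: zero every element strictly below the shifted diagonal.
std::vector<float> TriuRef(std::vector<float> x, int m, int n, int diagonal = 0) {
  for (int r = 0; r < m; ++r)
    for (int c = 0; c < n; ++c)
      if (c - r < diagonal) x[r * n + c] = 0.0f;
  return x;
}
```

Tril is the mirror image: zero where c - r > diagonal instead.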
| /** | |||||
| *@brief Sums the product of the elements of the input operands along dimensions | |||||
| specified using a notation based on the Einstein summation convention. | |||||
| *@par Inputs: | |||||
| * @li x: A list of input tensors. Must be one of the following types: | |||||
| * int32, float16, float32. It's a dynamic input. \n | |||||
| *@par Attributes: | |||||
| *@li equation: The subscripts for the Einstein summation. | |||||
| *@li tensor_size: The number of input tensors. \n | |||||
| *@par Outputs: | |||||
| *@li y: The result of the Einstein summation. Has the same type as the inputs. \n | |||||
| *@attention Constraints: | |||||
| *The attribute tensor_size must be an Int. \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with Pytorch einsum operator. | |||||
| */ | |||||
| REG_OP(EinSum) | |||||
| .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .REQUIRED_ATTR(equation, String) | |||||
| .REQUIRED_ATTR(tensor_size, Int) | |||||
| .OP_END_FACTORY_REG(EinSum) | |||||
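What the equation attribute encodes, by example: in "ij,jk->ik" the repeated subscript j is summed out and the free subscripts i, k index the output, which is exactly a matrix multiply. A naive reference for that one equation (helper name hypothetical):

```cpp
#include <vector>

// einsum("ij,jk->ik", a, b): a is i x j, b is j x k, result is i x k.
std::vector<float> EinsumIjJkIk(const std::vector<float> &a,
                                const std::vector<float> &b,
                                int i, int j, int k) {
  std::vector<float> y(i * k, 0.0f);
  for (int ii = 0; ii < i; ++ii)
    for (int jj = 0; jj < j; ++jj)          // summed subscript
      for (int kk = 0; kk < k; ++kk)
        y[ii * k + kk] += a[ii * j + jj] * b[jj * k + kk];
  return y;
}
```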
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -144,6 +144,64 @@ REG_OP(BatchNorm) | |||||
| /** | /** | ||||
| *@brief Performs batch normalization . \n | *@brief Performs batch normalization . \n | ||||
| *@par Inputs: | |||||
| * Five inputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported) | |||||
| *@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D. | |||||
| *@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
| if input "x" is with format NDC1HWC0. Specifies the scaling factor. | |||||
| *@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
| if input "x" is with format NDC1HWC0. Specifies the offset. | |||||
| *@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
| if input "x" is with format NDC1HWC0. Specifies the mean used for inference. Must be "None" if the | |||||
| operation is used for training. | |||||
| *@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be | |||||
| 6D if input "x" is with format NDC1HWC0. Specifies the variance used for inference. Must be "None" | |||||
| if the operation is used for training . \n | |||||

| *@par Attributes: | |||||
| *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". | |||||
| *@li data_format: An optional string, specifying the format of "x". Defaults to "NCDHW". | |||||
| *@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | |||||

| *@par Outputs: | |||||
| * Five outputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported) | |||||
| *@li y: A 5D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D. | |||||
| *@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
| if input "x" is with format NDC1HWC0. Specifies the mean of "x". | |||||
| *@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | |||||
| Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x". | |||||
| *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | |||||
| Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||||
| *@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | |||||
| Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n | |||||
| *@attention Constraints: | |||||
| *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | |||||
| then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". | |||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n | |||||
| *@par Third-party framework compatibility | |||||
| *@li Compatible with the TensorFlow operator fused_batch_norm. | |||||
| *@li Compatible with the TensorFlow operator fused_batch_norm_v2. | |||||
| */ | |||||
| REG_OP(BatchNorm3D) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(scale, TensorType({DT_FLOAT})) | |||||
| .INPUT(offset, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||||
| .ATTR(epsilon, Float, 0.0001) | |||||
| .ATTR(data_format, String, "NCDHW") | |||||
| .ATTR(is_training, Bool, true) | |||||
| .OP_END_FACTORY_REG(BatchNorm3D) | |||||
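In inference mode the op applies the standard per-channel transform y = scale * (x - mean) / sqrt(variance + epsilon) + offset. A one-element reference of that formula (a sketch of the math, not the kernel):

```cpp
#include <cmath>

// Normalize one element with its channel's scale/offset/mean/variance;
// epsilon defaults to the op's 0.0001.
float BatchNormOne(float x, float scale, float offset,
                   float mean, float variance, float epsilon = 0.0001f) {
  return scale * (x - mean) / std::sqrt(variance + epsilon) + offset;
}
```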
| /** | |||||
| *@brief Performs batch normalization . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * Five inputs, including: (NHWC or NCHW supported) | * Five inputs, including: (NHWC or NCHW supported) | ||||
| *@li x: A 4D Tensor of type float16 or float32. | *@li x: A 4D Tensor of type float16 or float32. | ||||
| @@ -242,6 +300,52 @@ REG_OP(BatchNormGrad) | |||||
| /** | /** | ||||
| *@brief Performs the backpropagation of BatchNorm . \n | *@brief Performs the backpropagation of BatchNorm . \n | ||||
| *@par Inputs: | |||||
| * Five inputs, including: | |||||
| *@li y_backprop: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient. | |||||
| *@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0. | |||||
| *@li scale: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. | |||||
| *@li reserve_space_1: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm3D. | |||||
| *@li reserve_space_2: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm3D . \n | |||||
| *@par Attributes: | |||||
| *@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". | |||||
| *@li data_format: An optional string. Defaults to "NCDHW". | |||||
| *@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n | |||||
| *@par Outputs: | |||||
| *@li x_backprop: A Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "x". | |||||
| *@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "scale". | |||||
| *@li offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "offset". | |||||
| *@li reserve_space_4: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output. | |||||
| *@li reserve_space_5: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n | |||||
| *@attention Constraints: | |||||
| * The preceding layer of this operator must be operator BatchNorm3D . \n | |||||
| *@see BatchNorm | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNorm3DGrad. | |||||
| */ | |||||
| REG_OP(BatchNorm3DGrad) | |||||
| .INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(scale, TensorType({DT_FLOAT})) | |||||
| .INPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||||
| .INPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(scale_backprop, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(offset_backprop, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(reserve_space_4, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(reserve_space_5, TensorType({DT_FLOAT})) | |||||
| .ATTR(epsilon, Float, 0.0001) | |||||
| .ATTR(data_format, String, "NCDHW") | |||||
| .ATTR(is_training, Bool, true) | |||||
| .OP_END_FACTORY_REG(BatchNorm3DGrad) | |||||
| /** | |||||
| *@brief Performs the backpropagation of BatchNorm . \n | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * Five inputs, including: | * Five inputs, including: | ||||
| *@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. | *@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -365,6 +365,25 @@ REG_OP(BiasAddGrad) | |||||
| * 4-D with shape [batch, out_height, out_width, out_channels] | * 4-D with shape [batch, out_height, out_width, out_channels] | ||||
| * or [batch, out_channels, out_height, out_width]. | * or [batch, out_channels, out_height, out_width]. | ||||
| * Gradients with respect to the output of the convolution. | * Gradients with respect to the output of the convolution. | ||||
| *\n | |||||
| *\n | |||||
| * The following are the supported data types and data formats: | |||||
| *@verbatim | |||||
| | Tensor | out_backprop | filter | y | |||||
| ------------|-------------|---------|-------- | |||||
| | Data Type | float16 | float16 | float16 | |||||
| | |-------------|---------|-------- | |||||
| | | float32 | float32 | float32 | |||||
| | |-------------|---------|-------- | |||||
| | | float64 | float64 | float64 | |||||
| ------------|-------------|---------|-------- | |||||
| | Format | NCHW | NCHW | NCHW | |||||
| | | NHWC | HWCN | NHWC | |||||
| @endverbatim | |||||
| * For float32 and float64 type, the actual calculation on the chip is based on | |||||
| * float16. | |||||
| *\n | |||||
| * | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Five attributes: | * Five attributes: | ||||
| * @li strides: A tuple/list of 4 integers. The stride of the sliding window | * @li strides: A tuple/list of 4 integers. The stride of the sliding window | ||||
| @@ -377,8 +396,53 @@ REG_OP(BiasAddGrad) | |||||
| * channels. | * channels. | ||||
| * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | ||||
| * "NHWC". Specify the data format of the input and output data. | * "NHWC". Specify the data format of the input and output data. | ||||
| *\n | |||||
| *\n | |||||
| * The following value range restrictions must be met: | |||||
| *@verbatim | |||||
| | Name | Field | Scope | |||||
| -------------------|----------|-------------- | |||||
| | input_size | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Filter | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| -------------------|----------|-------------- | |||||
| | out_backprop | H*strideH| [1, 4096] | |||||
| | | W*strideW| [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | y(fmap) | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Stride | H | [1, 63] | |||||
| | | W | [1, 63] | |||||
| -------------------|----------|-------------- | |||||
| | Padding | Top | [0, 255] | |||||
| | | Bottom | [0, 255] | |||||
| | | Left | [0, 255] | |||||
| | | Right | [0, 255] | |||||
| -------------------|----------|-------------- | |||||
| | Dilation | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| @endverbatim | |||||
| * On Ascend910, the H and W of fmap or out_backprop cannot be 1 when | |||||
| * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1. | |||||
| * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w must be < 4096. | |||||
| *\n | |||||
| * | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type as filter,and has same format as input_size. | * y: A Tensor. Has the same type as filter,and has same format as input_size. | ||||
| *\n | |||||
| * out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||||
| * (dilation_h * (filter_height - 1) + 1)) | |||||
| * / stride_h + 1 | |||||
| *\n | |||||
| * out_backprop_width = (fmap_width + pad_left + pad_right - | |||||
| * (dilation_w * (filter_width - 1) + 1)) | |||||
| * / stride_w + 1 | |||||
| *\n | |||||
| * | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with Tensorflow's conv2d_backprop_input | * Compatible with Tensorflow's conv2d_backprop_input | ||||
| */ | */ | ||||
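The out_backprop size formulas quoted above are ordinary integer arithmetic; as a sanity check, a typical 7x7, stride-2, pad-3 first convolution layer maps a 224 input to 112:

```cpp
#include <cassert>

// The formula from the doc comment, per spatial dimension:
// out = (fmap + pad_before + pad_after - (dilation * (filter - 1) + 1)) / stride + 1
int OutBackpropDim(int fmap, int pad_before, int pad_after,
                   int filter, int dilation, int stride) {
  return (fmap + pad_before + pad_after - (dilation * (filter - 1) + 1)) / stride + 1;
}

int main() {
  // fmap_h = 224, pads 3/3, filter_h = 7, dilation 1, stride 2 -> 112.
  assert(OutBackpropDim(224, 3, 3, 7, 1, 2) == 112);
  return 0;
}
```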
| @@ -454,6 +518,21 @@ REG_OP(Conv2DBackpropInputD) | |||||
| * @li bias: An optional tensor. Must have the same type as "y". | * @li bias: An optional tensor. Must have the same type as "y". | ||||
| * @li offset_w: An optional 1D tensor for quantized deconvolution. | * @li offset_w: An optional 1D tensor for quantized deconvolution. | ||||
| * Type is int8. Reserved.\n | * Type is int8. Reserved.\n | ||||
| *\n | |||||
| *\n | |||||
| * The following are the supported data types and data formats: | |||||
| *@verbatim | |||||
| | Tensor | x | filter | bias | y | |||||
| ------------|---------|---------|---------|-------- | |||||
| | Data Type | float16 | float16 | float16 | float16 | |||||
| | |---------|---------|---------|-------- | |||||
| | | int8 | int8 | int32 | int32 | |||||
| ------------|---------|---------|---------|-------- | |||||
| | Format | NCHW | NCHW | ND | NCHW | |||||
| @endverbatim | |||||
| * For int8, a dequant or requant operator must be followed. | |||||
| *\n | |||||
| * | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Six attributes: | * Six attributes: | ||||
| * @li strides: A tuple or list of 2 integers. The stride of the sliding window | * @li strides: A tuple or list of 2 integers. The stride of the sliding window | ||||
| @@ -467,9 +546,54 @@ REG_OP(Conv2DBackpropInputD) | |||||
| * @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n | * @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n | ||||
| Specify the data format of the input and output data. | Specify the data format of the input and output data. | ||||
| * @li offset_x: An optional integer for quantized deconvolution. | * @li offset_x: An optional integer for quantized deconvolution. | ||||
| * Defaults to "0". | |||||
| * The negative offset added to the input image for int8 type. Ensure offset_x | |||||
| * is within the effective range of int8 [-128, 127]. Defaults to "0". | |||||
| *\n | |||||
| *\n | |||||
| * The following value range restrictions must be met: | |||||
| *@verbatim | |||||
| | Name | Field | Scope | |||||
| -------------------|----------|-------------- | |||||
| | x (out_backprop) | H*strideH| [1, 4096] | |||||
| | | W*strideW| [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Filter | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| -------------------|----------|-------------- | |||||
| | y (fmap) | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Stride | H | [1, 63] | |||||
| | | W | [1, 63] | |||||
| -------------------|----------|-------------- | |||||
| | Padding | Top | [0, 255] | |||||
| | | Bottom | [0, 255] | |||||
| | | Left | [0, 255] | |||||
| | | Right | [0, 255] | |||||
| -------------------|----------|-------------- | |||||
| | Dilation | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| -------------------|----------|-------------- | |||||
| | Offset_x | | [-128, 127] | |||||
| @endverbatim | |||||
| * On Ascend910, the H and W dimensions of fmap or out_backprop do not support the value 1 when | |||||
| * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1. | |||||
| * If filter_h = 1 and filter_w = 1, then out_backprop_w * stride_h * stride_w must be less than 4096. | |||||
| *\n | |||||
| * | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. 4D tensor with shape [batch, channels, height, width]. | * y: A Tensor. 4D tensor with shape [batch, channels, height, width]. | ||||
| *\n | |||||
| * out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||||
| * (dilation_h * (filter_height - 1) + 1)) | |||||
| * / stride_h + 1 | |||||
| *\n | |||||
| * out_backprop_width = (fmap_width + pad_left + pad_right - | |||||
| * (dilation_w * (filter_width - 1) + 1)) | |||||
| * / stride_w + 1 | |||||
| *\n | |||||
| * | |||||
| * When type of x is float16, the type of y must be float16. | * When type of x is float16, the type of y must be float16. | ||||
| * When type of x is int8, the type of y must be int32. | * When type of x is int8, the type of y must be int32. | ||||
| */ | */ | ||||
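The Deconvolution range table above reduces to a handful of interval checks. A hypothetical host-side validator sketched from that table (the struct and function names are the editor's, not part of this header):

#include <cstdint>

struct Range { int64_t lo, hi; };
static bool In(int64_t v, Range r) { return v >= r.lo && v <= r.hi; }

// Checks the value ranges documented for Deconvolution above.
static bool DeconvRangesOk(int64_t out_bp_h, int64_t out_bp_w, int64_t stride_h, int64_t stride_w,
                           int64_t filter_h, int64_t filter_w, int64_t fmap_h, int64_t fmap_w,
                           int64_t dilation_h, int64_t dilation_w, int64_t offset_x) {
  return In(out_bp_h * stride_h, {1, 4096}) && In(out_bp_w * stride_w, {1, 4096}) &&
         In(filter_h, {1, 255})  && In(filter_w, {1, 255}) &&
         In(fmap_h, {1, 4096})   && In(fmap_w, {1, 4096}) &&
         In(stride_h, {1, 63})   && In(stride_w, {1, 63}) &&
         In(dilation_h, {1, 255}) && In(dilation_w, {1, 255}) &&
         In(offset_x, {-128, 127});  // padding checks ([0, 255] each side) follow the same pattern
}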
| @@ -502,6 +626,25 @@ REG_OP(Deconvolution) | |||||
| * [batch, out_height, out_width, out_channels] or [batch, out_channels, | * [batch, out_height, out_width, out_channels] or [batch, out_channels, | ||||
| * out_height, out_width]. Gradients with respect to the output of the | * out_height, out_width]. Gradients with respect to the output of the | ||||
| * convolution. | * convolution. | ||||
| *\n | |||||
| *\n | |||||
| * The following are the supported data types and data formats: | |||||
| *@verbatim | |||||
| | Tensor | x | out_backprop | y | |||||
| ------------|---------|--------------|--------- | |||||
| | Data Type | float16 | float16 | float16 | |||||
| | |---------|--------------|--------- | |||||
| | | float32 | float32 | float32 | |||||
| | |---------|--------------|--------- | |||||
| | | float64 | float64 | float64 | |||||
| |-----------|---------|--------------|--------- | |||||
| | Format | NCHW | NCHW | NCHW | |||||
| | | NHWC | NHWC | HWCN | |||||
| @endverbatim | |||||
| * For float32 and float64 types of x and out_backprop, the actual calculation on the chip | |||||
| * is performed in float16. | |||||
| *\n | |||||
| * | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Five attributes: | * Five attributes: | ||||
| * @li strides: A tuple/list of 4 integers. The stride of the sliding window | * @li strides: A tuple/list of 4 integers. The stride of the sliding window | ||||
| @@ -514,8 +657,52 @@ REG_OP(Deconvolution) | |||||
| * channels. | * channels. | ||||
| * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to | ||||
| * "NHWC". Specify the data format of the input and output data. | * "NHWC". Specify the data format of the input and output data. | ||||
| *\n | |||||
| *\n | |||||
| * The following value range restrictions must be met: | |||||
| *@verbatim | |||||
| | Name | Field | Scope | |||||
| -------------------|----------|-------------- | |||||
| | x(fmap) | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Filter Size | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| -------------------|----------|-------------- | |||||
| | out_backprop | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | y | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Stride | H | [1, 63] | |||||
| | | W | [1, 63] | |||||
| -------------------|----------|-------------- | |||||
| | Padding | Top | [0, 255] | |||||
| | | Bottom | [0, 255] | |||||
| | | Left | [0, 255] | |||||
| | | Right | [0, 255] | |||||
| -------------------|----------|-------------- | |||||
| | Dilation | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| @endverbatim | |||||
| * On Ascend910, the H and W dimensions of out_backprop do not support the value 1 when | |||||
| * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1. | |||||
| *\n | |||||
| * | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type as "x" and the same format as "filter_size". | * y: A Tensor. Has the same type as "x" and the same format as "filter_size". | ||||
| *\n | |||||
| * out_backprop_height = (in_height + pad_top + pad_bottom - | |||||
| * (dilation_h * (filter_height - 1) + 1)) | |||||
| * / stride_h + 1 | |||||
| *\n | |||||
| * out_backprop_width = (in_width + pad_left + pad_right - | |||||
| * (dilation_w * (filter_width - 1) + 1)) | |||||
| * / stride_w + 1 | |||||
| *\n | |||||
| * | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with Tensorflow's conv2d_backprop_filter | * Compatible with Tensorflow's conv2d_backprop_filter | ||||
| */ | */ | ||||
| @@ -617,8 +804,7 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| * (top, bottom, left, right) side of the input. | * (top, bottom, left, right) side of the input. | ||||
| *@li dilations: Optional. A list of 4 integers. The dilation factor for each | *@li dilations: Optional. A list of 4 integers. The dilation factor for each | ||||
| * dimension of input. The dimension order is determined by the data format of | * dimension of input. The dimension order is determined by the data format of | ||||
| * "x". The N and C dimensions must be set to 1. The H and W dimensions must be | |||||
| * set to 1 for int8 type. Defaults to [1, 1, 1, 1]. | |||||
| * "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1]. | |||||
| *@li groups: Optional. An integer of type int32. The number of blocked | *@li groups: Optional. An integer of type int32. The number of blocked | ||||
| * connections from input channels to output channels. In_channels and | * connections from input channels to output channels. In_channels and | ||||
| * out_channels must both be divisible by "groups". Defaults to 1. | * out_channels must both be divisible by "groups". Defaults to 1. | ||||
| @@ -652,6 +838,8 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| | Offset_x | | [-128, 127] | | Offset_x | | [-128, 127] | ||||
| @endverbatim | @endverbatim | ||||
| * The W dimension of the input image supports values exceeding 4096, but this | |||||
| * may cause compilation errors. | |||||
| *\n | *\n | ||||
| * | * | ||||
| *@par Outputs: | *@par Outputs: | ||||
| @@ -666,21 +854,6 @@ REG_OP(Conv2DBackpropFilterD) | |||||
| * out_width = (in_width + pad_left + pad_right - | * out_width = (in_width + pad_left + pad_right - | ||||
| * (dilation_w * (filter_width - 1) + 1)) | * (dilation_w * (filter_width - 1) + 1)) | ||||
| * / stride_w + 1 | * / stride_w + 1 | ||||
| * | |||||
| *@attention Constraints: | |||||
| *@li The following restrictions on the output must be met: | |||||
| *@verbatim | |||||
| | Output | Restrictions | |||||
| ----------|-------------------------------- | |||||
| | H == 1 | H * W(input) == H * W(filter) | |||||
| | W == 1 | | |||||
| ----------|-------------------------------- | |||||
| | H != 1 | W(input) == W(filter) | |||||
| | W == 1 | Only for Ascend310 Hi3796V300CS | |||||
| @endverbatim | |||||
| * "H * W (input)" indicates the image size after padding and "H * W (filter)" | |||||
| * indicates the filter size after dilation."W(input)" and W(filter) indicate | |||||
| * the same rule on the W dimension. | |||||
| *\n | *\n | ||||
| * | * | ||||
| *@par Quantization supported or not | *@par Quantization supported or not | ||||
| @@ -778,7 +951,7 @@ REG_OP(Conv2DCompress) | |||||
| * With the format "HWCN" , the data is stored in the order of: [filter_height, | * With the format "HWCN" , the data is stored in the order of: [filter_height, | ||||
| * filter_width, in_channels / groups, out_channels]. | * filter_width, in_channels / groups, out_channels]. | ||||
| *@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format | *@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format | ||||
| * "NHWC", the data is stored in the order of: [batch, in_height, in_width, | |||||
| * "NHWC", the data is stored in the order of: [batch, out_height, out_width, | |||||
| * deformable_groups * filter_height * filter_width * 3]. | * deformable_groups * filter_height * filter_width * 3]. | ||||
| *@li bias: An optional 1D tensor of additive biases to the filter outputs. | *@li bias: An optional 1D tensor of additive biases to the filter outputs. | ||||
| * The data is stored in the order of: [out_channels]. | * The data is stored in the order of: [out_channels]. | ||||
| @@ -822,25 +995,12 @@ REG_OP(Conv2DCompress) | |||||
| *@verbatim | *@verbatim | ||||
| | Name | Field | Scope | | Name | Field | Scope | ||||
| --------------------|--------|---------------------------- | --------------------|--------|---------------------------- | ||||
| | Input Image Size | H | [1, 100000] | |||||
| | | W | [1, 4096] | |||||
| --------------------|--------|---------------------------- | |||||
| | Filter Size | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| | Input Image Size | H | [1, 100000 / filter_height] | |||||
| | | W | [1, 4096 / filter_width] | |||||
| --------------------|--------|---------------------------- | --------------------|--------|---------------------------- | ||||
| | Stride | H | [1, 63] | |||||
| | Filter Size | H | [1, 63] | |||||
| | | W | [1, 63] | | | W | [1, 63] | ||||
| --------------------|--------|---------------------------- | |||||
| | Padding | Top | [0, 255] | |||||
| | | Bottom | [0, 255] | |||||
| | | Left | [0, 255] | |||||
| | | Right | [0, 255] | |||||
| --------------------|--------|---------------------------- | |||||
| | Dilation | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| @endverbatim | @endverbatim | ||||
| * "W(input)" indicate the image width after padding and W(filter) indicates the | |||||
| * filter width after dilation. | |||||
| *\n | *\n | ||||
| * | * | ||||
| *@par Outputs: | *@par Outputs: | ||||
| @@ -855,21 +1015,7 @@ REG_OP(Conv2DCompress) | |||||
| * out_width = (in_width + pad_left + pad_right - | * out_width = (in_width + pad_left + pad_right - | ||||
| * (dilation_w * (filter_width - 1) + 1)) | * (dilation_w * (filter_width - 1) + 1)) | ||||
| * / stride_w + 1 | * / stride_w + 1 | ||||
| * | |||||
| *@attention Constraints: | |||||
| *@li The following restrictions on the output must be met: | |||||
| *@verbatim | |||||
| | Output | Restrictions | |||||
| ----------|-------------------------------- | |||||
| | H == 1 | H * W(input) == H * W(filter) | |||||
| | W == 1 | | |||||
| ----------|-------------------------------- | |||||
| | H != 1 | W(input) == W(filter) | |||||
| | W == 1 | Only for Ascend310 Hi3796V300CS | |||||
| @endverbatim | |||||
| * "H * W(input)" indicates the image size after padding and "H * W(filter)" | |||||
| * indicates the filter size after dilation. "W(input)" and W(filter) indicate | |||||
| * the same rule on the W dimension. | |||||
| *\n | |||||
| * | * | ||||
| *@par Quantization supported or not | *@par Quantization supported or not | ||||
| *@li No | *@li No | ||||
| @@ -916,12 +1062,12 @@ REG_OP(DeformableConv2D) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li dilations: A list of 5 integers. Specifies the dilation factor for each | * @li dilations: A list of 5 integers. Specifies the dilation factor for each | ||||
| * dimension of "x", now only support [1,1,1,1,1] | |||||
| * The N and C dimensions must be 1. Has the same format as "x". | |||||
| * dimension of "x". | |||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | |||||
| * @li offset_x: An optional int. Input offset, used for quantized inference. | * @li offset_x: An optional int. Input offset, used for quantized inference. | ||||
| * Defaults to 0. Reserved . \n | * Defaults to 0. Reserved . \n | ||||
| @@ -967,8 +1113,8 @@ REG_OP(Conv3D) | |||||
| *@par Required Attributes: | *@par Required Attributes: | ||||
| * @li strides: A list of 5 integers. Specifies the stride of the sliding window | * @li strides: A list of 5 integers. Specifies the stride of the sliding window | ||||
| * for each dimension of "x". | |||||
| * The N and C dimensions must be 1. Has the same format as "x". | |||||
| * for each dimension of "out_backprop". | |||||
| * The N and C dimensions must be 1. Has the same format as "out_backprop". | |||||
| * @li pads: A list of 6 integers. | * @li pads: A list of 6 integers. | ||||
| * Supports only padding along the D, H and W dimensions in sequence of head, | * Supports only padding along the D, H and W dimensions in sequence of head, | ||||
| * tail, top, bottom, left and right . \n | * tail, top, bottom, left and right . \n | ||||
| @@ -976,14 +1122,15 @@ REG_OP(Conv3D) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Three attributes: | * Three attributes: | ||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
| * dimension of the input, now only support [1,1,1,1,1] | |||||
| * dimension of the input. | |||||
| * The N, C and D dimensions must be 1. Has the same format as "out_backprop". | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type as filter,and has same format as input_size | |||||
| * y: A Tensor. Has the same type as "filter", and has the same format as "input_size". | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with Tensorflow's conv3d_backprop_input | * Compatible with Tensorflow's conv3d_backprop_input | ||||
| @@ -1011,8 +1158,8 @@ REG_OP(Conv3DBackpropInput) | |||||
| *@par Required Attributes: | *@par Required Attributes: | ||||
| * @li strides: A list of 5 integers. Specifies the stride of the sliding window | * @li strides: A list of 5 integers. Specifies the stride of the sliding window | ||||
| * for each dimension of "x". | |||||
| * The N and C dimensions must be 1. Has the same format as "x". | |||||
| * for each dimension of "out_backprop". | |||||
| * The N and C dimensions must be 1. Has the same format as "out_backprop". | |||||
| * @li pads: A list of 6 integers. Supports only padding along the D, H and W | * @li pads: A list of 6 integers. Supports only padding along the D, H and W | ||||
| * dimensions in sequence of head, tail, top, bottom, left and right. | * dimensions in sequence of head, tail, top, bottom, left and right. | ||||
| * @li input_size: A tuple/list of type int32, int64. An integer vector | * @li input_size: A tuple/list of type int32, int64. An integer vector | ||||
| @@ -1023,13 +1170,14 @@ REG_OP(Conv3DBackpropInput) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Three attributes: | * Three attributes: | ||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
| * dimension of input, now only support [1,1,1,1,1] | |||||
| * dimension of input. | |||||
| * The N, C and D dimensions must be 1. Has the same format as "out_backprop". | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type and data format as out_backprop. | |||||
| * y: A Tensor. Has the same type and data format as "out_backprop". | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with Tensorflow's conv3d_backprop_input | * Compatible with Tensorflow's conv3d_backprop_input | ||||
| @@ -1072,9 +1220,7 @@ REG_OP(Conv3DBackpropInputD) | |||||
| * @li c_t: An optional Tensor of dtype float16 or float32. The cell state at time t . \n | * @li c_t: An optional Tensor of dtype float16 or float32. The cell state at time t . \n | ||||
| *@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
| * Compatible with the Pytorch operator adds. | |||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| * Compatible with the Caffe operator LSTM. | |||||
| */ | */ | ||||
| REG_OP(LSTM) | REG_OP(LSTM) | ||||
| .INPUT(x, TensorType({DT_FLOAT16})) | .INPUT(x, TensorType({DT_FLOAT16})) | ||||
| @@ -1121,14 +1267,15 @@ REG_OP(LSTM) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Three attributes: | * Three attributes: | ||||
| * @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
| * dimension of input, now only support [1,1,1,1,1]. | |||||
| * dimension of input. | |||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | |||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor that has the same type as x | |||||
| * y: A Tensor that has the same type as "x" | |||||
| * and the format is NDHWC, NCDHW or DHWCN. | * and the format is NDHWC, NCDHW or DHWCN. | ||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with Tensorflow's conv3d_backprop_filter | * Compatible with Tensorflow's conv3d_backprop_filter | ||||
| @@ -1172,9 +1319,10 @@ REG_OP(Conv3DBackpropFilter) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Three attributes: | * Three attributes: | ||||
| * @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
| * dimension of input, now only support [1,1,1,1,1]. | |||||
| * dimension of input. | |||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | |||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| @@ -1224,15 +1372,16 @@ REG_OP(Conv3DBackpropFilterD) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Five attributes: | * Five attributes: | ||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li dilations: A tuple/list of 5 integers, | * @li dilations: A tuple/list of 5 integers, | ||||
| * The dilation factor for each dimension of input, now only support [1,1,1,1,1] | |||||
| * The dilation factor for each dimension of input. | |||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li output_padding: The additional size added to the output shape. | * @li output_padding: The additional size added to the output shape. | ||||
| * @li offset_x: Input offset_x value. Reserved. | * @li offset_x: Input offset_x value. Reserved. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type and format as x. | |||||
| * y: A Tensor. Has the same type and format as "x". | |||||
| */ | */ | ||||
| REG_OP(Conv3DTranspose) | REG_OP(Conv3DTranspose) | ||||
| .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | ||||
| @@ -1273,15 +1422,16 @@ REG_OP(Conv3DTranspose) | |||||
| *@par Attributes: | *@par Attributes: | ||||
| * Five attributes: | * Five attributes: | ||||
| * @li dilations: A tuple/list of 5 integers, The dilation factor for each | * @li dilations: A tuple/list of 5 integers, The dilation factor for each | ||||
| * dimension of input, now only support [1,1,1,1,1] | |||||
| * dimension of input. | |||||
| * The N, C and D dimensions must be 1. Has the same format as "x". | |||||
| * @li groups: Number of blocked connections from input channels to output | * @li groups: Number of blocked connections from input channels to output | ||||
| * channels. Reserved. | |||||
| * channels. | |||||
| * @li data_format: An optional string from: "NDHWC", "NCDHW". | * @li data_format: An optional string from: "NDHWC", "NCDHW". | ||||
| * Defaults to "NDHWC". Specify the data format of the input and output data. | * Defaults to "NDHWC". Specify the data format of the input and output data. | ||||
| * @li output_padding: The additional size added to the output shape. | * @li output_padding: The additional size added to the output shape. | ||||
| * @li offset_x: Input offset_x value. Reserved. | * @li offset_x: Input offset_x value. Reserved. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. Has the same type and format as x. | |||||
| * y: A Tensor. Has the same type and format as "x". | |||||
| *@par Restrictions: | *@par Restrictions: | ||||
| * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead. | ||||
| */ | */ | ||||
| @@ -1316,6 +1466,22 @@ REG_OP(Conv3DTransposeD) | |||||
| * or [out_channels, in_channel, filter_height, filter_width]. | * or [out_channels, in_channel, filter_height, filter_width]. | ||||
| * @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". | * @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". | ||||
| * @li offset_w: An optional 1D tensor for quantized inference. Reserved. | * @li offset_w: An optional 1D tensor for quantized inference. Reserved. | ||||
| *\n | |||||
| *\n | |||||
| * The following are the supported data types and data formats: | |||||
| *@verbatim | |||||
| | Tensor | x | filter | bias | y | |||||
| ------------|---------|---------|---------|-------- | |||||
| | Data Type | float16 | float16 | float16 | float16 | |||||
| | |---------|---------|---------|-------- | |||||
| | | int8 | int8 | int32 | int32 | |||||
| ------------|---------|---------|---------|-------- | |||||
| | Format | NCHW | NCHW | ND | NCHW | |||||
| | | NHWC | HWCN | | NHWC | |||||
| @endverbatim | |||||
| * For int8, the output must be followed by a dequant or requant operator. | |||||
| *\n | |||||
| * | |||||
| *@par Required Attributes: | *@par Required Attributes: | ||||
| * @li strides: A required tuple/list of 4 integers. The stride of the sliding | * @li strides: A required tuple/list of 4 integers. The stride of the sliding | ||||
| * window for H/W dimension. The index of H/W is same as data_format. | * window for H/W dimension. The index of H/W is same as data_format. | ||||
| @@ -1333,10 +1499,58 @@ REG_OP(Conv3DTransposeD) | |||||
| * @li output_padding: The additional size added to the output shape. Defaults | * @li output_padding: The additional size added to the output shape. Defaults | ||||
| * to [0, 0, 0, 0]. | * to [0, 0, 0, 0]. | ||||
| * @li offset_x: An optional int. Input offset, used for quantized inference. | * @li offset_x: An optional int. Input offset, used for quantized inference. | ||||
| * Defaults to "0". | |||||
| * The negative offset added to the input image for int8 type. Ensure that offset_x | |||||
| * is within the effective range of int8 [-128, 127]. Defaults to "0". | |||||
| *\n | |||||
| *\n | |||||
| * The following value range restrictions must be met: | |||||
| *@verbatim | |||||
| | Name | Field | Scope | |||||
| -------------------|----------|-------------- | |||||
| | input_size | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | x (out_backprop) | H*strideH| [1, 4096] | |||||
| | | W*strideW| [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | filter | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| -------------------|----------|-------------- | |||||
| | y (fmap) | H | [1, 4096] | |||||
| | | W | [1, 4096] | |||||
| -------------------|----------|-------------- | |||||
| | Stride | H | [1, 63] | |||||
| | | W | [1, 63] | |||||
| -------------------|----------|-------------- | |||||
| | Padding | Top | [0, 255] | |||||
| | | Bottom | [0, 255] | |||||
| | | Left | [0, 255] | |||||
| | | Right | [0, 255] | |||||
| -------------------|----------|-------------- | |||||
| | Dilation | H | [1, 255] | |||||
| | | W | [1, 255] | |||||
| -------------------|----------|-------------- | |||||
| | Offset_x | | [-128, 127] | |||||
| @endverbatim | |||||
| * On Ascend910, the H and W dimensions of fmap or out_backprop do not support the value 1 when | |||||
| * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1. | |||||
| * If filter_h = 1 and filter_w = 1, then out_backprop_w * stride_h * stride_w must be less than 4096. | |||||
| *\n | |||||
| * | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. A Tensor of type float16 or int32, and has same format as | * y: A Tensor. A Tensor of type float16 or int32, and has same format as | ||||
| * input_size. | * input_size. | ||||
| *\n | |||||
| * out_backprop_height = (fmap_height + pad_top + pad_bottom - | |||||
| * (dilation_h * (filter_height - 1) + 1)) | |||||
| * / stride_h + 1 | |||||
| *\n | |||||
| * out_backprop_width = (fmap_width + pad_left + pad_right - | |||||
| * (dilation_w * (filter_width - 1) + 1)) | |||||
| * / stride_w + 1 | |||||
| *\n | |||||
| * | |||||
| */ | */ | ||||
| REG_OP(Conv2DTranspose) | REG_OP(Conv2DTranspose) | ||||
| .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) | ||||
| @@ -1405,21 +1619,22 @@ REG_OP(Conv2DTransposeD) | |||||
| /** | /** | ||||
| *@brief Computes the deformed convolution output with the expected input | *@brief Computes the deformed convolution output with the expected input | ||||
| *@par Inputs: | *@par Inputs: | ||||
| * Four inputs: | |||||
| * Two inputs: | |||||
| * @li x: A Tensor of type float16,float32 | * @li x: A Tensor of type float16,float32 | ||||
| * @li offsets: A Tensor of type float16,float32. Deformation offset parameter. | * @li offsets: A Tensor of type float16,float32. Deformation offset parameter. | ||||
| *@par Required Attributes: | *@par Required Attributes: | ||||
| * @li strides: A tuple/list of 4 integers. The stride of the sliding window | * @li strides: A tuple/list of 4 integers. The stride of the sliding window | ||||
| * for the H/W dimension. | * for the H/W dimension. | ||||
| * @li pads: A tuple/list of 4 integers.Padding added to each dimension | |||||
| * @li pads: A tuple/list of 4 integers. Padding added to the H/W dimension | |||||
| * of the input. | * of the input. | ||||
| * @li ksize: A tuple/list of 2 integers. Kernel size. | * @li ksize: A tuple/list of 2 integers. Kernel size. | ||||
| *@par Attributes: | *@par Attributes: | ||||
| * Three attributes: | |||||
| * Four attributes: | |||||
| * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension | * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension | ||||
| * of input. Defaults to [1, 1, 1, 1] | * of input. Defaults to [1, 1, 1, 1] | ||||
| * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | ||||
| * @li deformable_groups: Specify the c-axis grouping number of input x. | * @li deformable_groups: Specify the c-axis grouping number of input x. | ||||
| * @li modulated: Specifies the version of DeformableConv2D; "true" means v2, "false" means v1. | |||||
| *@par Outputs: | *@par Outputs: | ||||
| * y: A Tensor. A Tensor of type float16, float32. | * y: A Tensor. A Tensor of type float16, float32. | ||||
| */ | */ | ||||
| @@ -1433,7 +1648,69 @@ REG_OP(DeformableOffsets) | |||||
| .ATTR(dilations, ListInt, {1, 1, 1, 1}) | .ATTR(dilations, ListInt, {1, 1, 1, 1}) | ||||
| .ATTR(data_format, String, "NCHW") | .ATTR(data_format, String, "NCHW") | ||||
| .ATTR(deformable_groups, Int, 1) | .ATTR(deformable_groups, Int, 1) | ||||
| .ATTR(modulated, Bool, true) | |||||
| .OP_END_FACTORY_REG(DeformableOffsets) | .OP_END_FACTORY_REG(DeformableOffsets) | ||||
| /** | |||||
| *@brief Computes the gradients of DeformableOffsets with respect to input and offsets | |||||
| *@par Inputs: | |||||
| * Three inputs: | |||||
| * @li grad: A Tensor of type float16,float32. Gradients with respect to the DeformableOffsets output. | |||||
| * @li x: A Tensor of type float16,float32. | |||||
| * @li offsets: A Tensor of type float16,float32. Deformation offset parameter. | |||||
| *@par Required Attributes: | |||||
| * @li strides: A tuple/list of 4 integers. The stride of the sliding window | |||||
| * for the H/W dimension. | |||||
| * @li pads: A tuple/list of 4 integers. Padding added to the H/W dimension | |||||
| * of the input. | |||||
| * @li ksize: A tuple/list of 2 integers. Kernel size. | |||||
| *@par Attributes: | |||||
| * Four attributes: | |||||
| * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension | |||||
| * of input. Defaults to [1, 1, 1, 1] | |||||
| * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | |||||
| * @li deformable_groups: Specify the c-axis grouping number of input x. | |||||
| * @li modulated: Specifies the version of DeformableConv2D; "true" means v2, "false" means v1. | |||||
| *@par Outputs: | |||||
| * grad_x: A Tensor of type float16, float32. Gradients with respect to the input "x". | |||||
| * grad_offsets: A Tensor of type float16, float32. Gradients with respect to the input "offsets". | |||||
| */ | |||||
| REG_OP(DeformableOffsetsGrad) | |||||
| .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(grad_offsets, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .REQUIRED_ATTR(ksize, ListInt) | |||||
| .ATTR(dilations, ListInt, {1, 1, 1, 1}) | |||||
| .ATTR(data_format, String, "NCHW") | |||||
| .ATTR(deformable_groups, Int, 1) | |||||
| .ATTR(modulated, Bool, true) | |||||
| .OP_END_FACTORY_REG(DeformableOffsetsGrad) | |||||
| /** | |||||
| *@brief Computes the deformed dilation output with the expected input | |||||
| *@par Inputs: | |||||
| * One input: | |||||
| * @li x: A Tensor of type int8, float16, float32 | |||||
| *@par Required Attributes: | |||||
| * @li dilations: A tuple/list of integers. | |||||
| *@par Attributes: | |||||
| * Two attributes: | |||||
| * @li padding_value: The default value used to fill blank positions. | |||||
| * @li pads: A tuple/list of integers. | |||||
| *@par Outputs: | |||||
| * y: A Tensor. A Tensor of type int8, float16, float32. | |||||
| */ | |||||
| REG_OP(Dilation) | |||||
| .INPUT(x, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) | |||||
| .REQUIRED_ATTR(dilations, ListInt) | |||||
| .ATTR(pads, ListInt, {}) | |||||
| .ATTR(padding_value, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(Dilation) | |||||
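A rough reading of the Dilation semantics, sketched in 1-D under the assumption that consecutive input elements end up dilation positions apart and the gaps take padding_value (an editor's illustration, not the device kernel):

#include <vector>

// 1-D sketch: out[i * d] = x[i]; every other position takes padding_value.
static std::vector<float> Dilate1D(const std::vector<float>& x, int d, float padding_value) {
  if (x.empty() || d < 1) return {};
  std::vector<float> y((x.size() - 1) * d + 1, padding_value);
  for (size_t i = 0; i < x.size(); ++i) y[i * d] = x[i];
  return y;
}
// Dilate1D({1, 2, 3}, 2, 0.f) -> {1, 0, 2, 0, 3}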
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -968,8 +968,9 @@ REG_OP(SPP) | |||||
| * Three inputs, including: | * Three inputs, including: | ||||
| *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature | *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature | ||||
| * map. | * map. | ||||
| *@li rois: A tensor of type float16 or float32, with shape | |||||
| *@li rois: A tensor of type float16 or float32, with 3D shape | |||||
| * [batch, 5, roi_max_num], describing the ROIs. | * [batch, 5, roi_max_num], describing the ROIs. | ||||
| * roi_max_num must be less than or equal to 6000 and must be divisible by 16. | |||||
| *@li roi_actual_num: A optional tensor of type int32, with shape [batch, 8], specifying | *@li roi_actual_num: A optional tensor of type int32, with shape [batch, 8], specifying | ||||
| * the number of ROIs per batch . \n | * the number of ROIs per batch . \n | ||||
| @@ -1383,6 +1384,7 @@ REG_OP(DecodeWheelsTarget) | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| * Only computation of float16 data is supported. | * Only computation of float16 data is supported. | ||||
| * Note: if the number of classes per image multiplied by max_size_per_class is too large, compilation will fail with an insufficient-memory error. | |||||
| */ | */ | ||||
| REG_OP(BatchMultiClassNonMaxSuppression) | REG_OP(BatchMultiClassNonMaxSuppression) | ||||
| .INPUT(boxes, TensorType({DT_FLOAT16})) | .INPUT(boxes, TensorType({DT_FLOAT16})) | ||||
| @@ -1485,7 +1487,10 @@ REG_OP(DecodeBboxV2) | |||||
| * | * | ||||
| *@par Outputs: | *@par Outputs: | ||||
| * @li y1: A Tensor. Must have the same type as x. | * @li y1: A Tensor. Must have the same type as x. | ||||
| * @li y2: A Tensor. Indices of y1 in x.Dtype must be int32. | |||||
| * @li y2: A Tensor. Indices of y1 in x. Dtype must be int32. | |||||
| * | |||||
| *@attention Constraints: | |||||
| * The upper limit of the data size along the sorting axis is 7040. | |||||
| */ | */ | ||||
| REG_OP(Sort) | REG_OP(Sort) | ||||
| .INPUT(x, TensorType({ DT_FLOAT16 })) | .INPUT(x, TensorType({ DT_FLOAT16 })) | ||||
| @@ -1495,6 +1500,155 @@ REG_OP(Sort) | |||||
| .ATTR(descending, Bool, false) | .ATTR(descending, Bool, false) | ||||
| .OP_END_FACTORY_REG(Sort) | .OP_END_FACTORY_REG(Sort) | ||||
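/**
*@brief Computes the overlap ratio between "bboxes" and "gtboxes".
* (This description is inferred from the operator signature below; it is not
* part of the original header.)
*@par Inputs:
* Two inputs, including:
*@li bboxes: A Tensor of type float16 or float32, holding the predicted boxes.
*@li gtboxes: A Tensor of type float16 or float32, holding the ground-truth boxes.
*@par Attributes:
*mode: An optional string selecting the overlap metric, e.g. "iou". Defaults to "iou".
*@par Outputs:
*overlap: A Tensor of the same type as the inputs, holding the computed ratios.
*/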
| REG_OP(PtIou) | |||||
| .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(mode, String, "iou") | |||||
| .OP_END_FACTORY_REG(PtIou) | |||||
| /** | |||||
| *@brief Greedily selects a subset of bounding boxes in descending order of | |||||
| score . \n | |||||
| *@par Inputs: | |||||
| *Input boxes and scores must be float16 type. Inputs include: | |||||
| *@li boxes: An input tensor with shape [num_batches,spatial_dimension,4]. | |||||
| The single box data format is indicated by center_point_box. | |||||
| *@li scores: An input tensor with shape [num_batches,num_classes,spatial_dimension]. | |||||
| *@li max_output_size: A scalar integer tensor representing the maximum number | |||||
| of boxes to be selected by non max suppression. | |||||
| *@li iou_threshold: A 0-D float tensor representing the threshold for deciding | |||||
| whether boxes overlap too much with respect to IOU. | |||||
| *@li score_threshold: A 0-D float tensor representing the threshold for | |||||
| deciding when to remove boxes based on score . \n | |||||
| *@par Attributes: | |||||
| *center_point_box: An integer indicating the format of the box data. | |||||
| The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2] | |||||
| where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair | |||||
| of box corners and the coordinates can be provided as normalized | |||||
| (i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models. | |||||
| 1 - the box data is supplied as [x_center, y_center, width, height]. | |||||
| Mostly used for Pytorch models. \n | |||||
| *@par Outputs: | |||||
| *@li selected_indices: A 2-D integer tensor of shape [M] representing the | |||||
| selected indices from the boxes tensor, where M <= max_output_size. \n | |||||
| *@attention Constraints: | |||||
| *Input boxes and scores must be float16 type . \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with onnx NonMaxSuppression operator. | |||||
| */ | |||||
| REG_OP(NonMaxSuppressionV6) | |||||
| .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32})) | |||||
| .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(selected_indices, TensorType({DT_INT32})) | |||||
| .ATTR(center_point_box, Int, 0) | |||||
| .ATTR(max_boxes_size, Int, 0) | |||||
| .OP_END_FACTORY_REG(NonMaxSuppressionV6) | |||||
| /** | |||||
| *@brief Greedily selects a subset of bounding boxes in descending order of | |||||
| score . \n | |||||
| *@par Inputs: | |||||
| *Input boxes and scores must be float16 type. Inputs include: | |||||
| *@li boxes: An input tensor with shape [num_batches,spatial_dimension,4]. | |||||
| The single box data format is indicated by center_point_box. | |||||
| *@li scores: An input tensor with shape [num_batches,num_classes,spatial_dimension]. | |||||
| *@li max_output_size: A scalar integer tensor representing the maximum number | |||||
| of boxes to be selected by non max suppression. | |||||
| *@li iou_threshold: A 0-D float tensor representing the threshold for deciding | |||||
| whether boxes overlap too much with respect to IOU. | |||||
| *@li score_threshold: A 0-D float tensor representing the threshold for | |||||
| deciding when to remove boxes based on score . \n | |||||
| *@li index_id: An input tensor with shape [num_batches,num_classes,spatial_dimension,3], | |||||
| with the last dim representing (batch_id,class_id,index_id) . \n | |||||
| *@par Attributes: | |||||
| *center_point_box: An integer indicating the format of the box data. | |||||
| The default is 0. 0 - the box data is supplied as [y1, x1, y2, x2] | |||||
| where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair | |||||
| of box corners and the coordinates can be provided as normalized | |||||
| (i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models. | |||||
| 1 - the box data is supplied as [x_center, y_center, width, height]. | |||||
| Mostly used for Pytorch models. \n | |||||
| *@par Outputs: | |||||
| *@li selected_indices: A 2-D integer tensor of shape [M] representing the | |||||
| selected indices from the boxes tensor, where M <= max_output_size. \n | |||||
| *@attention Constraints: | |||||
| *Input boxes and scores must be float16 type . \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with onnx NonMaxSuppression operator. | |||||
| */ | |||||
| REG_OP(NonMaxSuppressionV7) | |||||
| .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32})) | |||||
| .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(index_id, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(selected_indices, TensorType({DT_INT32})) | |||||
| .ATTR(center_point_box, Int, 0) | |||||
| .ATTR(max_boxes_size, Int, 0) | |||||
| .OP_END_FACTORY_REG(NonMaxSuppressionV7) | |||||
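The greedy selection described in the two comments above is the classic NMS loop. A self-contained sketch of that algorithm for a single class, with [y1, x1, y2, x2] boxes and equally sized boxes/scores vectors (an editor's illustration, not the device kernel):

#include <algorithm>
#include <numeric>
#include <vector>

struct Box { float y1, x1, y2, x2; };

static float Iou(const Box& a, const Box& b) {
  float ix = std::max(0.f, std::min(a.x2, b.x2) - std::max(a.x1, b.x1));
  float iy = std::max(0.f, std::min(a.y2, b.y2) - std::max(a.y1, b.y1));
  float inter = ix * iy;
  float uni = (a.x2 - a.x1) * (a.y2 - a.y1) + (b.x2 - b.x1) * (b.y2 - b.y1) - inter;
  return uni > 0.f ? inter / uni : 0.f;
}

// Greedy NMS: repeatedly take the highest-scoring surviving box, then drop
// every box whose IoU with it exceeds iou_threshold, until max_output_size
// boxes are selected or none remain above score_threshold.
static std::vector<int> Nms(const std::vector<Box>& boxes, const std::vector<float>& scores,
                            int max_output_size, float iou_threshold, float score_threshold) {
  std::vector<int> order(boxes.size());
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(), [&](int a, int b) { return scores[a] > scores[b]; });
  std::vector<int> keep;
  std::vector<bool> removed(boxes.size(), false);
  for (int i : order) {
    if (removed[i] || scores[i] < score_threshold) continue;
    keep.push_back(i);
    if (static_cast<int>(keep.size()) >= max_output_size) break;
    for (int j : order)
      if (!removed[j] && Iou(boxes[i], boxes[j]) > iou_threshold) removed[j] = true;
  }
  return keep;
}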
| /** | |||||
| *@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n | |||||
| *@par Inputs: | |||||
| * Two inputs, including: | |||||
| *@li features: A 5HD Tensor list of type float32 or float16. | |||||
| *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, | |||||
| * the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1". | |||||
| *@par Attributes: | |||||
| *@li finest_scale: An optional attribute of type int, specifying the scale threshold used to map "rois" to feature levels. | |||||
| *@li roi_scale_factor: An optional attribute of type float32, specifying the rescaling of "rois" coordinates. | |||||
| *@li spatial_scale: An optional attribute of type list of float32, specifying the scaling ratio of "features" | |||||
| * to the original image. | |||||
| *@li pooled_height: An optional attribute of type int32, specifying the H dimension. | |||||
| *@li pooled_width: An optional attribute of type int32, specifying the W dimension. | |||||
| *@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency | |||||
| * of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois", | |||||
| * which is a floating point number. Defaults to "0". | |||||
| *@li pool_mode: An optional attribute of type string to indicate pooling mode. Defaults to "avg" . \n | |||||
| *@li aligned: An optional attribute of type bool, specifying whether to align corners. Defaults to true . \n | |||||
| *@par Outputs: | |||||
| * y: The feature sample of each ROI position, returned as a 5HD Tensor of type float32 or float16. | |||||
| * The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height", | |||||
| * "pooled_width", and "features", respectively. | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with mmdetection SingleRoIExtractor operator. | |||||
| */ | |||||
| REG_OP(RoiExtractor) | |||||
| .DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(finest_scale, Int, 56) | |||||
| .ATTR(roi_scale_factor, Float, 0) | |||||
| .ATTR(spatial_scale, ListFloat, { 1.f/4, 1.f/8, 1.f/16, 1.f/32 }) | |||||
| .ATTR(pooled_height, Int, 7) | |||||
| .ATTR(pooled_width, Int, 7) | |||||
| .ATTR(sample_num, Int, 0) | |||||
| .ATTR(pool_mode, String, "avg") | |||||
| .ATTR(aligned, Bool, true) | |||||
| .OP_END_FACTORY_REG(RoiExtractor) | |||||
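mmdetection's SingleRoIExtractor assigns each ROI to one pyramid level before pooling. A sketch of that mapping, assuming the upstream convention lvl = floor(log2(sqrt(w*h) / finest_scale + 1e-6)) clamped to the available levels (the editor's assumption based on the mmdetection source, not something stated in this header):

#include <algorithm>
#include <cmath>

// Maps an ROI (x0, y0, x1, y1) to a feature-level index in [0, num_levels - 1].
static int RoiToLevel(float x0, float y0, float x1, float y1,
                      int finest_scale, int num_levels) {
  float scale = std::sqrt(std::max(0.f, x1 - x0) * std::max(0.f, y1 - y0));
  int lvl = static_cast<int>(std::floor(std::log2(scale / finest_scale + 1e-6f)));
  return std::max(0, std::min(num_levels - 1, lvl));
}
// With finest_scale = 56 (the attribute default), a 112x112 ROI maps to level 1.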
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -160,20 +160,20 @@ REG_OP(SigmoidCrossEntropyWithLogits) | |||||
| .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits) | .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits) | ||||
| /** | /** | ||||
| *@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n | |||||
| *@brief Computes the sigmoid cross entropy loss of "predict" and "target". | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * four inputs, including: | * four inputs, including: | ||||
| *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. | *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. | ||||
| *@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n | |||||
| *@li weight: An multi-dimensional Tensor, specifying the weight value. \n | |||||
| *@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value. | |||||
| *@li weight: An multi-dimensional Tensor, specifying the weight value. | |||||
| *@li pos_weight: An multi-dimensional Tensor, specifying the pos weight value. \n | *@li pos_weight: An multi-dimensional Tensor, specifying the pos weight value. \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean" . \n | |||||
| *reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean". \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n | |||||
| *loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict". \n | |||||
| *@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
| * Compatible with PyTorch operator BCEWithLogitsLoss. | * Compatible with PyTorch operator BCEWithLogitsLoss. | ||||
| @@ -427,6 +427,33 @@ REG_OP(MVN) | |||||
| .ATTR(eps, Float, 1e-9) | .ATTR(eps, Float, 1e-9) | ||||
| .OP_END_FACTORY_REG(MVN) | .OP_END_FACTORY_REG(MVN) | ||||
| /** | |||||
| *@brief Normalizes the input . \n | |||||
| *@par Inputs: | |||||
| * One input: | |||||
| *x: An NCHW tensor of type float16 or float32 . \n | |||||
| *@par Attributes: | |||||
| *@li eps: An optional float32 epsilon used to avoid division by zero. Defaults to "1e-9" . \n | |||||
| *@li axes: A list of integers, specifying the axes along which to reduce. Defaults to "[0, 2, 3]" . \n | |||||
| *@par Outputs: | |||||
| *y: An NCHW tensor of type float16 or float32 . \n | |||||
| *@attention Constraints: | |||||
| * The input tensor must have the NCHW format, whose shape length must be 4. | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with the ONNX operator MeanVarianceNormalization. | |||||
| */ | |||||
| REG_OP(MVNV2) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */ | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Result, has same element type as inputs" */ | |||||
| .ATTR(eps, Float, 1e-9) | |||||
| .ATTR(axes, ListInt, {0, 2, 3}) | |||||
| .OP_END_FACTORY_REG(MVNV2) | |||||
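The normalization itself is mean/variance whitening with the eps guard from the attribute. A 1-D sketch of the arithmetic, reducing over the whole (non-empty) vector for brevity where MVNV2 reduces over the "axes" attribute:

#include <cmath>
#include <vector>

// y = (x - mean(x)) / sqrt(var(x) + eps), computed over the full vector here.
static std::vector<float> Mvn(const std::vector<float>& x, float eps = 1e-9f) {
  float mean = 0.f, var = 0.f;
  for (float v : x) mean += v;
  mean /= x.size();
  for (float v : x) var += (v - mean) * (v - mean);
  var /= x.size();
  std::vector<float> y(x.size());
  for (size_t i = 0; i < x.size(); ++i) y[i] = (x[i] - mean) / std::sqrt(var + eps);
  return y;
}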
| /** | /** | ||||
| *@brief Normalizes the input "x1" . \n | *@brief Normalizes the input "x1" . \n | ||||
| @@ -978,6 +1005,357 @@ REG_OP(InHost) | |||||
| .OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) | .OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) | ||||
| .ATTR(epsilon, Float, 0.00001) | .ATTR(epsilon, Float, 0.00001) | ||||
| .OP_END_FACTORY_REG(InHost) | .OP_END_FACTORY_REG(InHost) | ||||
| /** | |||||
| * @brief Performs instance normalization on "x". \n | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li x: A Tensor. Must be one of the following types: float16, float32, format is NC1HWC0. | |||||
| * @li gamma: A Tensor. Must be one of the following types: float16, float32, format is ND. | |||||
| * @li beta: A Tensor. Must be one of the following types: float16, float32, format is ND. | |||||
| * @par Attributes: | |||||
| * @li data_format: A required attribute of type String, specifying the data format. \n | |||||
| * @li epsilon: A required attribute of type Float, added to the variance to avoid division by zero. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "x", format is NC1HWC0. \n | |||||
| * @li mean: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n | |||||
| * @li variance: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Can be used for the ONNX operator InstanceNormalization. | |||||
| */ | |||||
| REG_OP(InstanceNorm) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .REQUIRED_ATTR(data_format, String) | |||||
| .REQUIRED_ATTR(epsilon, Float) | |||||
| .OP_END_FACTORY_REG(InstanceNorm) | |||||
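/**
* @brief Computes the gradient of the KL divergence loss with respect to "input".
* (This description is inferred from the operator signature below; it is not
* part of the original header.)
* @par Inputs:
* Three inputs, including:
* @li grad: A Tensor of type float16 or float32. Incoming gradients.
* @li input: A Tensor. Has the same type as "grad".
* @li target: A Tensor. Has the same type as "grad". \n
* @par Attributes:
* @li reduction: An optional string. Defaults to "mean".
* @li log_target: An optional bool specifying whether "target" is given in
* log space. Defaults to "false". \n
* @par Outputs:
* @li y: A Tensor. Has the same type as "grad". \n
*/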
| REG_OP(KlDivLossGrad) | |||||
| .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .ATTR(log_target, Bool, false) | |||||
| .OP_END_FACTORY_REG(KlDivLossGrad) | |||||
| /** | |||||
| * @brief Computes l1_loss_grad or l1_loss_backward. \n | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li grads: A Tensor. Must be one of the following types: float16, float32. | |||||
| * Required. | |||||
| * @li predict: A Tensor. Has the same type as "grads". Required. | |||||
| * @li label: A Tensor. Has the same type as "grads". Required. \n | |||||
| * @par Attributes: | |||||
| * @li reduction: An optional attribute of type String. Defaults to "mean". \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator L1LossGrad. | |||||
| */ | |||||
| REG_OP(L1LossGrad) | |||||
| .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(L1LossGrad) | |||||
| /** | |||||
| * @brief Computes loss of lp, p=1,2,3.... | |||||
| * @par Inputs: | |||||
| * @li predict: An ND tensor of type float16, float32. | |||||
| * @li label: An ND tensor of type float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li p: A required int attribute that decides which loss to compute. Currently p can only be 1, which computes l1_loss. | |||||
| * @li reduction: An optional string. Defaults to "mean". \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the same shape and type as "predict". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator LpLoss. | |||||
| */ | |||||
| REG_OP(LpLoss) | |||||
| .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .REQUIRED_ATTR(p, Int) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(LpLoss) | |||||
| /** | |||||
| * @brief Computes gradients of mse loss. | |||||
| * @par Inputs: | |||||
| * @li predict: An ND tensor of type float16, float32. | |||||
| * @li label: An ND tensor of type float16, float32. | |||||
| * @li dout: An ND tensor of type float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li reduction: An optional string. Defaults to "mean". \n | |||||
| * @par Outputs: | |||||
| * @li y: An ND tensor with the same shape and type as "predict". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator MseLossGrad. | |||||
| */ | |||||
| REG_OP(MseLossGrad) | |||||
| .INPUT(predict, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||||
| .INPUT(label, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||||
| .INPUT(dout, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT32, DT_FLOAT16})) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(MseLossGrad) | |||||
| /** | |||||
| * @brief Computes mse loss. | |||||
| * @par Inputs: | |||||
| * two inputs, including: | |||||
| * @li predict: An ND Tensor of dtype float16 or float32. | |||||
| * @li label: An ND Tensor of dtype float16 or float32.\n | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li reduction: An optional string from "sum", "none", and "mean". Defaults to "mean".\n | |||||
| * | |||||
| * @par Outputs: | |||||
| * @li y: When reduction is "sum" or "mean", y is a scalar; when reduction is "none", y has the | |||||
| * same type and shape as "predict".\n | |||||
| */ | |||||
| REG_OP(MseLoss) | |||||
| .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(MseLoss) | |||||
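A reference sketch of the forward computation with the three reduction modes, packing the scalar result of "mean"/"sum" into a one-element vector (assumes predict and label have equal, non-zero length; an editor's illustration, not the device kernel):

#include <string>
#include <vector>

// MSE: per-element (predict - label)^2, then optionally reduced.
static std::vector<float> MseLossRef(const std::vector<float>& predict,
                                     const std::vector<float>& label,
                                     const std::string& reduction = "mean") {
  std::vector<float> se(predict.size());
  for (size_t i = 0; i < predict.size(); ++i) {
    float d = predict[i] - label[i];
    se[i] = d * d;
  }
  if (reduction == "none") return se;
  float acc = 0.f;
  for (float v : se) acc += v;
  if (reduction == "mean") acc /= se.size();
  return {acc};  // "sum" or "mean": a scalar result
}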
| /** | |||||
| * @brief Calculates the reversed outputs of the function "smooth_l1_loss_v2". \n | |||||
| * @par Inputs: | |||||
| * Three Inputs, including: | |||||
| * @li predict: A Tensor. Must be one of the following types: | |||||
| * float16, float32. | |||||
| * @li label: A Tensor. Has the same type as "predict". | |||||
| * @li dout: A Tensor. Has the same type as "predict". \n | |||||
| * @par Attributes: | |||||
| * Two Attributes, including: | |||||
| * @li sigma: An optional float. Defaults to 1.0. \n | |||||
| * @li reduction: An optional string. Defaults to "mean", | |||||
| * Must be one of the following: "none", "mean", "sum". \n | |||||
| * @par Outputs: | |||||
| * @li gradient: A Tensor. Has the same type as "predict". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator SmoothL1LossBackward. | |||||
| */ | |||||
| REG_OP(SmoothL1LossGradV2) | |||||
| .INPUT(predict, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(label, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(dout, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(gradient, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .ATTR(sigma, Float, 1.0) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(SmoothL1LossGradV2) | |||||
| /** | |||||
| * @brief Creates a criterion that uses a squared term if the absolute | |||||
| * element-wise error falls below beta and an L1 term otherwise. It is | |||||
| * less sensitive to outliers than the MSELoss and in some cases prevents | |||||
| * exploding gradients. | |||||
| * @par Inputs: | |||||
| * @li predict: A multi-dimensional Tensor of type float16 or float32, | |||||
| * specifying the predictive value. \n | |||||
| * @li label: A multi-dimensional Tensor of type float16 or float32, | |||||
| * specifying the target value. \n | |||||
| * @par Attributes: | |||||
| * @li sigma: An optional float. Specifies the threshold of loss. Defaults | |||||
| * to "1.0". \n | |||||
| * @li reduction: An optional str. Specifies the reduction to apply to | |||||
| * the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, | |||||
| * 'mean': the sum of the output will be divided by the number of elements in | |||||
| * the output,'sum': the output will be summed. Default: 'mean'. \n | |||||
| * @par Outputs: | |||||
| * @li loss: Indicates the loss between the predictive value and target value. | |||||
| * Has the same dimensions as "predict". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator smooth_l1_loss. \n | |||||
| */ | |||||
| REG_OP(SmoothL1LossV2) | |||||
| .INPUT(predict, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .INPUT(label, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .OUTPUT(loss, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .ATTR(sigma, Float, 1.0) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(SmoothL1LossV2) | |||||
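| // For reference, a sketch of the element-wise smooth L1 term described | |||||
| // above ("sigma" is the quadratic/linear threshold; illustrative only): | |||||
| #include <cmath> | |||||
| inline float smooth_l1_ref(float predict, float label, float sigma = 1.0f) { | |||||
|   const float diff = std::fabs(predict - label); | |||||
|   // squared term below the threshold, L1 term otherwise | |||||
|   return diff < sigma ? 0.5f * diff * diff / sigma : diff - 0.5f * sigma; | |||||
| } | |||||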
| /** | |||||
| * @brief Computes Centralization. result = x - mean(x, axes) | |||||
| * @par Inputs: | |||||
| * @li x: An ND tensor of type float16, float32. | |||||
| * @par Attributes: | |||||
| * @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. | |||||
| * Must be in the range [-rank(x), rank(x)). | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * custom operator \n | |||||
| */ | |||||
| REG_OP(Centralization) | |||||
| .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .ATTR(axes, ListInt, {-1}) | |||||
| .OP_END_FACTORY_REG(Centralization) | |||||
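| // For reference, a sketch of the centralization described above for the | |||||
| // flattened case, y = x - mean(x) (illustrative only): | |||||
| #include <numeric> | |||||
| #include <vector> | |||||
| inline std::vector<float> centralization_ref(std::vector<float> x) { | |||||
|   const float mean = | |||||
|       std::accumulate(x.begin(), x.end(), 0.0f) / static_cast<float>(x.size()); | |||||
|   for (float &v : x) v -= mean;  // subtract the mean over the reduced axes | |||||
|   return x; | |||||
| } | |||||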
| /** | |||||
| *@brief Roll the tensor along the given dimension(s). | |||||
| * Elements that are shifted beyond the last position are re-introduced at the first position. | |||||
| * If a dimension is not specified, the tensor will be flattened before rolling and then restored to the original shape. \n | |||||
| *@par Inputs: | |||||
| *One input, including: | |||||
| * @li x: A tensor . Must be one of the following types: | |||||
| * float16, float32, int32, uint32, int8, uint8. \n | |||||
| *@par Attributes: | |||||
| * @li shifts: The number of places by which the elements of the tensor are shifted. \n | |||||
| * @li dims: Axis along which to roll. \n | |||||
| *@par Outputs: | |||||
| * y: A Tensor with the same type and shape as "x". \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator Roll. \n | |||||
| */ | |||||
| REG_OP(Roll) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8})) | |||||
| .REQUIRED_ATTR(shifts, ListInt) | |||||
| .ATTR(dims, ListInt, {}) | |||||
| .OP_END_FACTORY_REG(Roll) | |||||
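| // For reference, a sketch of the 1-D roll described above (the flattened | |||||
| // case used when "dims" is empty; illustrative only): | |||||
| #include <vector> | |||||
| template <typename T> | |||||
| std::vector<T> roll_ref(const std::vector<T> &x, long shift) { | |||||
|   const long n = static_cast<long>(x.size()); | |||||
|   std::vector<T> y(x.size()); | |||||
|   for (long i = 0; i < n; ++i) { | |||||
|     // elements shifted beyond the last position re-enter at the front | |||||
|     y[((i + shift) % n + n) % n] = x[i]; | |||||
|   } | |||||
|   return y; | |||||
| } | |||||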
| /** | |||||
| *@brief Calculate the loss. Creates a criterion that optimizes a two-class classification | |||||
| logistic loss between input_x and input_y (containing 1 or -1). \n | |||||
| *@par Inputs: | |||||
| *Two inputs, including: | |||||
| * @li input_x: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li input_y: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| *@par Attributes: | |||||
| *@li reduction: An optional string. Defaults to "mean". \n | |||||
| *@par Outputs: | |||||
| *output_z: When reduction is "none", a Tensor with the same type and shape as "input_x"; \n | |||||
| * when reduction is "sum" or "mean", a Tensor with the same type as "input_x", whose shape is (1,). | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator SoftMarginLoss. \n | |||||
| */ | |||||
| REG_OP(SoftMarginLoss) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OUTPUT(output_z, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OP_END_FACTORY_REG(SoftMarginLoss) | |||||
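| // For reference, a sketch of the per-element soft margin term described | |||||
| // above, with input_y in {-1, +1} (illustrative only): | |||||
| #include <cmath> | |||||
| inline float soft_margin_ref(float x, float y) { | |||||
|   return std::log1p(std::exp(-y * x));  // log(1 + exp(-y * x)) | |||||
| } | |||||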
| /** | |||||
| * @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2. | |||||
| * @par Inputs: | |||||
| * @li predict: An ND tensor of type float16, float32. | |||||
| * @li target: An ND tensor of type float16, float32. | |||||
| * @li dout: An ND tensor of type float16, float32. | |||||
| * @li weight: An optional ND tensor of type float16, float32. | |||||
| * @li pos_weight: An optional ND tensor of type float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li reduction: An optional string. Defaults to "mean". \n | |||||
| * @par Outputs: | |||||
| * @li gradient: An ND tensor with the same shape and type as "predict". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad. | |||||
| */ | |||||
| REG_OP(SigmoidCrossEntropyWithLogitsGradV2) | |||||
| .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(pos_weight, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) | |||||
| /** | |||||
| * @brief Calculate the PoissonNllLoss function. | |||||
| * target ~ Poisson(input); loss(input, target) = input - target * log(input) + log(target!) \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li input_x: A tensor. Must be one of the following types: | |||||
| * float16, float32. | |||||
| * @li target: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * Four attributes, including: | |||||
| * @li log_input: An optional bool. Defaults to "True". | |||||
| * @li full: An optional bool. Defaults to "False". | |||||
| * @li eps: An optional float. Defaults to "1e-8". | |||||
| * @li reduction: An optional string. Defaults to "mean". \n | |||||
| * @par Outputs: | |||||
| * loss: A Tensor with the same element type as the two inputs. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator PoissonNllLoss. \n | |||||
| */ | |||||
| REG_OP(PoissonNllLoss) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(log_input, Bool, true) | |||||
| .ATTR(full, Bool, false) | |||||
| .ATTR(eps, Float, 1e-8) | |||||
| .ATTR(reduction, String, "mean") | |||||
| .OP_END_FACTORY_REG(PoissonNllLoss) | |||||
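| // For reference, a sketch of the per-element Poisson NLL term described | |||||
| // above, assuming log_input == true and full == false, so the Stirling | |||||
| // term log(target!) is omitted (illustrative only): | |||||
| #include <cmath> | |||||
| inline float poisson_nll_ref(float input, float target) { | |||||
|   return std::exp(input) - target * input;  // log-space input | |||||
| } | |||||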
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -20,7 +20,89 @@ | |||||
| */ | */ | ||||
| #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | ||||
| #define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | #define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | ||||
| #include "graph/operator_reg.h" | |||||
| #include "nn_pooling_ops.h" | #include "nn_pooling_ops.h" | ||||
| namespace ge { | |||||
| /** | |||||
| * @brief Says whether the targets are in the top "k" predictions . \n | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor. | |||||
| * @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. | |||||
| * @li k: A 1D Tensor of the same type as "targets". | |||||
| * Specifies the number of top elements to look at for computing precision . \n | |||||
| * @par Outputs: | |||||
| * precision: A Tensor of type bool . \n | |||||
| * @attention Constraints: | |||||
| * @li targets must be a non-negative tensor. | |||||
| * @par Third-party framework compatibility | |||||
| * @li Compatible with the TensorFlow operator InTopKV2. | |||||
| */ | |||||
| REG_OP(InTopKV2) | |||||
| .INPUT(predictions, TensorType({DT_FLOAT})) | |||||
| .INPUT(targets, TensorType(IndexNumberType)) | |||||
| .INPUT(k, TensorType({IndexNumberType})) | |||||
| .OUTPUT(precision, TensorType({DT_BOOL})) | |||||
| .OP_END_FACTORY_REG(InTopKV2) | |||||
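| // For reference, a sketch of the per-row check described above: the entry | |||||
| // for the target class is compared against the rest of the row, and the | |||||
| // result is true when it ranks among the k largest (illustrative only): | |||||
| #include <vector> | |||||
| inline bool in_top_k_ref(const std::vector<float> &row, int target, int k) { | |||||
|   int rank = 0; | |||||
|   for (float v : row) { | |||||
|     if (v > row[target]) ++rank;  // count strictly larger predictions | |||||
|   } | |||||
|   return rank < k; | |||||
| } | |||||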
| /** | |||||
| *@brief Performs batch normalization . \n | |||||
| *@par Inputs: | |||||
| * Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||||
| *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
| *@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
| if input "x" is with format NC1HWC0. Specifies the scaling factor. | |||||
| *@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
| if input "x" is with format NC1HWC0. Specifies the offset. | |||||
| *@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
| if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the | |||||
| operation is used for training. | |||||
| *@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be | |||||
| 5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" | |||||
| if the operation is used for training . \n | |||||
| *@par Attributes: | |||||
| *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". | |||||
| *@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". | |||||
| *@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | |||||
| *@par Outputs: | |||||
| * Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||||
| *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
| *@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
| if input "x" is with format NC1HWC0. Specifies the mean of "x". | |||||
| *@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
| Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". | |||||
| *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
| Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||||
| *@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
| Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n | |||||
| *@attention Constraints: | |||||
| *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | |||||
| then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". | |||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n | |||||
| */ | |||||
| REG_OP(FusedBatchNormV2) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(scale, TensorType({DT_FLOAT})) | |||||
| .INPUT(offset, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||||
| .ATTR(epsilon, Float, 0.0001) | |||||
| .ATTR(data_format, String, "NHWC") | |||||
| .ATTR(is_training, Bool, true) | |||||
| .OP_END_FACTORY_REG(FusedBatchNormV2) | |||||
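| // For reference, a sketch of the per-element normalization described above | |||||
| // in its inference form (illustrative only): | |||||
| #include <cmath> | |||||
| inline float batch_norm_ref(float x, float mean, float variance, float scale, | |||||
|                             float offset, float epsilon = 0.0001f) { | |||||
|   return scale * (x - mean) / std::sqrt(variance + epsilon) + offset; | |||||
| } | |||||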
| } // namespace ge | |||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -182,6 +182,125 @@ REG_OP(AvgPool3D) | |||||
| .ATTR(data_format, String, "NDHWC") | .ATTR(data_format, String, "NDHWC") | ||||
| .OP_END_FACTORY_REG(AvgPool3D) | .OP_END_FACTORY_REG(AvgPool3D) | ||||
| /** | |||||
| *@brief Performs average pooling on the input. | |||||
| *@par Inputs: | |||||
| *@li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. | |||||
| *@li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. | |||||
| *@li multiplier: An optional tensor of float16, float32, double. | |||||
| *@par Attributes: | |||||
| *@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. | |||||
| *@li strides: List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. | |||||
| *@li pads: List of ints, implicit zero paddings on both sides of the input. | |||||
| *@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. | |||||
| *@li count_include_pad: When true, will include the zero-padding in the averaging calculation. | |||||
| *@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. | |||||
| *@li data_format: A string, format of input data . \n | |||||
| *@par Outputs: | |||||
| *y: The average pooled output tensor . \n | |||||
| *@attention Constraints: | |||||
| *@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator AvgPool3D. | |||||
| */ | |||||
| REG_OP(AvgPool3DD) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .REQUIRED_ATTR(ksize, ListInt) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .ATTR(count_include_pad, Bool, true) | |||||
| .ATTR(divisor_override, Int, 0) | |||||
| .ATTR(data_format, String, "NDHWC") | |||||
| .OP_END_FACTORY_REG(AvgPool3DD) | |||||
| /** | |||||
| * @brief Computes AvgPool3DGrad function. | |||||
| * @par Inputs: | |||||
| * @li orig_input_shape: A 1D tensor of type int32, specifying the shape of the original input. | |||||
| * @li grads: An NDHWC tensor of type float16, float32, or double. | |||||
| * @par Attributes: | |||||
| * @li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. | |||||
| * @li strides: List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. | |||||
| * @li pads: List of ints, implicit zero paddings on both sides of the input. | |||||
| * @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. | |||||
| * @li count_include_pad: When true, will include the zero-padding in the averaging calculation. | |||||
| * @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. | |||||
| * @li data_format: A string, format of input data . | |||||
| * @par Outputs: | |||||
| * output: A mutable tensor with the shape specified by "orig_input_shape" and the same type as "grads". | |||||
| * @par Third-party framework compatibility | |||||
| * @li Compatible with the TensorFlow operator AvgPoolGrad. | |||||
| */ | |||||
| REG_OP(AvgPool3DGrad) | |||||
| .INPUT(orig_input_shape, TensorType({DT_INT32})) | |||||
| .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .REQUIRED_ATTR(ksize, ListInt) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .ATTR(count_include_pad, Bool, true) | |||||
| .ATTR(divisor_override, Int, 0) | |||||
| .ATTR(data_format, String, "NDHWC") | |||||
| .OP_END_FACTORY_REG(AvgPool3DGrad) | |||||
| /** | |||||
| * @brief Computes gradients of the average pooling function. | |||||
| * @par Inputs: | |||||
| * @li grads: An NDHWC tensor of type float16. | |||||
| * @li filter: An optional tensor of type float16, fractal_z_3d layout. | |||||
| * @li multiplier: An optional tensor of float16. | |||||
| * @par Attributes: | |||||
| * @li orig_input_shape: List of ints that has length 5. The shape of the original input tensor. | |||||
| * @li ksize: List of ints that has length 3. The size of the window for each dimension of the input tensor. | |||||
| * @li strides: List of ints that has length 3. The stride of the sliding window for each dimension of the input tensor. | |||||
| * @li pads: List of ints, implicit zero paddings on both sides of the input. | |||||
| * @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. | |||||
| * @li count_include_pad: When true, will include the zero-padding in the averaging calculation. | |||||
| * @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. | |||||
| * @li data_format: A string, format of input data . \n | |||||
| * @par Outputs: | |||||
| * output: The gradient tensor of the input, with the shape given by "orig_input_shape" . \n | |||||
| * @attention Constraints: | |||||
| * @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator AvgPool3DGrad. | |||||
| */ | |||||
| REG_OP(AvgPool3DGradD) | |||||
| .INPUT(grads, TensorType({DT_FLOAT16})) | |||||
| .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16})) | |||||
| .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .REQUIRED_ATTR(orig_input_shape, ListInt) | |||||
| .REQUIRED_ATTR(ksize, ListInt) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .ATTR(count_include_pad, Bool, true) | |||||
| .ATTR(divisor_override, Int, 0) | |||||
| .ATTR(data_format, String, "NDHWC") | |||||
| .OP_END_FACTORY_REG(AvgPool3DGradD) | |||||
| /** | /** | ||||
| *@brief Performs max_pool_ext2 on the input . \n | *@brief Performs max_pool_ext2 on the input . \n | ||||
| @@ -308,6 +427,31 @@ REG_OP(MaxPool3D) | |||||
| .ATTR(data_format, String, "NDHWC") | .ATTR(data_format, String, "NDHWC") | ||||
| .OP_END_FACTORY_REG(MaxPool3D) | .OP_END_FACTORY_REG(MaxPool3D) | ||||
| /** | |||||
| *@brief Applies a 2D adaptive max pooling over an input signal composed of several input planes. \n | |||||
| * The output is of size H x W, for any input size. | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li x: A Tensor. Must be one of the following data types: | |||||
| * float16, float32, float64. \n | |||||
| * @par Attributes: | |||||
| * @li output_size: A required list of 2 ints | |||||
| * specifying the size (H,W) of the output tensor. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same data type as "x" \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator AdaptiveMaxPool2d. | |||||
| */ | |||||
| REG_OP(AdaptiveMaxPool2d) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||||
| .OUTPUT(argmax, TensorType::IndexNumberType()) | |||||
| .REQUIRED_ATTR(output_size, ListInt) | |||||
| .OP_END_FACTORY_REG(AdaptiveMaxPool2d) | |||||
| /** | /** | ||||
| * @brief Computes second-order gradients of the maxpooling3d function . \n | * @brief Computes second-order gradients of the maxpooling3d function . \n | ||||
| @@ -477,8 +621,9 @@ REG_OP(MaxPoolV2) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * One input: | * One input: | ||||
| *x: An NC1HWC0 Tensor. Supported type: float, double, int32, | |||||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64 . \n | |||||
| *x: A 4D Tensor. Supported type: float, double, int32, | |||||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64. | |||||
| * Must set the format, supported format list ["NCHW", "NHWC"]. \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li ksize: A required list of int8, int16, int32, or int64 values, | *@li ksize: A required list of int8, int16, int32, or int64 values, | ||||
| @@ -517,10 +662,12 @@ REG_OP(MaxPoolWithArgmax) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * Three inputs, including: | * Three inputs, including: | ||||
| *@li x: An NC1HWC0 tensor. Supported type: float, double, int32, | |||||
| *@li x: A 4D tensor. Supported type: float, double, int32, | |||||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64. | * uint8, int16, int8, int64, uint16, half, uint32, uint64. | ||||
| *@li grad: An NC1HWC0 tensor. Supported type: float, double, int32, | |||||
| * Must set the format, supported format list ["NCHW", "NHWC"] | |||||
| *@li grad: A 4D tensor. Supported type: float, double, int32, | |||||
| * uint8, int16, int8, int64, uint16, half, uint32, uint64. | * uint8, int16, int8, int64, uint16, half, uint32, uint64. | ||||
| * Must set the format, supported format list ["NCHW", "NHWC"] | |||||
| *@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n | *@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -1107,7 +1254,7 @@ REG_OP(AvgPool1DD) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * One input: | * One input: | ||||
| *x: An NC1HWC0 Tensor of type float16. | |||||
| *x: A 4D Tensor of type float16. Must set the format, supported format list ["NCHW", "NHWC"]. | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | ||||
| * each dimension of the input tensor. No default value. | * each dimension of the input tensor. No default value. | ||||
| @@ -1148,9 +1295,9 @@ REG_OP(MaxPoolWithArgmaxV2) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * Three inputs, including: | * Three inputs, including: | ||||
| *@li x: An NC1HWC0 tensor of type float16. | |||||
| *@li grad: An NC1HWC0 tensor of type float16. | |||||
| *@li argmx: An NC1HWC0 tensor of type uint16 or int64 . \n | |||||
| *@li x: A 4D tensor of type float16. Must set the format, supported format list ["NCHW", "NHWC"] | |||||
| *@li grad: A 4D tensor of type float16. Must set the format, supported format list ["NCHW", "NHWC"] | |||||
| *@li argmx: A 4D tensor of type uint16 or int64. Must set the format, supported format list ["NCHW", "NHWC"] \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | ||||
| @@ -1291,5 +1438,171 @@ REG_OP(MaxPoolV3Grad) | |||||
| .ATTR(global_pooling, Bool, false) | .ATTR(global_pooling, Bool, false) | ||||
| .ATTR(ceil_mode, Bool, false) | .ATTR(ceil_mode, Bool, false) | ||||
| .OP_END_FACTORY_REG(MaxPoolV3Grad) | .OP_END_FACTORY_REG(MaxPoolV3Grad) | ||||
| /** | |||||
| *@brief Performs dilation2d on the input . \n | |||||
| *@par Inputs: | |||||
| *x: A 4D tensor. The supported format is NHWC. | |||||
| *filter: A 3D tensor of the same type as "x", | |||||
| and its C dimension is the same as that of "x". \n | |||||
| *@par Attributes: | |||||
| *@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. | |||||
| *@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1. | |||||
| *@li padding_mode: An optional string. Defaults to "SAME". Supported values are "SAME" and "VALID". | |||||
| *@li pads: An optional list of 4 ints. | |||||
| *@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". | |||||
| *@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n | |||||
| *@par Outputs: | |||||
| *y: The output tensor. Has the same type and format as input "x" . \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator Dilation2D. | |||||
| */ | |||||
| REG_OP(Dilation2D) | |||||
| .INPUT(x,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) | |||||
| .INPUT(filter,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) | |||||
| .OUTPUT(y,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(rates, ListInt) | |||||
| .ATTR(padding_mode, String, "SAME") | |||||
| .ATTR(pads, ListInt, {0,0,0,0}) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .ATTR(data_format, String, "NHWC") | |||||
| .OP_END_FACTORY_REG(Dilation2D) | |||||
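| // For reference, a sketch of the morphological dilation described above for | |||||
| // one output position, single channel, VALID padding (indices are assumed | |||||
| // in-bounds; illustrative only): | |||||
| #include <algorithm> | |||||
| #include <limits> | |||||
| #include <vector> | |||||
| inline float dilation2d_ref(const std::vector<std::vector<float>> &x, | |||||
|                             const std::vector<std::vector<float>> &filter, | |||||
|                             int out_h, int out_w, int stride, int rate) { | |||||
|   float best = std::numeric_limits<float>::lowest(); | |||||
|   for (size_t fh = 0; fh < filter.size(); ++fh) { | |||||
|     for (size_t fw = 0; fw < filter[0].size(); ++fw) { | |||||
|       // max-plus convolution: add the filter value, keep the maximum | |||||
|       best = std::max(best, x[out_h * stride + fh * rate] | |||||
|                              [out_w * stride + fw * rate] + filter[fh][fw]); | |||||
|     } | |||||
|   } | |||||
|   return best; | |||||
| } | |||||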
| /** | |||||
| * @brief Applies a 2D adaptive average pooling over | |||||
| * an input signal composed of several input planes. \n | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li x: A Tensor. Must be one of the following data types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li output_size: A required list of 2 ints | |||||
| * specifying the size (H,W) of the output tensor. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same data type as "x" \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator AdaptiveAvgPool2d. | |||||
| */ | |||||
| REG_OP(AdaptiveAvgPool2d) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .REQUIRED_ATTR(output_size, ListInt) | |||||
| .OP_END_FACTORY_REG(AdaptiveAvgPool2d) | |||||
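| // For reference, a sketch of how adaptive pooling splits an input extent of | |||||
| // size "in" into "out" windows, using the usual floor/ceil rule | |||||
| // (illustrative only): | |||||
| inline void adaptive_window_ref(int in, int out, int idx, | |||||
|                                 int &start, int &end) { | |||||
|   start = (idx * in) / out;                // floor(idx * in / out) | |||||
|   end = ((idx + 1) * in + out - 1) / out;  // ceil((idx + 1) * in / out) | |||||
| } | |||||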
| /** | |||||
| * @brief Computes gradients of the adaptive average pooling function. | |||||
| * @par Inputs: | |||||
| * @li input_grad: A Tensor. Must be one of the following data types: | |||||
| * float16, float32. | |||||
| * @par Attributes: | |||||
| * @li orig_input_shape: A required tuple or list of type int32. | |||||
| * @par Outputs: | |||||
| * @li output_grad: A tensor with the same type as "input_grad". | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator AdaptiveAvgPool2dGrad. | |||||
| */ | |||||
| REG_OP(AdaptiveAvgPool2dGrad) | |||||
| .INPUT(input_grad, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(output_grad, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .REQUIRED_ATTR(orig_input_shape, ListInt) | |||||
| .OP_END_FACTORY_REG(AdaptiveAvgPool2dGrad) | |||||
| /** | |||||
| * @brief Performs the backpropagation of MaxPoolWithArgmaxV1. | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li x: An NC1HWC0 tensor of type float16. | |||||
| * @li grad: An NC1HWC0 tensor of type float16. | |||||
| * @li argmax: An NC1HWC0 tensor of type uint16 or int64. \n | |||||
| * @par Attributes: | |||||
| * @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | |||||
| * each dimension of the input tensor. No default value. | |||||
| * @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for | |||||
| * each dimension of the input tensor. No default value. | |||||
| * @li pads: A required list of ints. \n | |||||
| * @par Outputs: | |||||
| * y: A Tensor. Has the same type and format as input "x". \n | |||||
| * @attention Constraints: | |||||
| * @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | |||||
| * @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 | |||||
| * @li "pads" is listint. | |||||
| * @li "ceil_mode" defaults to False. | |||||
| * @li "data_format" defaults to "NC1HWC0". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1. | |||||
| */ | |||||
| REG_OP(MaxPoolGradWithArgmaxV1) | |||||
| .INPUT(x, TensorType({DT_FLOAT16})) | |||||
| .INPUT(grad, TensorType({DT_FLOAT16})) | |||||
| .INPUT(argmax, TensorType({DT_UINT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16})) | |||||
| .REQUIRED_ATTR(ksize, ListInt) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .ATTR(dtype, Int, 3) | |||||
| .ATTR(dilation, ListInt, {1, 1, 1, 1}) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV1) | |||||
| /** | |||||
| * @brief Performs max pooling on the input and outputs both max values and indices. | |||||
| * @par Inputs: | |||||
| * One input: | |||||
| * x: An NC1HWC0 Tensor of type float16. \n | |||||
| * @par Attributes: | |||||
| * @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | |||||
| * each dimension of the input tensor. No default value. | |||||
| * @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for | |||||
| * each dimension of the input tensor. No default value. | |||||
| * @li pads: A required list of ints. No default value. \n | |||||
| * @par Outputs: | |||||
| * y: A Tensor. Has the same type and format as input "x". | |||||
| * argmax: A Tensor. type:uint16, format:NC1HWC0. \n | |||||
| * @attention Constraints: | |||||
| * @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | |||||
| * @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, | |||||
| * strides[2] <= 63, strides[2] >= 1. | |||||
| * @li "pads" is listint. | |||||
| * @li "ceil_mode" defaults to False. | |||||
| * @li "data_format" defaults to "NC1HWC0". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator MaxPoolWithArgmaxV1. | |||||
| */ | |||||
| REG_OP(MaxPoolWithArgmaxV1) | |||||
| .INPUT(x, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16})) | |||||
| .OUTPUT(argmax, TensorType({DT_UINT16})) | |||||
| .REQUIRED_ATTR(ksize, ListInt) | |||||
| .REQUIRED_ATTR(strides, ListInt) | |||||
| .REQUIRED_ATTR(pads, ListInt) | |||||
| .ATTR(dtype, Int, 3) | |||||
| .ATTR(dilation, ListInt, {1, 1, 1, 1}) | |||||
| .ATTR(ceil_mode, Bool, false) | |||||
| .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -223,7 +223,29 @@ REG_OP(Relu6Grad) | |||||
| .INPUT(features, TensorType::RealNumberType()) | .INPUT(features, TensorType::RealNumberType()) | ||||
| .OUTPUT(backprops, TensorType::RealNumberType()) | .OUTPUT(backprops, TensorType::RealNumberType()) | ||||
| .OP_END_FACTORY_REG(Relu6Grad) | .OP_END_FACTORY_REG(Relu6Grad) | ||||
| /** | |||||
| *@brief Calculates the elu_grad_v2 function. | |||||
| *Applies the element-wise function: | |||||
| * Computes the backward of the ELU: 1 if x > 0; otherwise elu(x) + alpha. | |||||
| *@par Inputs: | |||||
| *Two inputs, including: | |||||
| * @li grads: A tensor. Must be one of the following types: | |||||
| * float16, float32. | |||||
| * @li activations: A tensor. Must be one of the following types: | |||||
| * float16, float32. | |||||
| * | |||||
| *@par Outputs: | |||||
| *y: A Tensor with the same type and shape as "grads". | |||||
| * | |||||
| *@par Attributes: | |||||
| *@li alpha: scalar parameter, default value = 1.0 | |||||
| */ | |||||
| REG_OP(EluGradV2) | |||||
| .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .ATTR(alpha, Float, 1.0) | |||||
| .OP_END_FACTORY_REG(EluGradV2) | |||||
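| // For reference, a sketch of the backward rule described above: the local | |||||
| // gradient is 1 where the activation is positive, and activation + alpha | |||||
| // elsewhere (illustrative only): | |||||
| inline float elu_grad_v2_ref(float grad, float activation, | |||||
|                              float alpha = 1.0f) { | |||||
|   return grad * (activation > 0.0f ? 1.0f : activation + alpha); | |||||
| } | |||||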
| /** | /** | ||||
| * @brief Compute sigmoid of "x" element-wise . \n | * @brief Compute sigmoid of "x" element-wise . \n | ||||
| @@ -508,6 +530,34 @@ REG_OP(Elu) | |||||
| .ATTR(alpha, Float, 1.0) | .ATTR(alpha, Float, 1.0) | ||||
| .OP_END_FACTORY_REG(Elu) | .OP_END_FACTORY_REG(Elu) | ||||
| /** | |||||
| *@brief Continuously Differentiable Exponential Linear Units: | |||||
| * Perform the linear unit element-wise on the input tensor X using formula: | |||||
| * max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n | |||||
| *@par Inputs: | |||||
| *x: A float16, float32 or double, for the input data type . \n | |||||
| *@par Attributes: | |||||
| *alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n | |||||
| *@par Outputs: | |||||
| *y: A float16, float32 or double, for the normalized result . \n | |||||
| *@attention Constraints: | |||||
| *@li The input is of type float16 or float32 . \n | |||||
| *@par Multiple batches supported or not | |||||
| *Supported | |||||
| *@par Third-party framework compatibility | |||||
| *@li Compatible with ONNX's Celu operator | |||||
| */ | |||||
| REG_OP(Celu) | |||||
| .INPUT(x, TensorType::FloatingDataType()) | |||||
| .OUTPUT(y, TensorType::FloatingDataType()) | |||||
| .ATTR(alpha, Float, 1.0) | |||||
| .OP_END_FACTORY_REG(Celu) | |||||
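| // For reference, a sketch of the element-wise CELU formula given above | |||||
| // (illustrative only): | |||||
| #include <algorithm> | |||||
| #include <cmath> | |||||
| inline float celu_ref(float x, float alpha = 1.0f) { | |||||
|   return std::max(0.0f, x) + | |||||
|          std::min(0.0f, alpha * (std::exp(x / alpha) - 1.0f)); | |||||
| } | |||||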
| /** | /** | ||||
| *@brief Computes gradients for the exponential linear (Elu) operation. | *@brief Computes gradients for the exponential linear (Elu) operation. | ||||
| * | * | ||||
| @@ -640,6 +690,286 @@ REG_OP(Mish) | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 })) | .OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 })) | ||||
| .OP_END_FACTORY_REG(Mish) | .OP_END_FACTORY_REG(Mish) | ||||
| /** | |||||
| * @brief pytorch hardtanh_backward operator. | |||||
| * | |||||
| * @par Inputs: | |||||
| * 2 inputs, including: | |||||
| * @li result, output tensor of the hardtanh forward pass, | |||||
| * datatype: float16/float32, format:ND/5HD. | |||||
| * @li grad, gradient tensor propagated from the subsequent layer, | |||||
| * datatype:float16/float32, format:ND/5HD. \n | |||||
| * @par Attributes: | |||||
| * 2 attributes, including: | |||||
| * @li min_val, minimum value of the linear region range, datatype:float. | |||||
| * @li max_val, maximum value of the linear region range, datatype:float. \n | |||||
| * @par Outputs: | |||||
| * 1 output, including: | |||||
| * @li y, hardtanh_backward output tensor, datatype and format is same as | |||||
| * input result. \n | |||||
| * @attention Constraints: | |||||
| * This operator only supports dataType: float16/float32, format: ND/5HD. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator HardtanhGrad. | |||||
| */ | |||||
| REG_OP(HardtanhGrad) | |||||
| .INPUT(result, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "First operand." */ | |||||
| .INPUT(grad, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Second operand." */ | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Result, has same element type as two inputs" */ | |||||
| .ATTR(min_val, Float, -1.0) | |||||
| .ATTR(max_val, Float, 1.0) | |||||
| .OP_END_FACTORY_REG(HardtanhGrad) | |||||
| /** | |||||
| * @brief Calculates the softplus activation function with attributes of beta and threshold. \n | |||||
| * @par Inputs: | |||||
| * One inputs, including: | |||||
| * @li x: A mutable Tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li beta: An optional float. Defaults to "1.0" \n | |||||
| * @li threshold: An optional float. Defaults to "20.0" \n | |||||
| * @par Outputs: | |||||
| * @li y: A mutable Tensor. Has the same type as "x" \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Softplus. | |||||
| */ | |||||
| REG_OP(SoftplusV2) | |||||
| .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .ATTR(beta, Float, 1.0) | |||||
| .ATTR(threshold, Float, 20.0) | |||||
| .OP_END_FACTORY_REG(SoftplusV2) | |||||
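| // For reference, a sketch of the element-wise softplus with "beta" and | |||||
| // "threshold" described above; inputs above the threshold fall back to the | |||||
| // identity for numerical stability (illustrative only): | |||||
| #include <cmath> | |||||
| inline float softplus_v2_ref(float x, float beta = 1.0f, | |||||
|                              float threshold = 20.0f) { | |||||
|   if (x * beta > threshold) return x; | |||||
|   return std::log1p(std::exp(beta * x)) / beta; | |||||
| } | |||||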
| /** | |||||
| * @brief Calculates the reversed outputs of the function "softplus_v2". \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li input_gradients: A mutable Tensor. Must be one of the following types: | |||||
| * float16, float32. | |||||
| * @li input_features: A mutable Tensor of the same type as "input_gradients" \n | |||||
| * @par Attributes: | |||||
| * @li beta: An optional float. Defaults to "1.0" \n | |||||
| * @li threshold: An optional float. Defaults to "20.0" \n | |||||
| * @par Outputs: | |||||
| * @li output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator SoftplusGrad. | |||||
| */ | |||||
| REG_OP(SoftplusV2Grad) | |||||
| .INPUT(input_gradients, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .INPUT(input_features, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .OUTPUT(output_backprops, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||||
| .ATTR(beta, Float, 1.0) | |||||
| .ATTR(threshold, Float, 20.0) | |||||
| .OP_END_FACTORY_REG(SoftplusV2Grad) | |||||
| /** | |||||
| * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) | |||||
| * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. | |||||
| * | |||||
| * @par Inputs: | |||||
| * one input including: | |||||
| * @li x: A Tensor. Must be one of the following types: float32, float16 | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li alpha: An optional float. Defaults to 1.0. | |||||
| * | |||||
| * @par Outputs: | |||||
| * one output including: | |||||
| * @li y: A Tensor of the same type as x | |||||
| * | |||||
| */ | |||||
| REG_OP(ThresholdedRelu) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(alpha, Float, 1.0) | |||||
| .OP_END_FACTORY_REG(ThresholdedRelu) | |||||
| /** | |||||
| * @brief Calculate the hard shrinkage function. \n | |||||
| * @par Inputs: | |||||
| * One inputs, including: | |||||
| * @li input_x: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li lambd: An optional float. Defaults to 0.5. \n | |||||
| * @par Outputs: | |||||
| * y: A Tensor with the same dtype and shape as "input_x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Hardshrink. \n | |||||
| */ | |||||
| REG_OP(HardShrink) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(lambd, Float, 0.5) | |||||
| .OP_END_FACTORY_REG(HardShrink) | |||||
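| // For reference, a sketch of the element-wise hard shrinkage described | |||||
| // above (illustrative only): | |||||
| inline float hard_shrink_ref(float x, float lambd = 0.5f) { | |||||
|   return (x > lambd || x < -lambd) ? x : 0.0f; | |||||
| } | |||||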
| /** | |||||
| *@brief Calculate the hard shrink grad function. \n | |||||
| * | |||||
| * Computes the gradient for the HardShrink: if x > lambda or x < -lambda, x, otherwise 0 | |||||
| * | |||||
| *@par Inputs: | |||||
| *Two inputs, including: | |||||
| * @li gradients: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li features: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * | |||||
| *@par Outputs: | |||||
| *backprops: A Tensor with the same type and shape as "features". \n | |||||
| * | |||||
| *@par Attributes: | |||||
| *@li lambda: An optional float. Defaults to 0.5. \n | |||||
| * | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator Hardshrink_backward. \n | |||||
| */ | |||||
| REG_OP(HardShrinkGrad) | |||||
| .INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(lambda, Float, 0.5) | |||||
| .OP_END_FACTORY_REG(HardShrinkGrad) | |||||
| /** | |||||
| * @brief Calculate the hard sigmoid function. \n | |||||
| * @par Inputs: | |||||
| * One inputs, including: | |||||
| * @li input_x: A tensor. Must be one of the following types: | |||||
| * float16, float32, int32. \n | |||||
| * @par Attributes: | |||||
| * @li alpha: An optional float. Defaults to 0.16666666. \n | |||||
| * @li beta: An optional float. Defaults to 0.5. \n | |||||
| * @par Outputs: | |||||
| * y: A Tensor with the same dtype and shape as "input_x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Hardsigmoid. \n | |||||
| */ | |||||
| REG_OP(HardSigmoid) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .ATTR(alpha, Float, 0.16666666) | |||||
| .ATTR(beta, Float, 0.5) | |||||
| .OP_END_FACTORY_REG(HardSigmoid) | |||||
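| // For reference, a sketch of the element-wise hard sigmoid described above, | |||||
| // y = clamp(alpha * x + beta, 0, 1) (illustrative only): | |||||
| #include <algorithm> | |||||
| inline float hard_sigmoid_ref(float x, float alpha = 0.16666666f, | |||||
|                               float beta = 0.5f) { | |||||
|   return std::min(1.0f, std::max(0.0f, alpha * x + beta)); | |||||
| } | |||||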
| /** | |||||
| * @brief Calculate the soft shrinkage function. \n | |||||
| * @par Inputs: | |||||
| * One inputs, including: | |||||
| * @li input_x: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li lambd: An optional float. Defaults to 0.5. \n | |||||
| * @par Outputs: | |||||
| * y: A Tensor with the same dtype and shape as "input_x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator Softshrink. \n | |||||
| */ | |||||
| REG_OP(SoftShrink) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(lambd, Float, 0.5) | |||||
| .OP_END_FACTORY_REG(SoftShrink) | |||||
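| // For reference, a sketch of the element-wise soft shrinkage described | |||||
| // above (illustrative only): | |||||
| inline float soft_shrink_ref(float x, float lambd = 0.5f) { | |||||
|   if (x > lambd) return x - lambd; | |||||
|   if (x < -lambd) return x + lambd; | |||||
|   return 0.0f; | |||||
| } | |||||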
| /** | |||||
| * @brief Calculate the reversed outputs of the function "soft_shrink". \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li input_grad: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li input_x: A tensor of the same dtype as "input_grad". \n | |||||
| * @par Attributes: | |||||
| * @li lambd: An optional float. Defaults to 0.5. \n | |||||
| * @par Outputs: | |||||
| * y: A Tensor of the same dtype and shape as "input_graxd". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator SoftShrinkGrad. \n | |||||
| */ | |||||
| REG_OP(SoftShrinkGrad) | |||||
| .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(lambd, Float, 0.5) | |||||
| .OP_END_FACTORY_REG(SoftShrinkGrad) | |||||
| /** | |||||
| *@brief Calculate -ln(1+e^(-x)). \n | |||||
| *@par Inputs: | |||||
| *One input, including: | |||||
| * @li x: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| *@par Outputs: | |||||
| *One output, including: | |||||
| * @li y: A tensor with the same type and shape as "x". \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator LogSigmoid. \n | |||||
| */ | |||||
| REG_OP(LogSigmoid) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) /* "input:x" */ | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) /* "output:y" */ | |||||
| .OP_END_FACTORY_REG(LogSigmoid) | |||||
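| // For reference, a sketch of the element-wise log-sigmoid described above, | |||||
| // y = -ln(1 + e^(-x)) (illustrative only): | |||||
| #include <cmath> | |||||
| inline float log_sigmoid_ref(float x) { | |||||
|   return -std::log1p(std::exp(-x)); | |||||
| } | |||||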
| /** | |||||
| *@brief Calculate the backward outputs of the function "hard_sigmoid" \n | |||||
| *@par Inputs: | |||||
| *Two inputs, including: | |||||
| * @li grads: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @li input_x: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| *@par Outputs: | |||||
| *One output, including: | |||||
| * @li y: A tensor with the same type and shape as "grads". \n | |||||
| * @par Attributes: | |||||
| * @li alpha: An optional float. Defaults to 0.16666666. \n | |||||
| * @li beta: An optional float. Defaults to 0.5. \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator HardSigmoidGrad. \n | |||||
| */ | |||||
| REG_OP(HardSigmoidGrad) | |||||
| .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .ATTR(alpha, Float, 0.16666666) | |||||
| .ATTR(beta, Float, 0.5) | |||||
| .OP_END_FACTORY_REG(HardSigmoidGrad) | |||||
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -101,7 +101,7 @@ REG_OP(FillD) | |||||
| */ | */ | ||||
| REG_OP(BroadcastTo) | REG_OP(BroadcastTo) | ||||
| .INPUT(x, TensorType::BasicType()) | .INPUT(x, TensorType::BasicType()) | ||||
| .INPUT(shape, TensorType({DT_INT32})) | |||||
| .INPUT(shape, TensorType({DT_INT32,DT_INT64})) | |||||
| .OUTPUT(y, TensorType::BasicType()) | .OUTPUT(y, TensorType::BasicType()) | ||||
| .OP_END_FACTORY_REG(BroadcastTo) | .OP_END_FACTORY_REG(BroadcastTo) | ||||
| @@ -161,7 +161,7 @@ REG_OP(Pad) | |||||
| *@brief Pads a tensor . \n | *@brief Pads a tensor . \n | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n | |||||
| *x: A Tensor. Must be one of the following types: float16, float32, int32 . \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *paddings: An optional "vector<vector<int>>". Defaults to "{}". | *paddings: An optional "vector<vector<int>>". Defaults to "{}". | ||||
| @@ -180,8 +180,8 @@ REG_OP(Pad) | |||||
| * Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. | ||||
| */ | */ | ||||
| REG_OP(PadD) | REG_OP(PadD) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .REQUIRED_ATTR(paddings, ListListInt) | .REQUIRED_ATTR(paddings, ListListInt) | ||||
| .OP_END_FACTORY_REG(PadD) | .OP_END_FACTORY_REG(PadD) | ||||
| @@ -213,7 +213,7 @@ REG_OP(PadV2) | |||||
| *@brief Pads a tensor . \n | *@brief Pads a tensor . \n | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n | |||||
| *x: A Tensor. Must be one of the following types: float16, float32, int32 . \n | |||||
| *constant_values: A Tensor. Must have the same type as input. | *constant_values: A Tensor. Must have the same type as input. | ||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -227,10 +227,7 @@ REG_OP(PadV2) | |||||
| *y: A Tensor of the same type as "x" . \n | *y: A Tensor of the same type as "x" . \n | ||||
| *@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
| * Compatible with TensorFlow operator Pad. | |||||
| * | |||||
| * @par Restrictions: | |||||
| * Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. | |||||
| * Compatible with TensorFlow operator PadV2. | |||||
| */ | */ | ||||
| REG_OP(PadV2D) | REG_OP(PadV2D) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | ||||
| @@ -403,5 +400,46 @@ REG_OP(EmbeddingRankId) | |||||
| .ATTR(mode, String, "mod") | .ATTR(mode, String, "mod") | ||||
| .OP_END_FACTORY_REG(EmbeddingRankId) | .OP_END_FACTORY_REG(EmbeddingRankId) | ||||
| /** | |||||
| * @brief Fills a tensor of the specified shape with the given value. | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li dims: A Tensor specifying the shape of the output tensor. | |||||
| * @par Attributes: | |||||
| * @li value: An optional float value. Defaults to 0.0. | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the shape specified by "dims" and is filled with the value specified by "value". | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the ONNX operator ConstantOfShape. | |||||
| */ | |||||
| REG_OP(FillV2) | |||||
| .INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||||
| .ATTR(value, Float, 0) | |||||
| .OP_END_FACTORY_REG(FillV2) | |||||
| /** | |||||
| * @brief Fills a tensor of the specified shape with the given value. | |||||
| * @par Attributes: | |||||
| * @li value: An optional float value. Defaults to 0.0. | |||||
| * @li dims: A required list of ints specifying the shape of the output tensor. | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the shape specified by "dims" and is filled with the value specified by "value". | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the ONNX operator ConstantOfShape. | |||||
| */ | |||||
| REG_OP(FillV2D) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64})) | |||||
| .ATTR(value, Float, 0) | |||||
| .REQUIRED_ATTR(dims, ListInt) | |||||
| .OP_END_FACTORY_REG(FillV2D) | |||||
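| // For reference, a sketch of the fill semantics shared by FillV2 and | |||||
| // FillV2D: a tensor with prod(dims) elements, every one equal to "value" | |||||
| // (illustrative only): | |||||
| #include <cstdint> | |||||
| #include <vector> | |||||
| inline std::vector<float> fill_v2_ref(const std::vector<int64_t> &dims, | |||||
|                                       float value = 0.0f) { | |||||
|   int64_t n = 1; | |||||
|   for (int64_t d : dims) n *= d;  // total element count | |||||
|   return std::vector<float>(static_cast<size_t>(n), value); | |||||
| } | |||||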
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -495,6 +495,60 @@ REG_OP(ShuffleChannel) | |||||
| DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) | DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) | ||||
| .ATTR(group, Int, 1) | .ATTR(group, Int, 1) | ||||
| .OP_END_FACTORY_REG(ShuffleChannel) | .OP_END_FACTORY_REG(ShuffleChannel) | ||||
| /** | |||||
| * @brief Generate a tensor of samples from a multinomial | |||||
| * distribution according to the probabilities of each of | |||||
| * the possible outcomes. | |||||
| * | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li x: Input tensor with shape [batch_size, class_size], | |||||
| * where class_size is the number of all possible outcomes. | |||||
| * Each value along the axis zero represents the unnormalized | |||||
| * log-probability of each corresponding outcome in a batch. | |||||
| * | |||||
| * @par Outputs: | |||||
| * One output, including: | |||||
| * @li y: Output tensor with shape [batch_size, sample_size], | |||||
| * where sample_size is the number of times to sample. | |||||
| * Each value along the axis zero represents the outcome of | |||||
| * the corresponding sample in a batch. | |||||
| * | |||||
| */ | |||||
| REG_OP(MultinomialFuss) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64})) | |||||
| .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) | |||||
| .ATTR(dtype, Int, 6) | |||||
| .ATTR(sample_size, Int, 1) | |||||
| .ATTR(seed, Float, 0) | |||||
| .OP_END_FACTORY_REG(MultinomialFuss) | |||||
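A minimal reference sketch of the sampling rule described above for a single batch row, using only the C++ standard library (illustrative; on-device seed and dtype handling differ, and the helper name is hypothetical):

    #include <cmath>
    #include <cstdint>
    #include <random>
    #include <vector>

    // Draw `sample_size` outcome indices from the distribution given by
    // unnormalized log-probabilities `logits` (one batch row).
    std::vector<int64_t> MultinomialRef(const std::vector<float>& logits,
                                        int sample_size, unsigned seed) {
      std::vector<double> weights(logits.size());
      for (size_t i = 0; i < logits.size(); ++i) {
        weights[i] = std::exp(static_cast<double>(logits[i]));  // log-prob -> weight
      }
      std::mt19937 gen(seed);
      std::discrete_distribution<int64_t> dist(weights.begin(), weights.end());
      std::vector<int64_t> out(sample_size);
      for (auto& v : out) v = dist(gen);
      return out;
    }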
| /** | |||||
| * @brief During training, randomly zeroes some of the elements of the input tensor | |||||
| * with probability "p". | |||||
| * | |||||
| * @par Inputs: | |||||
| * @li x: An ND Tensor. Must be one of the following data types: float16, float32. | |||||
| * @li seed: An ND Tensor. Must be of the following data type: float32. | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li p: The probability that each element is zeroed. | |||||
| * | |||||
| * @par Outputs: | |||||
| * @li y: A tensor with the same shape and type as "x". | |||||
| * @li mask: A tensor with the same shape as "x", of type float32. | |||||
| * @li seed: A tensor with the same shape and type as "seed". | |||||
| */ | |||||
| REG_OP(DropoutV2) | |||||
| .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||||
| .INPUT(seed, TensorType({ DT_FLOAT })) | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||||
| .OUTPUT(mask, TensorType({ DT_FLOAT })) | |||||
| .OUTPUT(seed, TensorType({ DT_FLOAT })) | |||||
| .REQUIRED_ATTR(p, Float) | |||||
| .OP_END_FACTORY_REG(DropoutV2) | |||||
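A sketch of the documented behavior for one flat tensor. Rescaling the surviving elements by 1/(1-p), i.e. inverted dropout, is an assumption here; the comment above does not state it:

    #include <random>
    #include <vector>

    // Zero each element of `x` with probability `p`; `mask` records the
    // survivors. Assumes p < 1. Inverted-dropout rescaling is assumed.
    void DropoutV2Ref(std::vector<float>& x, std::vector<float>& mask,
                      float p, unsigned seed) {
      std::mt19937 gen(seed);
      std::bernoulli_distribution drop(p);
      mask.resize(x.size());
      for (size_t i = 0; i < x.size(); ++i) {
        const bool dropped = drop(gen);
        mask[i] = dropped ? 0.0f : 1.0f;
        x[i] = dropped ? 0.0f : x[i] / (1.0f - p);
      }
    }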
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -37,7 +37,7 @@ namespace ge { | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| * This operator is a BatchNorm fusion operator for updating the moving | * This operator is a BatchNorm fusion operator for updating the moving | ||||
| * averages for training. | * averages for training. | ||||
| * This operator is used in conjunction with BNTrainingUpdate. | |||||
| * This operator is used in conjunction with BNTrainingReduce. | |||||
| */ | */ | ||||
| REG_OP(BNTrainingReduce) | REG_OP(BNTrainingReduce) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -45,6 +45,27 @@ REG_OP(BNTrainingReduce) | |||||
| .OUTPUT(square_sum, TensorType({DT_FLOAT})) | .OUTPUT(square_sum, TensorType({DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(BNTrainingReduce) | .OP_END_FACTORY_REG(BNTrainingReduce) | ||||
| /** | |||||
| *@brief Performs reduced batch normalization . \n | |||||
| *@par Inputs: | |||||
| *x: A 6D Tensor of type float16 or float32, with format NDC1HWC0 . \n | |||||
| *@par Outputs: | |||||
| *@li sum: A 3D Tensor of type float32 for SUM reduced "x". | |||||
| *@li square_sum: A 3D Tensor of type float32 for SUMSQ reduced "x" . \n | |||||
| *@attention Constraints: | |||||
| * This operator is a BatchNorm fusion operator for updating the moving | |||||
| * averages for training. | |||||
| * This operator is used in conjunction with BN3DTrainingUpdate. | |||||
| */ | |||||
| REG_OP(BN3DTrainingReduce) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(sum, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(square_sum, TensorType({DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(BN3DTrainingReduce) | |||||
| /** | /** | ||||
| *@brief Performs the backpropagation of BatchNorm . \n | *@brief Performs the backpropagation of BatchNorm . \n | ||||
| @@ -88,6 +109,49 @@ REG_OP(BNTrainingReduceGrad) | |||||
| .ATTR(epsilon, Float, 0.0001) | .ATTR(epsilon, Float, 0.0001) | ||||
| .OP_END_FACTORY_REG(BNTrainingReduceGrad) | .OP_END_FACTORY_REG(BNTrainingReduceGrad) | ||||
| /** | |||||
| *@brief Performs the backpropagation of BatchNorm . \n | |||||
| *@par Inputs: | |||||
| * Seven inputs, including: | |||||
| *@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for | |||||
| * the gradient. | |||||
| *@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. | |||||
| *@li diff_scale: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
| * for the mean of "x". | |||||
| *@li diff_offset: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
| * for the variance of "x". | |||||
| *@li scale: A 6D Tensor of type float32, with format NDC1HWC0. | |||||
| *@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
| * for the mean of "x". | |||||
| *@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
| * for the variance of "x" . \n | |||||
| *@par Attributes: | |||||
| *epsilon: An optional float32. Defaults to "0.0001". A small float number | |||||
| * added to the variance of "x" . \n | |||||
| *@par Outputs: | |||||
| *y: A Tensor of type float16 or float32, with format NDC1HWC0, for the offset | |||||
| * of "x" . \n | |||||
| *@attention Constraints: | |||||
| * The preceding layer of this operator must be BN3DTrainingUpdateGrad . \n | |||||
| *@see BN3DTrainingUpdateGrad | |||||
| */ | |||||
| REG_OP(BN3DTrainingReduceGrad) | |||||
| .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(diff_scale, TensorType({DT_FLOAT})) | |||||
| .INPUT(diff_offset, TensorType({DT_FLOAT})) | |||||
| .INPUT(scale, TensorType({DT_FLOAT})) | |||||
| .INPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
| .INPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .ATTR(epsilon, Float, 0.0001) | |||||
| .OP_END_FACTORY_REG(BN3DTrainingReduceGrad) | |||||
| /** | /** | ||||
| *@brief Performs reduced batch normalization . \n | *@brief Performs reduced batch normalization . \n | ||||
| @@ -120,7 +184,7 @@ REG_OP(BNTrainingReduceGrad) | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *@li This operator is a BatchNorm fusion operator for updating the moving | *@li This operator is a BatchNorm fusion operator for updating the moving | ||||
| averages for training. | averages for training. | ||||
| *This operator is used in conjunction with BNTrainingReduce. | |||||
| *This operator is used in conjunction with BNTrainingUpdate. | |||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | ||||
| * root instruction. | * root instruction. | ||||
| */ | */ | ||||
| @@ -141,6 +205,59 @@ REG_OP(BNTrainingUpdate) | |||||
| .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(BNTrainingUpdate) | .OP_END_FACTORY_REG(BNTrainingUpdate) | ||||
| /** | |||||
| *@brief Performs reduced batch normalization . \n | |||||
| *@par Inputs: | |||||
| * Seven inputs, including: (NDC1HWC0 supported) | |||||
| *@li x: A 6D Tensor of type float16 or float32. | |||||
| *@li sum: A 6D Tensor of type float32 for the output of operator | |||||
| * BN3DTrainingReduce. | |||||
| *@li square_sum: A 6D Tensor of type float32 for the output of operator | |||||
| * BN3DTrainingReduce. | |||||
| *@li scale: A 6D Tensor of type float32, for the scaling factor. | |||||
| *@li offset: A 6D Tensor of type float32, for the scaling offset. | |||||
| *@li mean: A 6D Tensor of type float32, for the updated mean. | |||||
| *@li variance: A 6D Tensor of type float32, for the updated variance . \n | |||||
| *@par Attributes: | |||||
| *@li epsilon: A required float32, specifying the small value added to variance | |||||
| * to avoid dividing by zero. | |||||
| *@li factor: A required float32, specifying the weight for updating the mean | |||||
| * and variance . \n | |||||
| *@par Outputs: | |||||
| * Five outputs, including: (NDC1HWC0 supported) | |||||
| *@li y: A 6D Tensor of type float16 or float32, for normalized "x". | |||||
| *@li mean: A 6D Tensor of type float32, for the updated mean. | |||||
| *@li variance: A 6D Tensor of type float32, for the updated variance. | |||||
| *@li batch_mean: A 6D Tensor of type float32, for the mean of "x". | |||||
| *@li batch_variance: A 6D Tensor of type float32, for the variance of "x" . \n | |||||
| *@attention Constraints: | |||||
| *@li This operator is a BatchNorm fusion operator for updating the moving | |||||
| averages for training. | |||||
| *This operator is used in conjunction with BN3DTrainingReduce. | |||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | |||||
| * root instruction. | |||||
| */ | |||||
| REG_OP(BN3DTrainingUpdate) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(sum, TensorType({DT_FLOAT})) | |||||
| .INPUT(square_sum, TensorType({DT_FLOAT})) | |||||
| .INPUT(scale, TensorType({DT_FLOAT})) | |||||
| .INPUT(offset, TensorType({DT_FLOAT})) | |||||
| .INPUT(mean, TensorType({DT_FLOAT})) | |||||
| .INPUT(variance, TensorType({DT_FLOAT})) | |||||
| .REQUIRED_ATTR(factor, Float) | |||||
| .REQUIRED_ATTR(epsilon, Float) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .OUTPUT(mean, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(variance, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(BN3DTrainingUpdate) | |||||
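The update step consumes the per-channel "sum" and "square_sum" produced by the matching Reduce op. Below is a per-channel scalar sketch of the usual fused-BN math (assumed from the standard BatchNorm formulation; the NDC1HWC0 device layout is ignored and names are illustrative):

    #include <cmath>

    struct BnUpdateOut { float batch_mean, batch_var, moving_mean, moving_var; };

    // `n` is the number of elements reduced per channel; `factor` blends the
    // batch statistics into the moving statistics. (Variants differ on
    // biased vs. unbiased variance for the moving update.)
    BnUpdateOut BnUpdateRef(float sum, float square_sum, float n,
                            float moving_mean, float moving_var, float factor) {
      BnUpdateOut o{};
      o.batch_mean  = sum / n;
      o.batch_var   = square_sum / n - o.batch_mean * o.batch_mean;
      o.moving_mean = factor * o.batch_mean + (1.0f - factor) * moving_mean;
      o.moving_var  = factor * o.batch_var  + (1.0f - factor) * moving_var;
      return o;
    }

    // Each element is then normalized with the batch statistics:
    float BnNormalize(float x, float mean, float var, float scale, float offset,
                      float epsilon) {
      return (x - mean) / std::sqrt(var + epsilon) * scale + offset;
    }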
| /** | /** | ||||
| *@brief Performs batch normalization for inference . \n | *@brief Performs batch normalization for inference . \n | ||||
| @@ -284,6 +401,40 @@ REG_OP(BNTrainingUpdateGrad) | |||||
| .OUTPUT(diff_offset, TensorType({DT_FLOAT})) | .OUTPUT(diff_offset, TensorType({DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(BNTrainingUpdateGrad) | .OP_END_FACTORY_REG(BNTrainingUpdateGrad) | ||||
| /** | |||||
| *@brief Performs the backpropagation of BatchNorm . \n | |||||
| *@par Inputs: | |||||
| * Four inputs, including: | |||||
| *@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, | |||||
| * for the gradient. | |||||
| *@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. | |||||
| *@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
| * for the mean of "x". | |||||
| *@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
| * for the variance of "x" . \n | |||||
| *@par Attributes: | |||||
| *epsilon: An optional float32. Defaults to "0.0001". A small float number | |||||
| * added to the variance of "x" . \n | |||||
| *@par Outputs: | |||||
| *@li diff_scale: A Tensor of type float32, with format NDC1HWC0, | |||||
| * for the offset of "scale". | |||||
| *@li diff_offset: A Tensor of type float32, with format NDC1HWC0, | |||||
| * for the offset of "offset" . \n | |||||
| */ | |||||
| REG_OP(BN3DTrainingUpdateGrad) | |||||
| .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
| .INPUT(batch_mean, TensorType({DT_FLOAT})) | |||||
| .INPUT(batch_variance, TensorType({DT_FLOAT})) | |||||
| .ATTR(epsilon, Float, 0.0001) | |||||
| .OUTPUT(diff_scale, TensorType({DT_FLOAT})) | |||||
| .OUTPUT(diff_offset, TensorType({DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(BN3DTrainingUpdateGrad) | |||||
| /** | /** | ||||
| *@brief Performs the backpropagation of BatchNorm for inference . \n | *@brief Performs the backpropagation of BatchNorm for inference . \n | ||||
| @@ -635,8 +786,8 @@ REG_OP(ReduceMin) | |||||
| * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. | ||||
| */ | */ | ||||
| REG_OP(ReduceMinD) | REG_OP(ReduceMinD) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | |||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) | |||||
| .REQUIRED_ATTR(axes, ListInt) | .REQUIRED_ATTR(axes, ListInt) | ||||
| .ATTR(keep_dims, Bool, false) | .ATTR(keep_dims, Bool, false) | ||||
| .OP_END_FACTORY_REG(ReduceMinD) | .OP_END_FACTORY_REG(ReduceMinD) | ||||
| @@ -821,7 +972,7 @@ Defaults to "0.00001" . \n | |||||
| *batch_ variance: A Tensor of type float32 for the result variance . \n | *batch_ variance: A Tensor of type float32 for the result variance . \n | ||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
| *For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction. | |||||
| */ | */ | ||||
| REG_OP(INInferV2) | REG_OP(INInferV2) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -882,7 +1033,7 @@ REG_OP(INTrainingReduceV2) | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | ||||
| * This operator is used in conjunction with INTrainingReduceV2. | * This operator is used in conjunction with INTrainingReduceV2. | ||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
| */ | */ | ||||
| REG_OP(INTrainingUpdateV2) | REG_OP(INTrainingUpdateV2) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -965,7 +1116,7 @@ for the updated variance. | |||||
| *@attention Constraints: | *@attention Constraints: | ||||
| *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. | ||||
| * This operator is used in conjunction with GNTrainingUpdate. | * This operator is used in conjunction with GNTrainingUpdate. | ||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
| *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
| */ | */ | ||||
| REG_OP(GNTrainingUpdate) | REG_OP(GNTrainingUpdate) | ||||
| .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | ||||
| @@ -982,6 +1133,41 @@ REG_OP(GNTrainingUpdate) | |||||
| .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | ||||
| .OP_END_FACTORY_REG(GNTrainingUpdate) | .OP_END_FACTORY_REG(GNTrainingUpdate) | ||||
| /** | |||||
| * @brief Calculates the standard deviation and average value of Tensors. | |||||
| * @par Inputs: | |||||
| * @li x: A Tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * Three Attributes, including: | |||||
| * @li dim: An optional ListInt specifying the dimensions to reduce. Defaults to {}, meaning all dimensions. \n | |||||
| * @li unbiased: An optional bool. Defaults to "True". | |||||
| * If "True", use Bessel's correction. | |||||
| * If "False", do not use Bessel's correction. \n | |||||
| * @li keepdim: An optional bool. Defaults to "False". | |||||
| * If "True", keep the reduced dimensions in the output. | |||||
| * If "False", do not keep the reduced dimensions. \n | |||||
| * @par Outputs: | |||||
| * Two outputs, including: | |||||
| * @li y1: A Tensor. Has the same type as "x"; holds the standard deviation. | |||||
| * @li y2: A Tensor. Has the same type as "x"; holds the mean. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the PyTorch operator ReduceStd. | |||||
| */ | |||||
| REG_OP(ReduceStd) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(y1, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(y2, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .ATTR(dim, ListInt, {}) | |||||
| .ATTR(unbiased, Bool, true) | |||||
| .ATTR(keepdim, Bool, false) | |||||
| .OP_END_FACTORY_REG(ReduceStd) | |||||
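A reference sketch over a flattened input ("dim"/"keepdim" handling omitted). Pairing y1 with the standard deviation and y2 with the mean follows the brief above; beyond that it is an assumption:

    #include <cmath>
    #include <utility>
    #include <vector>

    // Returns {std, mean}; `unbiased` selects Bessel's correction (n - 1).
    // Assumes x.size() > 1 when `unbiased` is true.
    std::pair<float, float> ReduceStdRef(const std::vector<float>& x, bool unbiased) {
      const size_t n = x.size();
      float mean = 0.0f;
      for (float v : x) mean += v;
      mean /= static_cast<float>(n);
      float sq = 0.0f;
      for (float v : x) sq += (v - mean) * (v - mean);
      const float denom = static_cast<float>(unbiased ? n - 1 : n);
      return {std::sqrt(sq / denom), mean};
    }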
| } //namespace ge | } //namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -187,16 +187,16 @@ REG_OP(DynamicRNNGrad) | |||||
| *@brief: DynamicRNN calculation. | *@brief: DynamicRNN calculation. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *ten inputs: | *ten inputs: | ||||
| *@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||||
| *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n | |||||
| *@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li seq_length:An optional 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||||
| *@li init_h:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li init_c:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. | *@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. | ||||
| @@ -221,6 +221,8 @@ REG_OP(DynamicRNNGrad) | |||||
| *@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@par Third-party framework compatibility: | |||||
| * Compatible with the TensorFlow operator LSTM. | |||||
| */ | */ | ||||
| REG_OP(DynamicRNN) | REG_OP(DynamicRNN) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| @@ -254,6 +256,63 @@ REG_OP(DynamicRNN) | |||||
| .ATTR(is_training, Bool, true) | .ATTR(is_training, Bool, true) | ||||
| .OP_END_FACTORY_REG(DynamicRNN) | .OP_END_FACTORY_REG(DynamicRNN) | ||||
| /** | |||||
| *@brief: DynamicLSTMV2 calculation. | |||||
| *@par Inputs: | |||||
| *eleven inputs: | |||||
| *@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li w_xc_x_static:An optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li h0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li c0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . | |||||
| *@par Attributes: | |||||
| *@li num_output:An integer identifying the num projection in the op. Defaults to 0. | |||||
| *@li expose_hidden:A bool identifying whether to expose the hidden state in the op. Defaults to false. | |||||
| *@li need_output_last:A bool identifying whether to output only the last timestep in the op. Defaults to false. | |||||
| *@li forget_bias:A float identifying the forget bias in the op. Defaults to 0. | |||||
| *@par Outputs: | |||||
| *five outputs: | |||||
| *@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@par Third-party framework compatibility: | |||||
| * Compatible with the Caffe operator LSTM. | |||||
| *@par Restrictions: | |||||
| * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | |||||
| REG_OP(DynamicLSTMV2) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(cont, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(w_xc_x_static, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(h0, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(c0, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(last_output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(last_output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(num_output, Int, 0) | |||||
| .ATTR(expose_hidden, Bool, false) | |||||
| .ATTR(need_output_last, Bool, false) | |||||
| .ATTR(forget_bias, Float, 0.0) | |||||
| .OP_END_FACTORY_REG(DynamicLSTMV2) | |||||
| /** | /** | ||||
| *@brief: LSTMInputGrad calculation. | *@brief: LSTMInputGrad calculation. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| @@ -475,9 +534,9 @@ REG_OP(BasicRNNCell) | |||||
| .OP_END_FACTORY_REG(BasicRNNCell) | .OP_END_FACTORY_REG(BasicRNNCell) | ||||
| /** | /** | ||||
| *@brief: DynamicGRU calculation. | |||||
| *@brief DynamicGRU calculation. | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *seven inputs: \n | |||||
| *seven inputs: | |||||
| *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | ||||
| *@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. | *@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. | ||||
| *@li b:Must be one of the following types: float16, float32. The format must be ND. | *@li b:Must be one of the following types: float16, float32. The format must be ND. | ||||
| @@ -497,7 +556,7 @@ REG_OP(BasicRNNCell) | |||||
| *@li is_training:An bool identifying is training in the op. Default to true. | *@li is_training:An bool identifying is training in the op. Default to true. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *five outputs: \n | |||||
| *five outputs: | |||||
| *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| @@ -531,9 +590,9 @@ REG_OP(DynamicGRU) | |||||
| .OP_END_FACTORY_REG(DynamicGRU) | .OP_END_FACTORY_REG(DynamicGRU) | ||||
| /** | /** | ||||
| *@brief: DynamicGRUV2 calculation. | |||||
| *@brief DynamicGRUV2 calculation. | |||||
| *@par Inputs: | *@par Inputs: | ||||
| *seven inputs: \n | |||||
| *seven inputs: | |||||
| *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | ||||
| *@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. | *@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. | ||||
| *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | ||||
| @@ -555,7 +614,7 @@ REG_OP(DynamicGRU) | |||||
| *@li is_training:An bool identifying is training in the op. Default to true. | *@li is_training:An bool identifying is training in the op. Default to true. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| *six outputs: \n | |||||
| *six outputs: | |||||
| *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| @@ -592,6 +651,68 @@ REG_OP(DynamicGRUV2) | |||||
| .ATTR(is_training, Bool, true) | .ATTR(is_training, Bool, true) | ||||
| .OP_END_FACTORY_REG(DynamicGRUV2) | .OP_END_FACTORY_REG(DynamicGRUV2) | ||||
| /** | |||||
| *@brief DynamicGRUV2Hidden calculation. | |||||
| *@par Inputs: | |||||
| *five inputs: | |||||
| *@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ. | |||||
| *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
| *@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li seq_length:Must be one of the following types: int32. The format must be ND. | |||||
| *@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@par Attributes: | |||||
| *@li direction:A string identifying the direction in the op. Defaults to "UNIDIRECTIONAL". | |||||
| Only UNIDIRECTIONAL is currently supported. | |||||
| *@li cell_depth:An integer identifying the cell depth in the op. Defaults to 1. | |||||
| *@li keep_prob:A float identifying the keep prob in the op. Defaults to 1. | |||||
| *@li cell_clip:A float identifying the cell clip in the op. Defaults to -1. | |||||
| *@li num_proj:An integer identifying the num projection in the op. Defaults to 0. | |||||
| *@li time_major:A bool identifying the time major in the op. Defaults to true. | |||||
| *@li activation:A string identifying the type of activation function in the op. Defaults to "tanh". | |||||
| Only tanh is currently supported. | |||||
| *@li gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option. | |||||
| *@li reset_after:A bool identifying whether to apply the reset gate after matrix multiplication. Defaults to true. | |||||
| *@li is_training:A bool identifying whether the op is in training mode. Defaults to true. | |||||
| *@par Outputs: | |||||
| *six outputs: | |||||
| *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | |||||
| REG_OP(DynamicGRUV2Hidden) | |||||
| .INPUT(x_weight_input, TensorType({DT_FLOAT32})) | |||||
| .INPUT(weight_hidden, TensorType({DT_FLOAT16})) | |||||
| .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) | |||||
| .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(direction, String, "UNIDIRECTIONAL") | |||||
| .ATTR(cell_depth, Int, 1) | |||||
| .ATTR(keep_prob, Float, 1.0) | |||||
| .ATTR(cell_clip, Float, -1.0) | |||||
| .ATTR(num_proj, Int, 0) | |||||
| .ATTR(time_major, Bool, true) | |||||
| .ATTR(activation, String, "tanh") | |||||
| .ATTR(gate_order, String, "zrh") | |||||
| .ATTR(reset_after, Bool, true) | |||||
| .ATTR(is_training, Bool, true) | |||||
| .OP_END_FACTORY_REG(DynamicGRUV2Hidden) | |||||
| /** | /** | ||||
| *@brief: DynamicGRUV2Grad calculation. | *@brief: DynamicGRUV2Grad calculation. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| @@ -618,7 +739,6 @@ REG_OP(DynamicGRUV2) | |||||
| *@li cell_clip:An float identifying the cell clip in the op. Default to -1. | *@li cell_clip:An float identifying the cell clip in the op. Default to -1. | ||||
| *@li num_proj:An integer identifying the num projection in the op. Default to 0. | *@li num_proj:An integer identifying the num projection in the op. Default to 0. | ||||
| *@li time_major:An bool identifying the time major in the op. Default to true. | *@li time_major:An bool identifying the time major in the op. Default to true. | ||||
| *@li bias_type:An string identifying the type of bias_type function in the op. Default to "double_bias". | |||||
| *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. | *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. | ||||
| *@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. | *@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. | ||||
| @@ -630,6 +750,9 @@ REG_OP(DynamicGRUV2) | |||||
| *@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(DynamicGRUV2Grad) | REG_OP(DynamicGRUV2Grad) | ||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| @@ -658,7 +781,6 @@ REG_OP(DynamicGRUV2Grad) | |||||
| .ATTR(cell_clip, Float, -1.0) | .ATTR(cell_clip, Float, -1.0) | ||||
| .ATTR(num_proj, Int, 0) | .ATTR(num_proj, Int, 0) | ||||
| .ATTR(time_major, Bool, true) | .ATTR(time_major, Bool, true) | ||||
| .ATTR(bias_type, String, "double_bias") | |||||
| .ATTR(gate_order, String, "zrh") | .ATTR(gate_order, String, "zrh") | ||||
| .ATTR(reset_after, Bool, true) | .ATTR(reset_after, Bool, true) | ||||
| .OP_END_FACTORY_REG(DynamicGRUV2Grad) | .OP_END_FACTORY_REG(DynamicGRUV2Grad) | ||||
| @@ -667,7 +789,7 @@ REG_OP(DynamicGRUV2Grad) | |||||
| *@brief: GRUV2HiddenGrad calculation. | *@brief: GRUV2HiddenGrad calculation. | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *nine inputs: \n | *nine inputs: \n | ||||
| *@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| @@ -678,6 +800,7 @@ REG_OP(DynamicGRUV2Grad) | |||||
| *@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li t_state:An int identifying the current t state, in the range [0, 4]. Defaults to 0. | |||||
| *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. | *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. | ||||
| *@par Outputs: | *@par Outputs: | ||||
| @@ -685,10 +808,12 @@ REG_OP(DynamicGRUV2Grad) | |||||
| *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | */ | ||||
| REG_OP(GRUV2HiddenGrad) | |||||
| .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| REG_OP(GRUV2HiddenGradCell) | |||||
| .INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| @@ -699,8 +824,142 @@ REG_OP(GRUV2HiddenGrad) | |||||
| .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
| .ATTR(t_state, Int, 0) | |||||
| .ATTR(gate_order, String, "zrh") | .ATTR(gate_order, String, "zrh") | ||||
| .OP_END_FACTORY_REG(GRUV2HiddenGrad) | |||||
| .OP_END_FACTORY_REG(GRUV2HiddenGradCell) | |||||
| /** | |||||
| * @brief Calculates the reversed outputs of the function "embedding". \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li grad: A mutable Tensor of word grad. Must be one of the following types: | |||||
| * float32. | |||||
| * @li indices: A mutable word index Tensor of the int32 type.\n | |||||
| * @par Attributes: | |||||
| * @li num_weights: An int attr indicating how many words there are in the dictionary. \n | |||||
| * @li padding_idx: An int attr indicating which word's gradient row is filled with zeros. Defaults to "-1". \n | |||||
| * @li scale_grad_by_freq: An optional bool. Defaults to "False". | |||||
| * If "True", "grad_weight" will be scaled by the word frequency. | |||||
| * If "False", "grad_weight" will not be scaled by the word frequency. \n | |||||
| * @par Outputs: | |||||
| * @li y: A mutable output Tensor of the new word grad. Has the same type as "grad". \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the PyTorch operator EmbeddingDenseGrad. | |||||
| */ | |||||
| REG_OP(EmbeddingDenseGrad) | |||||
| .INPUT(grad, TensorType({ DT_FLOAT32 })) /* "First operand." */ | |||||
| .INPUT(indices, TensorType({ DT_INT32 })) /* "Second operand." */ | |||||
| .OUTPUT(y, TensorType({ DT_FLOAT32 })) /* "Result, has same element type as two inputs" */ | |||||
| .REQUIRED_ATTR(num_weights, Int) | |||||
| .ATTR(padding_idx, Int, -1) | |||||
| .ATTR(scale_grad_by_freq, Bool, false) | |||||
| .OP_END_FACTORY_REG(EmbeddingDenseGrad) | |||||
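A reference sketch of the scatter-add described above. The embedding width "dim" is an illustrative helper parameter (it comes from the shape of "grad"), not an operator attribute:

    #include <cstdint>
    #include <vector>

    // Scatter-add rows of `grad` (shape [indices.size(), dim]) into
    // `grad_weight` (shape [num_weights, dim]) by word index; the row of
    // `padding_idx` stays zero; optionally scale by 1 / word frequency.
    std::vector<float> EmbeddingDenseGradRef(const std::vector<float>& grad,
                                             const std::vector<int32_t>& indices,
                                             int64_t num_weights, int64_t dim,
                                             int64_t padding_idx,
                                             bool scale_grad_by_freq) {
      std::vector<float> grad_weight(num_weights * dim, 0.0f);
      std::vector<int32_t> freq(num_weights, 0);
      for (int32_t idx : indices) ++freq[idx];
      for (size_t i = 0; i < indices.size(); ++i) {
        const int32_t idx = indices[i];
        if (idx == padding_idx) continue;
        const float s = scale_grad_by_freq ? 1.0f / freq[idx] : 1.0f;
        for (int64_t d = 0; d < dim; ++d) {
          grad_weight[idx * dim + d] += s * grad[i * dim + d];
        }
      }
      return grad_weight;
    }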
| /** | |||||
| *@brief CommonLSTM calculation. | |||||
| *@par Inputs: | |||||
| *eight inputs: \n | |||||
| *@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
| *@li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@li sequence_lens:An optional input. A 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||||
| *@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li p:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | |||||
| *@par Attributes: | |||||
| *@li activation_alpha:Optional scaling values used by some activation functions. Empty is currently supported. | |||||
| *@li activation_beta:Optional scaling values used by some activation functions. Empty is currently supported. | |||||
| *@li activations:The list of activation functions. Empty is currently supported. | |||||
| *@li clip:A float identifying the cell clip in the op. Defaults to -1. | |||||
| *@li direction:Specify if the RNN is forward, reverse, or bidirectional. Must be one of forward (default), reverse, or bidirectional. | |||||
| *@li hidden_size:Number of neurons in the hidden layer. Reserved. | |||||
| *@li input_forget:Couple the input and forget gates if 1. Reserved. | |||||
| *@par Outputs: | |||||
| *three outputs: \n | |||||
| *@li y:The first dimension is the time step, the second is the direction, and the remaining dimensions form a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| *@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
| */ | |||||
| REG_OP(CommonLSTM) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32})) | |||||
| .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(initial_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(p, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(activation_alpha, ListFloat, {}) | |||||
| .ATTR(activation_beta, ListFloat, {}) | |||||
| .ATTR(activations, ListString, {}) | |||||
| .ATTR(clip, Float, -1.0) | |||||
| .ATTR(direction, String, "forward") | |||||
| .REQUIRED_ATTR(hidden_size, Int) | |||||
| .ATTR(input_forget, Int, 0) | |||||
| .OP_END_FACTORY_REG(CommonLSTM) | |||||
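For orientation, a single-unit, single-step sketch of the standard LSTM cell math this operator applies per time step. Gate pre-activations (W*x + R*h_prev + b) are taken as already computed; packed weight layouts, peepholes, and directions are omitted:

    #include <cmath>

    struct LstmState { float h, c; };

    static float Sigmoid(float v) { return 1.0f / (1.0f + std::exp(-v)); }

    // zi, zf, zo, zc: pre-activation values of the input, forget, output,
    // and cell gates for one unit.
    LstmState LstmCellRef(float zi, float zf, float zo, float zc, LstmState prev) {
      const float i = Sigmoid(zi);
      const float f = Sigmoid(zf);
      const float o = Sigmoid(zo);
      const float c_tilde = std::tanh(zc);
      LstmState next{};
      next.c = f * prev.c + i * c_tilde;
      next.h = o * std::tanh(next.c);
      return next;
    }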
| /** | |||||
| * @brief Common GRU calculation. | |||||
| * @par Inputs: | |||||
| * Six inputs, including: | |||||
| * @li x: The input sequences, packed (and potentially padded) into one 3D Tensor (float16). The format must be FRACTAL_NZ | |||||
| * @li w: The weight tensor for the gates, a 3D Tensor (float16). The format must be FRACTAL_Z | |||||
| * @li r: The recurrence weight tensor, a 3D Tensor (float16). The format must be FRACTAL_Z | |||||
| * @li b: The bias tensor for the gates. The format must be ND | |||||
| * @li sequence_lens: Optional tensor specifying lengths of the sequences (int32). The format must be ND | |||||
| * @li initial_h: Optional initial value of the hidden state (float16, float32). The format must be FRACTAL_NZ | |||||
| * @par Attributes: | |||||
| * @li activation_alpha: Optional scaling values used by some activation functions. \n | |||||
| * @li activation_beta: Optional scaling values used by some activation functions. \n | |||||
| * @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. \n | |||||
| * @li clip: Cell clip threshold. \n | |||||
| * @li direction: Specify if the RNN is forward, reverse, or bidirectional. \n | |||||
| * @li hidden_size: Number of neurons in the hidden layer. \n | |||||
| * @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor that concats all the intermediate output values of the hidden state (float16, float32). The format must be FRACTAL_NZ | |||||
| * @li y_h: The last output value of the hidden state (float16, float32). The format must be FRACTAL_NZ | |||||
| */ | |||||
| REG_OP(CommonGRU) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32})) | |||||
| .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(activation_alpha, ListFloat, {}) | |||||
| .ATTR(activation_beta, ListFloat, {}) | |||||
| .ATTR(activations, ListString, {}) | |||||
| .ATTR(clip, Float, -1.0) | |||||
| .ATTR(direction, String, "forward") | |||||
| .REQUIRED_ATTR(hidden_size, Int) | |||||
| .ATTR(linear_before_reset, Int, 0) | |||||
| .OP_END_FACTORY_REG(CommonGRU) | |||||
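The "linear_before_reset" attribute is easiest to see in the hidden-gate math. A single-unit sketch of the standard ONNX-style GRU step (weight layouts omitted; names are illustrative):

    #include <cmath>

    // zz, zr: pre-activation update/reset values; wx_h = Wh*x + b_wh;
    // Rh, b_rh: recurrent weight and bias of the hidden gate.
    float GruStepRef(float zz, float zr, float wx_h, float Rh, float b_rh,
                     float h_prev, bool linear_before_reset) {
      const float z = 1.0f / (1.0f + std::exp(-zz));  // update gate
      const float r = 1.0f / (1.0f + std::exp(-zr));  // reset gate
      const float h_tilde = linear_before_reset
          ? std::tanh(wx_h + r * (Rh * h_prev + b_rh))   // reset applied after the linear term
          : std::tanh(wx_h + Rh * (r * h_prev) + b_rh);  // h_prev reset before the linear term
      return (1.0f - z) * h_tilde + z * h_prev;
    }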
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -239,6 +239,30 @@ REG_OP(GatherV2D) | |||||
| .REQUIRED_ATTR(axis, Int) | .REQUIRED_ATTR(axis, Int) | ||||
| .OP_END_FACTORY_REG(GatherV2D) | .OP_END_FACTORY_REG(GatherV2D) | ||||
| /** | |||||
| *@brief Gathers values along an axis specified by dim . \n | |||||
| *@par Inputs: | |||||
| *@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64. | |||||
| *@li index: A Tensor. Must be one of the following types: int64 . \n | |||||
| *@par Attributes: | |||||
| * dim: the axis along which to index . \n | |||||
| *@par Outputs: | |||||
| * y: A Tensor. Has the same type as "x" . \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the PyTorch operator Gather. | |||||
| */ | |||||
| REG_OP(GatherElements) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) | |||||
| .INPUT(index, TensorType({DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) | |||||
| .ATTR(dim, Int, 0) | |||||
| .OP_END_FACTORY_REG(GatherElements) | |||||
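A 2-D reference sketch of the indexing rule (row-major layout; the general N-D rule replaces the index along "dim" only):

    #include <cstdint>
    #include <vector>

    // dim == 0: y[i][j] = x[index[i][j]][j]
    // dim == 1: y[i][j] = x[i][index[i][j]]
    // `y` has the shape of `index`; `x_cols` is x's second dimension.
    std::vector<float> GatherElementsRef(const std::vector<float>& x, int64_t x_cols,
                                         const std::vector<int64_t>& index,
                                         int64_t idx_rows, int64_t idx_cols, int dim) {
      std::vector<float> y(index.size());
      for (int64_t i = 0; i < idx_rows; ++i) {
        for (int64_t j = 0; j < idx_cols; ++j) {
          const int64_t k = index[i * idx_cols + j];
          y[i * idx_cols + j] = (dim == 0) ? x[k * x_cols + j] : x[i * x_cols + k];
        }
      }
      return y;
    }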
| /** | /** | ||||
| *@brief Extracts a strided slice of a tensor. Roughly speaking, this op | *@brief Extracts a strided slice of a tensor. Roughly speaking, this op | ||||
| extracts a slice of size (end-begin)/stride from the given input tensor. | extracts a slice of size (end-begin)/stride from the given input tensor. | ||||
| @@ -486,6 +510,38 @@ REG_OP(UnsortedSegmentSum) | |||||
| .OUTPUT(y, TensorType::NumberType()) | .OUTPUT(y, TensorType::NumberType()) | ||||
| .OP_END_FACTORY_REG(UnsortedSegmentSum) | .OP_END_FACTORY_REG(UnsortedSegmentSum) | ||||
| /** | |||||
| *@brief Creates a one-dimensional tensor of size "steps" whose values are evenly spaced from "start" to | |||||
| * "end", inclusive, on a logarithmic scale with base "base". \n | |||||
| *@par Inputs: | |||||
| *One input, including: | |||||
| * @li assist: A tensor. Must be one of the following types: | |||||
| * float16, float32. \n | |||||
| * @par Attributes: | |||||
| * @li start: A required float. Used to select the start. \n | |||||
| * @li end: A required float. Used to select the end. \n | |||||
| * @li steps: An optional int. Defaults to 100. \n | |||||
| * @li base: An optional float. Defaults to 10.0. \n | |||||
| * @li dtype: An optional int. Defaults to 1. \n | |||||
| *@par Outputs: | |||||
| *y: A Tensor with the same type and shape as "assist". \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the PyTorch operator logspace. \n | |||||
| */ | |||||
| REG_OP(LogSpaceD) | |||||
| .INPUT(assist, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .REQUIRED_ATTR(start, Float) | |||||
| .REQUIRED_ATTR(end, Float) | |||||
| .ATTR(steps, Int, 100) | |||||
| .ATTR(base, Float, 10.0) | |||||
| .ATTR(dtype, Int, 1) | |||||
| .OP_END_FACTORY_REG(LogSpaceD) | |||||
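The values follow the usual logspace rule, shown here as a small worked sketch (the "assist" input and "dtype" plumbing are omitted):

    #include <cmath>
    #include <vector>

    // y[i] = base^(start + i * (end - start) / (steps - 1)).
    std::vector<float> LogSpaceRef(float start, float end, int steps, float base) {
      std::vector<float> y(steps);
      const float step = (steps > 1) ? (end - start) / (steps - 1) : 0.0f;
      for (int i = 0; i < steps; ++i) {
        y[i] = std::pow(base, start + i * step);
      }
      return y;
    }

    // LogSpaceRef(0.0f, 3.0f, 4, 10.0f) -> {1, 10, 100, 1000}.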
| /** | /** | ||||
| *@brief Computes the sum along segments of a tensor . \n | *@brief Computes the sum along segments of a tensor . \n | ||||
| @@ -796,6 +852,34 @@ REG_OP(SliceD) | |||||
| .REQUIRED_ATTR(size, ListInt) | .REQUIRED_ATTR(size, ListInt) | ||||
| .OP_END_FACTORY_REG(SliceD) | .OP_END_FACTORY_REG(SliceD) | ||||
| /** | |||||
| *@brief Extracts a slice from a tensor. | |||||
| * This operation extracts a slice of size "size" from a tensor "x" | |||||
| * starting at the location specified by "begin" . \n | |||||
| *@par Inputs: | |||||
| *@li x: A Tensor. Must be one of the following types: | |||||
| * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, | |||||
| * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n | |||||
| *@li offsets: The starting location for the slice. | |||||
| *@par Attributes: | |||||
| *@li size: A required ListInt, specifying the number of elements to slice along each axis . \n | |||||
| *@par Outputs: | |||||
| *y: A Tensor. Has the same type as "x". The slice extracted from the tensor. | |||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead. | |||||
| */ | |||||
| REG_OP(SliceDV2) | |||||
| .INPUT(x, TensorType::BasicType()) | |||||
| .INPUT(offsets, TensorType::IndexNumberType()) | |||||
| .OUTPUT(y, TensorType::BasicType()) | |||||
| .REQUIRED_ATTR(size, ListInt) | |||||
| .OP_END_FACTORY_REG(SliceDV2) | |||||
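A 2-D reference sketch of the extraction rule ("offsets" gives the starting location per axis, "size" the extent; row-major layout assumed):

    #include <cstdint>
    #include <vector>

    // Copy a [size0 x size1] window of a 2-D row-major tensor with `cols`
    // columns, starting at (off0, off1). Bounds are assumed valid.
    std::vector<float> Slice2DRef(const std::vector<float>& x, int64_t cols,
                                  int64_t off0, int64_t off1,
                                  int64_t size0, int64_t size1) {
      std::vector<float> y(size0 * size1);
      for (int64_t i = 0; i < size0; ++i) {
        for (int64_t j = 0; j < size1; ++j) {
          y[i * size1 + j] = x[(off0 + i) * cols + (off1 + j)];
        }
      }
      return y;
    }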
| /** | /** | ||||
| * @brief Finds values and indices of the "k" largest elements for the last | * @brief Finds values and indices of the "k" largest elements for the last | ||||
| * dimension . \n | * dimension . \n | ||||
| @@ -829,8 +913,8 @@ REG_OP(SliceD) | |||||
| * @li sorted = true | * @li sorted = true | ||||
| * @li It's unstable sorted indices on the platform of Ascend310 | * @li It's unstable sorted indices on the platform of Ascend310 | ||||
| * @par Third-party framework compatibility | |||||
| * @li Compatible with the TensorFlow operator TopK. | |||||
| * @par Restrictions: | |||||
| * Warning: THIS FUNCTION IS DEPRECATED. Please use TopKV2 instead. | |||||
| */ | */ | ||||
| REG_OP(TopKD) | REG_OP(TopKD) | ||||
| .INPUT(x, TensorType::RealNumberType()) | .INPUT(x, TensorType::RealNumberType()) | ||||
| @@ -855,6 +939,44 @@ REG_OP(TopKD) | |||||
| * Number of top elements to look for along the last dimension (along each row | * Number of top elements to look for along the last dimension (along each row | ||||
| * for matrices) . \n | * for matrices) . \n | ||||
| * @par Attributes: | |||||
| * @li sorted: An optional bool. Defaults to true. | |||||
| * If true, the resulting "k" elements will be sorted by the values in descending | |||||
| * order. | |||||
| * @li dim: An optional int. Defaults to -1. For reserved use. | |||||
| * @li largest: An optional bool. Defaults to true. For reserved use. \n | |||||
| * @par Outputs: | |||||
| * @li values: A Tensor, specifying the sorted data. Has the same type as | |||||
| * "input". | |||||
| * @li indices: A Tensor of type int32, specifying the indices of sorted data . \n | |||||
| * @see TopK() | |||||
| * @par Third-party framework compatibility | |||||
| * @li Compatible with the TensorFlow operator TopKV2. | |||||
| */ | |||||
| REG_OP(TopKV2) | |||||
| .INPUT(x, TensorType::RealNumberType()) | |||||
| .INPUT(k, TensorType({DT_INT32})) | |||||
| .OUTPUT(values, TensorType::RealNumberType()) | |||||
| .OUTPUT(indices, TensorType({DT_INT32})) | |||||
| .ATTR(sorted, Bool, true) | |||||
| .ATTR(dim, Int, -1) | |||||
| .ATTR(largest, Bool, true) | |||||
| .OP_END_FACTORY_REG(TopKV2) | |||||
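Reviewer note: a small C++ sketch of the row-wise top-k semantics documented above (descending order when "sorted" is true); the names here are illustrative, not the kernel API:

    #include <algorithm>
    #include <cstdint>
    #include <numeric>
    #include <utility>
    #include <vector>

    // Returns the k largest values of one row and their original indices.
    std::pair<std::vector<float>, std::vector<int32_t>>
    TopKRow(const std::vector<float> &row, int32_t k) {
      std::vector<int32_t> idx(row.size());
      std::iota(idx.begin(), idx.end(), 0);
      std::partial_sort(idx.begin(), idx.begin() + k, idx.end(),
                        [&row](int32_t a, int32_t b) { return row[a] > row[b]; });
      idx.resize(k);
      std::vector<float> values(k);
      for (int32_t i = 0; i < k; ++i) {
        values[i] = row[idx[i]];
      }
      return {values, idx};
    }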
| /** | |||||
| * @brief Finds values and indices of the "k" largest elements for the last | |||||
| * dimension . \n | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li x: A 1D or higher tensor of type RealNumberType, with the last dimension | |||||
| * at least "k". | |||||
| * @li k: A 0D Tensor of type int32. | |||||
| * Number of top elements to look for along the last dimension (along each row | |||||
| * for matrices) . \n | |||||
| * @par Attributes: | * @par Attributes: | ||||
| * @li sorted: An optional bool. Defaults to true. | * @li sorted: An optional bool. Defaults to true. | ||||
| * If true, the resulting "k" elements will be sorted by the values in descending | * If true, the resulting "k" elements will be sorted by the values in descending | ||||
| @@ -876,6 +998,8 @@ REG_OP(TopK) | |||||
| .OUTPUT(values, TensorType::RealNumberType()) | .OUTPUT(values, TensorType::RealNumberType()) | ||||
| .OUTPUT(indices, TensorType({DT_INT32})) | .OUTPUT(indices, TensorType({DT_INT32})) | ||||
| .ATTR(sorted, Bool, true) | .ATTR(sorted, Bool, true) | ||||
| .ATTR(largest, Bool, true) | |||||
| .ATTR(dim, Int, -1) | |||||
| .OP_END_FACTORY_REG(TopK) | .OP_END_FACTORY_REG(TopK) | ||||
| /** | /** | ||||
| *@brief Creates a new tensor by applying sparse "updates" to individual values or slices within a tensor (initially zero for numeric, empty for string) of the given "shape" according to "indices" . \n | *@brief Creates a new tensor by applying sparse "updates" to individual values or slices within a tensor (initially zero for numeric, empty for string) of the given "shape" according to "indices" . \n | ||||
| @@ -1921,6 +2045,188 @@ REG_OP(CumulativeLogsumexpD) | |||||
| .ATTR(exclusive, Bool, false) | .ATTR(exclusive, Bool, false) | ||||
| .ATTR(reverse, Bool, false) | .ATTR(reverse, Bool, false) | ||||
| .OP_END_FACTORY_REG(CumulativeLogsumexpD) | .OP_END_FACTORY_REG(CumulativeLogsumexpD) | ||||
| /** | |||||
| * @brief Add updates to var according to axis and indices. | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li var: A Tensor. Must be one of the following types: | |||||
| * float16, float32, int16, int32, int8, uint8. | |||||
| * @li indices: A Tensor of the indices, type should be int32. | |||||
| * @li updates: A Tensor of the same type as "var". \n | |||||
| * @par Attributes: | |||||
| * @li axis: A required int, specifying the axis along which to perform the index add. \n | |||||
| * @par Outputs: | |||||
| * @li var: A Tensor. Same as input "var". | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Pytorch operator index_add_. | |||||
| */ | |||||
| REG_OP(InplaceIndexAdd) | |||||
| .INPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, | |||||
| DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
| .INPUT(indices, TensorType({DT_INT32})) | |||||
| .INPUT(updates, TensorType({DT_INT16, DT_INT32, DT_INT8, | |||||
| DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
| .OUTPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, | |||||
| DT_UINT8, DT_FLOAT32, DT_FLOAT16})) | |||||
| .REQUIRED_ATTR(axis, Int) | |||||
| .OP_END_FACTORY_REG(InplaceIndexAdd) | |||||
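Reviewer note: a 2-D, axis = 0 sketch of the accumulate-at-indices semantics (a simplification for illustration; the op generalizes to any axis):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // var[indices[i], :] += updates[i, :] on a row-major (rows x cols) tensor.
    void InplaceIndexAddAxis0(std::vector<float> &var, size_t cols,
                              const std::vector<int32_t> &indices,
                              const std::vector<float> &updates) {
      for (size_t i = 0; i < indices.size(); ++i) {
        for (size_t j = 0; j < cols; ++j) {
          var[static_cast<size_t>(indices[i]) * cols + j] += updates[i * cols + j];
        }
      }
    }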
| /** | |||||
| * @brief Replaces elements of "x" with "value" according to "mask". | |||||
| * @par Inputs: | |||||
| * Three inputs, including: | |||||
| * @li x: A Tensor of dtype float16, float32, int32 or int8. | |||||
| * @li mask: A Tensor of dtype bool. | |||||
| * @li value: A Tensor or scalar of dtype float16, float32, int32 or int8. \n | |||||
| * @par Outputs: | |||||
| * @li y: A tensor. Must be one of the following dtypes: | |||||
| * float16, float32, int32, int8. | |||||
| */ | |||||
| REG_OP(MaskedFill) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) | |||||
| .INPUT(mask, TensorType({DT_BOOL})) | |||||
| .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) | |||||
| .OP_END_FACTORY_REG(MaskedFill) | |||||
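Reviewer note: the elementwise semantics in a few lines, with "value" modeled as a broadcast scalar (an assumption for this sketch):

    #include <cstddef>
    #include <vector>

    // y[i] = mask[i] ? value : x[i]
    std::vector<float> MaskedFill(const std::vector<float> &x,
                                  const std::vector<bool> &mask, float value) {
      std::vector<float> y(x);
      for (size_t i = 0; i < y.size(); ++i) {
        if (mask[i]) {
          y[i] = value;
        }
      }
      return y;
    }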
| /** | |||||
| * @brief Selects elements of "x" according to "mask". | |||||
| * @par Inputs: | |||||
| * Two inputs, including: | |||||
| * @li x: A Tensor of dtype float16 or float32. | |||||
| * @li mask: A Tensor of dtype bool. \n | |||||
| * @par Outputs: | |||||
| * @li y: A tensor with the same type as x. \n | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the Numpy operator select. | |||||
| * Replaces the Pytorch operator masked_select in some scenarios. \n | |||||
| */ | |||||
| REG_OP(MaskedSelectV2) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(mask, TensorType({DT_BOOL})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(MaskedSelectV2) | |||||
| /** | |||||
| * @brief Slices a tensor at its last dim, e.g. a[..., begin:end:stride]. \n | |||||
| * @par Inputs: | |||||
| * One input, including: | |||||
| * @li x: A Tensor. Must be one of the following types: float16, float32, double, int8, int16, int32, int64. | |||||
| * @par Attributes: | |||||
| * @li start: An attribute of type Int, start index of last dim. \n | |||||
| * @li end: An attribute of type Int, end index of last dim. \n | |||||
| * @li stride: An attribute of type Int, stride of slice. \n | |||||
| * @par Outputs: | |||||
| * @li y: A Tensor. Has the same type as "x". \n | |||||
| * @par Third-party framework compatibility | |||||
| * No compatibility | |||||
| */ | |||||
| REG_OP(SliceLastDim) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) | |||||
| .REQUIRED_ATTR(start, Int) | |||||
| .REQUIRED_ATTR(end, Int) | |||||
| .ATTR(stride, Int, 1) | |||||
| .OP_END_FACTORY_REG(SliceLastDim) | |||||
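Reviewer note: a sketch of a[..., start:end:stride] on a tensor flattened to (outer, last), assuming 0 <= start < end <= last and stride > 0:

    #include <cstddef>
    #include <vector>

    std::vector<float> SliceLastDim(const std::vector<float> &x, size_t outer,
                                    size_t last, size_t start, size_t end,
                                    size_t stride = 1) {
      const size_t out_last = (end - start + stride - 1) / stride;  // ceil division
      std::vector<float> y(outer * out_last);
      for (size_t o = 0; o < outer; ++o) {
        for (size_t j = 0; j < out_last; ++j) {
          y[o * out_last + j] = x[o * last + start + j * stride];
        }
      }
      return y;
    }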
| /** | |||||
| * @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n | |||||
| * extracts a slice of size (end-begin)/stride from the given input tensor. \n | |||||
| * Starting at the location specified by begin the slice continues by \n | |||||
| * adding stride to the index until all dimensions are not less than end. \n | |||||
| * | |||||
| * @par Inputs: | |||||
| * Five inputs, including: | |||||
| * @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n | |||||
| * complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n | |||||
| * complex128, float16, uint32, uint64. \n | |||||
| * @li begin: A Tensor of type int32 or int64, for the index of the first value to select. | |||||
| * | |||||
| * @li end: A Tensor of type int32 or int64, for the index of the last value to select. | |||||
| * | |||||
| * @li axes: A Tensor of type int32 or int64, indicating the axes to select. | |||||
| * | |||||
| * @li strides: A Tensor of type int32 or int64, for the increment. | |||||
| * | |||||
| * @par Attributes: | |||||
| * @li begin_mask: An optional int. Defaults to 0. \n | |||||
| * A bitmask where a bit "i" being "1" means to ignore the begin \n | |||||
| * value and instead use the largest interval possible. | |||||
| * @li end_mask: An optional int. Defaults to 0. \n | |||||
| * Analogous to "begin_mask". | |||||
| * @li ellipsis_mask: An optional int. Defaults to 0. \n | |||||
| * A bitmask where bit "i" being "1" means the "i"th position \n | |||||
| * is actually an ellipsis. | |||||
| * @li new_axis_mask: An optional int. Defaults to 0. \n | |||||
| * A bitmask where bit "i" being "1" means the "i"th \n | |||||
| * specification creates a new shape 1 dimension. | |||||
| * @li shrink_axis_mask: An optional int. Defaults to 0. \n | |||||
| * A bitmask where bit "i" implies that the "i"th \n | |||||
| * specification should shrink the dimensionality. | |||||
| * | |||||
| * @par Outputs: | |||||
| * y: A Tensor. Has the same type as "x". | |||||
| * | |||||
| * @attention Constraints: | |||||
| * | |||||
| * @par Third-party framework compatibility | |||||
| * Compatible with the TensorFlow operator StridedSliceV2. | |||||
| */ | |||||
| REG_OP(StridedSliceV2) | |||||
| .INPUT(x, TensorType::BasicType()) | |||||
| .INPUT(begin, TensorType::IndexNumberType()) | |||||
| .INPUT(end, TensorType::IndexNumberType()) | |||||
| .OPTIONAL_INPUT(axes, TensorType::IndexNumberType()) | |||||
| .OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) | |||||
| .ATTR(begin_mask, Int, 0) | |||||
| .ATTR(end_mask, Int, 0) | |||||
| .ATTR(ellipsis_mask, Int, 0) | |||||
| .ATTR(new_axis_mask, Int, 0) | |||||
| .ATTR(shrink_axis_mask, Int, 0) | |||||
| .OUTPUT(y, TensorType::BasicType()) | |||||
| .OP_END_FACTORY_REG(StridedSliceV2) | |||||
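Reviewer note: the (end-begin)/stride size claim, checked on one axis; positive stride assumed, and negative strides and the mask bits are out of scope for this sketch:

    #include <cstdint>
    #include <vector>

    // Selected indices on one axis: begin, begin+stride, ... while index < end.
    std::vector<int64_t> StridedIndices(int64_t begin, int64_t end, int64_t stride) {
      std::vector<int64_t> idx;
      for (int64_t i = begin; i < end; i += stride) {
        idx.push_back(i);
      }
      return idx;  // idx.size() == ceil((end - begin) / stride) for stride > 0
    }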
| /** | |||||
| *@brief Fills the elements of the input tensor with value "val" by selecting the indices in the order given in "index". \n | |||||
| *@par Inputs: | |||||
| *Three inputs, including: | |||||
| * @li x: A tensor. Must be one of the following types: | |||||
| * float16, float32, int32. \n | |||||
| *@li assist1: A tensor. Must be one of the following types: | |||||
| * float16, float32, int32. \n | |||||
| *@li assist2: A tensor. Must be one of the following types: | |||||
| * float16, float32, int32. \n | |||||
| * @par Attributes: | |||||
| * @li dim: A required int. Used to select the dimension of this tensor. \n | |||||
| *@par Outputs: | |||||
| *y: A Tensor with the same type and shape as the input "x". \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with the Pytorch operator IndexFill. \n | |||||
| */ | |||||
| REG_OP(IndexFillD) | |||||
| .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(assist1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .INPUT(assist2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
| .REQUIRED_ATTR(dim, Int) | |||||
| .OP_END_FACTORY_REG(IndexFillD) | |||||
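Reviewer note: a dim = 0 sketch of the fill-at-indices semantics (the D variant packs index and value into the assist inputs; here they are plain arguments for clarity):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // x[index[i], :] = val on a row-major (rows x cols) tensor.
    void IndexFillDim0(std::vector<float> &x, size_t cols,
                       const std::vector<int32_t> &index, float val) {
      for (int32_t row : index) {
        for (size_t j = 0; j < cols; ++j) {
          x[static_cast<size_t>(row) * cols + j] = val;
        }
      }
    }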
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -383,11 +383,11 @@ REG_OP(SparseFillEmptyRowsGrad) | |||||
| REG_OP(SparseTensorDenseMatMul) | REG_OP(SparseTensorDenseMatMul) | ||||
| .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) | .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) | ||||
| .INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \ | .INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \ | ||||
| DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16})) | |||||
| DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16, DT_INT64})) | |||||
| .INPUT(x1_shape, TensorType({DT_INT64})) | .INPUT(x1_shape, TensorType({DT_INT64})) | ||||
| .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ | |||||
| .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ | |||||
| DT_COMPLEX128, DT_FLOAT16})) | DT_COMPLEX128, DT_FLOAT16})) | ||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ | |||||
| DT_COMPLEX128, DT_FLOAT16})) | DT_COMPLEX128, DT_FLOAT16})) | ||||
| .ATTR(adjoint_a, Bool, false) | .ATTR(adjoint_a, Bool, false) | ||||
| .ATTR(adjoint_b, Bool, false) | .ATTR(adjoint_b, Bool, false) | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -26,6 +26,24 @@ | |||||
| namespace ge { | namespace ge { | ||||
| /** | |||||
| *@brief Computes the inverse 1-dimensional discrete Fourier transform over the | |||||
| inner-most dimension of `x`. \n | |||||
| *@par Inputs: | |||||
| *@li x: A Tensor. Must be one of the following types: complex64, complex128. \n | |||||
| *@par Outputs: | |||||
| *@li y: A complex tensor of the same rank as `x`. \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with TensorFlow IFFT operator. | |||||
| */ | |||||
| REG_OP(IFFT) | |||||
| .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(IFFT) | |||||
| /** | /** | ||||
| *@brief Real-valued fast Fourier transform . \n | *@brief Real-valued fast Fourier transform . \n | ||||
| @@ -47,6 +65,84 @@ REG_OP(RFFT) | |||||
| .OUTPUT(y, TensorType({DT_COMPLEX64})) | .OUTPUT(y, TensorType({DT_COMPLEX64})) | ||||
| .OP_END_FACTORY_REG(RFFT) | .OP_END_FACTORY_REG(RFFT) | ||||
| /** | |||||
| *@brief Inverse real-valued fast Fourier transform. \n | |||||
| *@par Inputs: | |||||
| *@li x: A complex64 tensor. | |||||
| *@li fft_length: An int32 tensor of shape [1]. The FFT length. \n | |||||
| *@par Outputs: | |||||
| *@li y: A float32 tensor of the same rank as `x`. The inner-most | |||||
| dimension of `x` is replaced with the `fft_length` samples of its inverse | |||||
| 1D Fourier transform. \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with TensorFlow IRFFT operator. | |||||
| */ | |||||
| REG_OP(IRFFT) | |||||
| .INPUT(x, TensorType({DT_COMPLEX64})) | |||||
| .INPUT(fft_length, TensorType({DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT})) | |||||
| .OP_END_FACTORY_REG(IRFFT) | |||||
| /** | |||||
| *@brief 2D fast Fourier transform. \n | |||||
| *@par Inputs: | |||||
| *@li x: A Tensor. Must be one of the following types: complex64, complex128. | |||||
| *@par Outputs: | |||||
| *@li y: A complex tensor of the same shape as `x`. The inner-most 2 | |||||
| dimensions of `x` are replaced with their 2D Fourier transform. \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with TensorFlow FFT2D operator. | |||||
| */ | |||||
| REG_OP(FFT2D) | |||||
| .INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(FFT2D) | |||||
| /** | |||||
| *@brief Calculate the one-dimensional discrete Fourier transform on the | |||||
| innermost dimension of the input. \n | |||||
| *@par Inputs: | |||||
| *@li x: A Tensor. Must be one of the following types: complex64, complex128. \n | |||||
| *@par Outputs: | |||||
| *@li y: A complex tensor with the same shape as input. The innermost dimension | |||||
| of the input is replaced by its 1-dimensional Fourier transform. \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with TensorFlow FFT operator. | |||||
| */ | |||||
| REG_OP(FFT) | |||||
| .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(FFT) | |||||
| /** | |||||
| *@brief Calculate the inverse 2-dimensional discrete Fourier transform over the | |||||
| innermost 2 dimensions of the input. \n | |||||
| *@par Inputs: | |||||
| *@li x: A Tensor. Must be one of the following types: complex64, complex128. \n | |||||
| *@par Outputs: | |||||
| *@li y: A complex tensor with the same shape as the input. The innermost 2 dimensions | |||||
| of the input are replaced by their inverse 2-dimensional Fourier transform. \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with TensorFlow IFFT2D operator. | |||||
| */ | |||||
| REG_OP(IFFT2D) | |||||
| .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128})) | |||||
| .OP_END_FACTORY_REG(IFFT2D) | |||||
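Reviewer note: a naive O(n^2) reference for the 1-D transforms registered above; sign = -1 gives FFT, sign = +1 plus the 1/n scale gives IFFT. A semantics sketch only; real kernels use fast FFT algorithms:

    #include <cmath>
    #include <complex>
    #include <cstddef>
    #include <vector>

    std::vector<std::complex<float>> Dft1D(
        const std::vector<std::complex<float>> &x, int sign) {
      const size_t n = x.size();
      const float kTwoPi = 6.283185307179586f;
      std::vector<std::complex<float>> y(n);
      for (size_t k = 0; k < n; ++k) {
        std::complex<float> acc(0.0f, 0.0f);
        for (size_t t = 0; t < n; ++t) {
          const float ang = static_cast<float>(sign) * kTwoPi *
                            static_cast<float>(k * t) / static_cast<float>(n);
          acc += x[t] * std::complex<float>(std::cos(ang), std::sin(ang));
        }
        y[k] = (sign > 0) ? acc / static_cast<float>(n) : acc;  // scale for inverse
      }
      return y;
    }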
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -62,8 +62,8 @@ REG_OP(Split) | |||||
| *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value. | |||||
| *@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n | |||||
| *@li split_dim: A required int32. Specifies the dimension along which to split. No default value. | |||||
| *@li num_split: A required int32. Specifies the number of output tensors. No default value . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n | *y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n | ||||
| @@ -94,12 +94,12 @@ REG_OP(SplitD) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * Three inputs, including: | * Three inputs, including: | ||||
| *@li x: An ND Tensor. | *@li x: An ND Tensor. | ||||
| *Must be one of the following types: | |||||
| *@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. | |||||
| *@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split . \n | |||||
| *Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | |||||
| *@li size_splits: Must be one of the following types: int32, int64. Specifies a list containing the sizes of each output tensor along the split dimension. | |||||
| *@li split_dim: Must be of type int32. Specifies the dimension along which to split . \n | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n | |||||
| *num_split: A required int32. Specifies the number of output tensors. No default value . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | ||||
| @@ -129,9 +129,9 @@ REG_OP(SplitV) | |||||
| *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | ||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li size_splits: A required list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. | |||||
| *@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value. | |||||
| *@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n | |||||
| *@li size_splits: A required list of int32. Specifies a list containing the sizes of each output tensor along the split dimension. | |||||
| *@li split_dim: A required int32. Specifies the dimension along which to split. No default value. | |||||
| *@li num_split: A required int32. Specifies the number of output tensors. No default value . \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -488,7 +488,7 @@ include: | |||||
| */ | */ | ||||
| REG_OP(AsString) | REG_OP(AsString) | ||||
| .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \ | .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \ | ||||
| DT_DOUBLE, DT_BOOL})) | |||||
| DT_DOUBLE, DT_BOOL, DT_COMPLEX64, DT_COMPLEX128})) | |||||
| .OUTPUT(y, TensorType({DT_STRING})) | .OUTPUT(y, TensorType({DT_STRING})) | ||||
| .ATTR(precision, Int, -1) | .ATTR(precision, Int, -1) | ||||
| .ATTR(scientific, Bool, false) | .ATTR(scientific, Bool, false) | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -141,7 +141,7 @@ support "NHWC/NCHW" to "NC1HWC0" and "NC1HWC0" to "NHWC/NCHW" | |||||
| *@par Attributes: | *@par Attributes: | ||||
| *@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc. | *@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc. | ||||
| *@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc. | *@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc. | ||||
| *@li group: A required int32, default value is 1. \n | |||||
| *@li groups: An optional int32, default value is 1. \n | |||||
| *@par Outputs: | *@par Outputs: | ||||
| *dst: A Tensor dtype of all types. | *dst: A Tensor dtype of all types. | ||||
| @@ -151,7 +151,7 @@ REG_OP(TransData) | |||||
| .OUTPUT(dst, TensorType::BasicType()) | .OUTPUT(dst, TensorType::BasicType()) | ||||
| .REQUIRED_ATTR(src_format, String) | .REQUIRED_ATTR(src_format, String) | ||||
| .REQUIRED_ATTR(dst_format, String) | .REQUIRED_ATTR(dst_format, String) | ||||
| .ATTR(group, Int, 1) | |||||
| .ATTR(groups, Int, 1) | |||||
| .OP_END_FACTORY_REG(TransData) | .OP_END_FACTORY_REG(TransData) | ||||
| /** | /** | ||||
| @@ -357,7 +357,7 @@ REG_OP(DepthToSpace) | |||||
| *@brief Permutes data into spatial data blocks and then prunes them . \n | *@brief Permutes data into spatial data blocks and then prunes them . \n | ||||
| *@par Inputs: | *@par Inputs: | ||||
| *@li x: A 4D Tensor with format NHWC. | |||||
| *@li x: A 4D Tensor. Must set the format, supported format list ["NCHW", "NHWC"]. | |||||
| *@li crops: A 1D list or tuple of int32 or int64 . \n | *@li crops: A 1D list or tuple of int32 or int64 . \n | ||||
| *Must be one of the following types: float16, float32 | *Must be one of the following types: float16, float32 | ||||
| @@ -434,9 +434,10 @@ REG_OP(BatchToSpaceD) | |||||
| *@par Inputs: | *@par Inputs: | ||||
| * Two inputs, including: | * Two inputs, including: | ||||
| *@li x: An NHWC Tensor. Must be one of the following types: | |||||
| *@li x: A 4D Tensor. Must be one of the following types: | |||||
| * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, | * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, | ||||
| * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | ||||
| * Must set the format, supported format list ["NCHW", "NHWC"]. | |||||
| *@li paddings: A 2D tensor of type int, specifying the input . \n | *@li paddings: A 2D tensor of type int, specifying the input . \n | ||||
| *@par Attributes: | *@par Attributes: | ||||
| @@ -518,7 +519,8 @@ REG_OP(Unpack) | |||||
| * @par Inputs: | * @par Inputs: | ||||
| * x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the | * x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the | ||||
| * following types:float32, double, int32, uint8, int16, int8, int64, uint16, | * following types:float32, double, int32, uint8, int16, int8, int64, uint16, | ||||
| * float16, uint32, uint64 | |||||
| * float16, uint32, uint64. The input must have a data_format, one of the | |||||
| * following: NHWC, NCHW. | |||||
| * @par Attributes: | * @par Attributes: | ||||
| * @li ksizes: A required list or tuple. The size of the sliding window for each | * @li ksizes: A required list or tuple. The size of the sliding window for each | ||||
| @@ -533,7 +535,6 @@ REG_OP(Unpack) | |||||
| * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. | * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. | ||||
| * @li padding: A required string. The type of padding algorithm to use, | * @li padding: A required string. The type of padding algorithm to use, | ||||
| support "SAME" or "VALID". \n | support "SAME" or "VALID". \n | ||||
| * @li data_format: A required string. The format of input, only supported NHWC. \n | |||||
| * @par Outputs: | * @par Outputs: | ||||
| * y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * | * y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows * | ||||
| @@ -554,7 +555,6 @@ REG_OP(ExtractImagePatches) | |||||
| .REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
| .REQUIRED_ATTR(rates, ListInt) | .REQUIRED_ATTR(rates, ListInt) | ||||
| .REQUIRED_ATTR(padding, String) | .REQUIRED_ATTR(padding, String) | ||||
| .ATTR(data_format, String, "NHWC") | |||||
| .OP_END_FACTORY_REG(ExtractImagePatches) | .OP_END_FACTORY_REG(ExtractImagePatches) | ||||
| /** | /** | ||||
| @@ -563,6 +563,7 @@ REG_OP(ExtractImagePatches) | |||||
| * @par Inputs: | * @par Inputs: | ||||
| * x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n | * x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n | ||||
| * The input must have a data_format, one of the following: NDHWC, NCDHW. \n | |||||
| * @par Attributes: | * @par Attributes: | ||||
| * @li ksizes: A required list or tuple. The size of the sliding window for each | * @li ksizes: A required list or tuple. The size of the sliding window for each | ||||
| @@ -571,7 +572,6 @@ REG_OP(ExtractImagePatches) | |||||
| * patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1]. | * patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1]. | ||||
| * @li padding: A required string. The type of padding algorithm to use , | * @li padding: A required string. The type of padding algorithm to use , | ||||
| * support "SAME" or "VALID" . \n | * support "SAME" or "VALID" . \n | ||||
| * @li data_format: An optional string. The format of input, only supported NDHWC. \n | |||||
| * @par Outputs: | * @par Outputs: | ||||
| * Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes * | * Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes * | ||||
| @@ -590,7 +590,6 @@ REG_OP(ExtractVolumePatches) | |||||
| .REQUIRED_ATTR(ksizes, ListInt) | .REQUIRED_ATTR(ksizes, ListInt) | ||||
| .REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
| .REQUIRED_ATTR(padding, String) | .REQUIRED_ATTR(padding, String) | ||||
| .ATTR(data_format, String, "NDHWC") | |||||
| .OP_END_FACTORY_REG(ExtractVolumePatches) | .OP_END_FACTORY_REG(ExtractVolumePatches) | ||||
| /** | /** | ||||
| @@ -717,6 +716,72 @@ REG_OP(CompressFcOp) | |||||
| .OUTPUT(compress_index, TensorType({DT_INT8})) | .OUTPUT(compress_index, TensorType({DT_INT8})) | ||||
| .REQUIRED_ATTR(compress_parameters, ListInt) | .REQUIRED_ATTR(compress_parameters, ListInt) | ||||
| .OP_END_FACTORY_REG(CompressFcOp) | .OP_END_FACTORY_REG(CompressFcOp) | ||||
| /** | |||||
| *@brief Performs Col2im for each batch entry. \n | |||||
| *@par Inputs: | |||||
| *@li x: The col Tensor. 5-D, shape: `(n, c1, kernel_h*kernel_w, ho*wo, c0)`, | |||||
| where ho = (output_h + 2*padding_h - dilation_h*(kernel_h - 1) - 1)//stride_h + 1 | |||||
| and wo is computed the same way from the width parameters. | |||||
| *@li output_size: A 1-D Tensor of type int32, value: `(output_h, output_w)`. \n | |||||
| *@par Outputs: | |||||
| *@li y: The img Tensor. 5-D, shape: `(n, c1, output_h, output_w, c0)`. \n | |||||
| *@par Attributes: | |||||
| *@li kernel_size: ListInt, value: `(kernel_h, kernel_w)`, the shape of the kernel in convolution. | |||||
| *@li dilation: ListInt, value: `(dilation_h, dilation_w)`, the dilation in convolution. | |||||
| *@li padding: ListInt, value: `(padding_h, padding_w)`, the padding in convolution. | |||||
| *@li stride: ListInt, value: `(stride_h, stride_w)`, the stride in convolution. \n | |||||
| *@par Third-party framework compatibility | |||||
| * Compatible with Pytorch col2im/im2col_backward operator. | |||||
| */ | |||||
| REG_OP(Col2im) | |||||
| .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .INPUT(output_size, TensorType({DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
| .REQUIRED_ATTR(kernel_size, ListInt) | |||||
| .REQUIRED_ATTR(dilation, ListInt) | |||||
| .REQUIRED_ATTR(padding, ListInt) | |||||
| .REQUIRED_ATTR(stride, ListInt) | |||||
| .OP_END_FACTORY_REG(Col2im) | |||||
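Reviewer note: the ho/wo relationship from the doc comment, spelled out for one axis using the standard im2col arithmetic (width is analogous):

    #include <cstdint>

    // Number of sliding-window positions along one spatial axis.
    int64_t ColOutputDim(int64_t output, int64_t padding, int64_t dilation,
                         int64_t kernel, int64_t stride) {
      return (output + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1;
    }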
| /** | |||||
| *@brief Generates a 2D or 3D flow field (sampling grid), given a batch of affine | |||||
| matrices theta. \n | |||||
| *@par Inputs: | |||||
| *Input theta must be float16 or float, output_size must be int32 type. Inputs | |||||
| include: | |||||
| *@li theta: input batch of affine matrices with shape (N,2,3) for 2D or (N,3,4) | |||||
| for 3D | |||||
| *@li output_size: the target output image size. (N×C×H×W for 2D or N×C×D×H×W for | |||||
| 3D) Example: torch.Size((32, 3, 24, 24)) . \n | |||||
| *@par Attributes: | |||||
| *align_corners: if True, consider -1 and 1 to refer to the centers of the corner | |||||
| pixels rather than the image corners. Refer to grid_sample() for a more complete | |||||
| description. A grid generated by affine_grid() should be passed to grid_sample() | |||||
| with the same setting for this option. Default: False \n | |||||
| *@par Outputs: | |||||
| *@li y: A Tensor with the sampling grid, of the same type as "theta": | |||||
| shape (N, H, W, 2) for 2D or (N, D, H, W, 3) for 3D. \n | |||||
| *@attention Constraints: | |||||
| *Input theta must be float16 or float, output_size must be int32 type . \n | |||||
| *@par Third-party framework compatibility | |||||
| *Compatible with Pytorch affine_grid operator. | |||||
| */ | |||||
| REG_OP(AffineGrid) | |||||
| .INPUT(theta, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .INPUT(output_size, TensorType({DT_INT32})) | |||||
| .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
| .ATTR(align_corners, Bool, false) | |||||
| .OP_END_FACTORY_REG(AffineGrid) | |||||
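Reviewer note: the 2-D grid math in plain C++, assumed from the Pytorch affine_grid definition (H, W > 1 when align_corners is true):

    #include <cstddef>
    #include <vector>

    // Maps each output pixel's normalized (x, y, 1) through the 2x3 matrix theta;
    // result layout is (H, W, 2) with (x, y) pairs in [-1, 1] coordinates.
    std::vector<float> AffineGrid2D(const float theta[2][3], int H, int W,
                                    bool align_corners = false) {
      std::vector<float> grid(static_cast<size_t>(H) * W * 2);
      for (int i = 0; i < H; ++i) {
        for (int j = 0; j < W; ++j) {
          const float y = align_corners ? 2.0f * i / (H - 1) - 1.0f
                                        : (2.0f * i + 1.0f) / H - 1.0f;
          const float x = align_corners ? 2.0f * j / (W - 1) - 1.0f
                                        : (2.0f * j + 1.0f) / W - 1.0f;
          float *out = &grid[(static_cast<size_t>(i) * W + j) * 2];
          out[0] = theta[0][0] * x + theta[0][1] * y + theta[0][2];
          out[1] = theta[1][0] * x + theta[1][1] * y + theta[1][2];
        }
      }
      return grid;
    }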
| } // namespace ge | } // namespace ge | ||||
| #endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -339,7 +339,7 @@ RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); | |||||
| * @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
| * @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
| */ | */ | ||||
| rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream); | |||||
| RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream); | |||||
| /** | /** | ||||
| * @ingroup dvrt_base | * @ingroup dvrt_base | ||||
| @@ -46,6 +46,12 @@ typedef enum tagRtChipType { | |||||
| CHIP_END, | CHIP_END, | ||||
| } rtChipType_t; | } rtChipType_t; | ||||
| typedef enum tagRtAicpuScheType { | |||||
| SCHEDULE_SOFTWARE = 0, /* Software Schedule */ | |||||
| SCHEDULE_SOFTWARE_OPT, | |||||
| SCHEDULE_HARDWARE, /* HWTS Schedule */ | |||||
| } rtAicpuScheType; | |||||
| typedef enum tagRtVersion { | typedef enum tagRtVersion { | ||||
| VER_BEGIN = 0, | VER_BEGIN = 0, | ||||
| VER_NA = VER_BEGIN, | VER_NA = VER_BEGIN, | ||||
| @@ -126,6 +132,11 @@ typedef struct tagRtPlatformConfig { | |||||
| uint32_t platformConfig; | uint32_t platformConfig; | ||||
| } rtPlatformConfig_t; | } rtPlatformConfig_t; | ||||
| typedef enum tagRTTaskTimeoutType { | |||||
| RT_TIMEOUT_TYPE_OP_WAIT = 0, | |||||
| RT_TIMEOUT_TYPE_OP_EXECUTE, | |||||
| } rtTaskTimeoutType_t; | |||||
| /** | /** | ||||
| * @ingroup | * @ingroup | ||||
| * @brief get AI core count | * @brief get AI core count | ||||
| @@ -184,6 +195,37 @@ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); | |||||
| */ | */ | ||||
| RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); | RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); | ||||
| /** | |||||
| * @ingroup | |||||
| * @brief get device feature ability by device id, such as task schedule ability. | |||||
| * @param [in] deviceId | |||||
| * @param [in] moduleType | |||||
| * @param [in] featureType | |||||
| * @param [out] value | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value); | |||||
| /** | |||||
| * @ingroup | |||||
| * @brief set event wait task timeout time. | |||||
| * @param [in] timeout | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout); | |||||
| /** | |||||
| * @ingroup | |||||
| * @brief set op execute task timeout time. | |||||
| * @param [in] timeout | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); | |||||
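Reviewer note: a hypothetical call sequence for the two new timeout entry points declared above; the unit of `timeout` is not stated in this header, so the values are illustrative only:

    // Assumes this header is included and a device context is already set up.
    rtError_t ret = rtSetOpWaitTimeOut(30U);    // event-wait task timeout
    if (ret == RT_ERROR_NONE) {
      ret = rtSetOpExecuteTimeOut(900U);        // op-execute task timeout
    }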
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -62,6 +62,11 @@ typedef enum tagRtFeatureType { | |||||
| FEATURE_TYPE_RSV | FEATURE_TYPE_RSV | ||||
| } rtFeatureType_t; | } rtFeatureType_t; | ||||
| typedef enum tagRtDeviceFeatureType { | |||||
| FEATURE_TYPE_SCHE, | |||||
| FEATURE_TYPE_END, | |||||
| } rtDeviceFeatureType_t; | |||||
| typedef enum tagMemcpyInfo { | typedef enum tagMemcpyInfo { | ||||
| MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, | MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, | ||||
| MEMCPY_INFO_RSV | MEMCPY_INFO_RSV | ||||
| @@ -30,6 +30,11 @@ extern "C" { | |||||
| #define RT_EVENT_DEFAULT (0x00) | #define RT_EVENT_DEFAULT (0x00) | ||||
| #define RT_EVENT_WITH_FLAG (0x01) | #define RT_EVENT_WITH_FLAG (0x01) | ||||
| #define RT_EVENT_DDSYNC_NS 0x01U | |||||
| #define RT_EVENT_STREAM_MARK 0x02U | |||||
| #define RT_EVENT_DDSYNC 0x04U | |||||
| #define RT_EVENT_TIME_LINE 0x08U | |||||
| /** | /** | ||||
| * @ingroup dvrt_event | * @ingroup dvrt_event | ||||
| * @brief create event instance | * @brief create event instance | ||||
| @@ -173,13 +173,7 @@ typedef void (*rtCallback_t)(void *fnData); | |||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief magic number of elf binary for aicube | * @brief magic number of elf binary for aicube | ||||
| */ | */ | ||||
| #define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41415247 | |||||
| /** | |||||
| * @ingroup rt_kernel | |||||
| * @brief magic number of elf binary for aivector | |||||
| */ | |||||
| #define RT_DEV_BINARY_MAGIC_ELF_AIVECTOR 0x41415248 | |||||
| #define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343 | |||||
| /** | /** | ||||
| * @ingroup rt_kernel_flags | * @ingroup rt_kernel_flags | ||||
| @@ -194,7 +188,7 @@ typedef void (*rtCallback_t)(void *fnData); | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief kernel mode | * @brief kernel mode | ||||
| */ | |||||
| **/ | |||||
| #define RT_DEFAULT_KERNEL_MODE (0x00) | #define RT_DEFAULT_KERNEL_MODE (0x00) | ||||
| #define RT_NORMAL_KERNEL_MODE (0x01) | #define RT_NORMAL_KERNEL_MODE (0x01) | ||||
| #define RT_ALL_KERNEL_MODE (0x02) | #define RT_ALL_KERNEL_MODE (0x02) | ||||
| @@ -217,7 +211,7 @@ RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief register device binary | |||||
| * @brief register device binary with all kernels | |||||
| * @param [in] bin device binary description | * @param [in] bin device binary description | ||||
| * @param [out] handle device binary handle | * @param [out] handle device binary handle | ||||
| * @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
| @@ -336,7 +330,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief launch kernel with handle to device | * @brief launch kernel with handle to device | ||||
| * @param [in] handle program | * @param [in] handle program | ||||
| * @param [in] devFunc device function description | |||||
| * @param [in] devFunc device function description. | |||||
| * @param [in] blockDim block dimensions | * @param [in] blockDim block dimensions | ||||
| * @param [in] args arguments address for kernel function | * @param [in] args arguments address for kernel function | ||||
| * @param [in] argsSize arguments size | * @param [in] argsSize arguments size | ||||
| @@ -347,7 +341,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
| */ | */ | ||||
| RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | ||||
| rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo); | |||||
| rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| @@ -116,6 +116,9 @@ typedef enum tagRtMemInfoType { | |||||
| typedef enum tagRtRecudeKind { | typedef enum tagRtRecudeKind { | ||||
| RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, include 1P, and P2P | RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, include 1P, and P2P | ||||
| RT_MEMCPY_SDMA_AUTOMATIC_MAX = 11, | |||||
| RT_MEMCPY_SDMA_AUTOMATIC_MIN = 12, | |||||
| RT_MEMCPY_SDMA_AUTOMATIC_EQUAL = 13, | |||||
| RT_RECUDE_KIND_END | RT_RECUDE_KIND_END | ||||
| } rtRecudeKind_t; | } rtRecudeKind_t; | ||||
| @@ -123,6 +126,14 @@ typedef enum tagRtDataType { | |||||
| RT_DATA_TYPE_FP32 = 0, // fp32 | RT_DATA_TYPE_FP32 = 0, // fp32 | ||||
| RT_DATA_TYPE_FP16 = 1, // fp16 | RT_DATA_TYPE_FP16 = 1, // fp16 | ||||
| RT_DATA_TYPE_INT16 = 2, // int16 | RT_DATA_TYPE_INT16 = 2, // int16 | ||||
| RT_DATA_TYPE_INT4 = 3, // int4 | |||||
| RT_DATA_TYPE_INT8 = 4, // int8 | |||||
| RT_DATA_TYPE_INT32 = 5, // int32 | |||||
| RT_DATA_TYPE_BFP16 = 6, // bfp16 | |||||
| RT_DATA_TYPE_BFP32 = 7, // bfp32 | |||||
| RT_DATA_TYPE_UINT8 = 8, // uint8 | |||||
| RT_DATA_TYPE_UINT16= 9, // uint16 | |||||
| RT_DATA_TYPE_UINT32= 10,// uint32 | |||||
| RT_DATA_TYPE_END | RT_DATA_TYPE_END | ||||
| } rtDataType_t; | } rtDataType_t; | ||||
| @@ -133,12 +133,13 @@ typedef struct tagAllKernelTaskInfo { | |||||
| uint16_t argsCount; | uint16_t argsCount; | ||||
| uint16_t argsSize; | uint16_t argsSize; | ||||
| uint16_t reserved; | uint16_t reserved; | ||||
| const void *dev_func; | |||||
| void *devfunc; | |||||
| void *handle; | void *handle; | ||||
| uint8_t *smDesc; | uint8_t *smDesc; | ||||
| uint8_t *args; | uint8_t *args; | ||||
| uint16_t *argsOffset; | uint16_t *argsOffset; | ||||
| } rtAllKernelTaskInfo_t; | } rtAllKernelTaskInfo_t; | ||||
| typedef struct tagKernelTaskInfoEx { | typedef struct tagKernelTaskInfoEx { | ||||
| uint32_t flags; | uint32_t flags; | ||||
| uint32_t argsSize; | uint32_t argsSize; | ||||
| @@ -263,7 +264,7 @@ typedef struct tagTaskInfo { | |||||
| union { | union { | ||||
| rtKernelTaskInfoEx_t kernelTaskEx; | rtKernelTaskInfoEx_t kernelTaskEx; | ||||
| rtKernelTaskInfo_t kernelTask; | rtKernelTaskInfo_t kernelTask; | ||||
| rtAllKernelTaskInfo_t allkernelTask; | |||||
| rtAllKernelTaskInfo_t allKernelTask; | |||||
| rtEventTaskInfo_t eventTask; | rtEventTaskInfo_t eventTask; | ||||
| rtStreamSwitchTaskInfo_t streamSwitchTask; | rtStreamSwitchTaskInfo_t streamSwitchTask; | ||||
| rtStreamActiveTaskInfo_t streamActiveTask; | rtStreamActiveTaskInfo_t streamActiveTask; | ||||
| @@ -285,10 +286,27 @@ typedef struct tagTaskInfo { | |||||
| } u; | } u; | ||||
| } rtTaskInfo_t; | } rtTaskInfo_t; | ||||
| typedef struct tagNodeInfo_t { | |||||
| uint32_t nodeIdx; | |||||
| uint32_t reserved[1]; | |||||
| } rtNodeInfo; | |||||
| typedef struct tagHwtsInfo_t { | |||||
| uint16_t taskId; | |||||
| uint16_t sqExeHead; | |||||
| uint16_t streamExeHead; | |||||
| uint16_t reserved[2]; | |||||
| } rtHwtsInfo; | |||||
| typedef struct tagLabelDevInfo_t { | typedef struct tagLabelDevInfo_t { | ||||
| uint16_t modelId; | uint16_t modelId; | ||||
| uint16_t streamId; | uint16_t streamId; | ||||
| uint16_t labelId; | uint16_t labelId; | ||||
| union { | |||||
| rtNodeInfo nodeInfo; | |||||
| rtHwtsInfo hwtsInfo; | |||||
| uint16_t reserved[5]; | |||||
| }u; | |||||
| }rtLabelDevInfo; | }rtLabelDevInfo; | ||||
| typedef rtError_t (*rtTaskGenCallback)(rtModel_t model, rtTaskInfo_t *taskInfo); | typedef rtError_t (*rtTaskGenCallback)(rtModel_t model, rtTaskInfo_t *taskInfo); | ||||
| @@ -189,6 +189,28 @@ RTS_API rtError_t rtStreamActive(rtStream_t activeStream, rtStream_t stream); | |||||
| */ | */ | ||||
| RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr, | RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr, | ||||
| uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType); | uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType); | ||||
| /* | |||||
| * @ingroup dvrt_stream | |||||
| * @brief enable debug for dump overflow exception with stream | |||||
| * @param [in] addr: ddr address of kernel exception dumped | |||||
| * @param [in] stream: stream handle | |||||
| * @param [in] flag: debug flag | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void *addr, | |||||
| uint32_t *streamId, uint32_t *taskId); | |||||
| /* | |||||
| * @ingroup dvrt_stream | |||||
| * @brief disable debug for dump overflow exception with stream | |||||
| * @param [in] stream: stream handle | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream); | |||||
| #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -1,12 +1,18 @@ | |||||
| /** | /** | ||||
| * @file adx_datadump_server.h | |||||
| * | |||||
| * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. | |||||
| * | |||||
| * This program is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| */ | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef ADX_DATADUMP_SERVER_H | #ifndef ADX_DATADUMP_SERVER_H | ||||
| #define ADX_DATADUMP_SERVER_H | #define ADX_DATADUMP_SERVER_H | ||||
| @@ -14,151 +14,99 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MSPROF_ENGINE_PROF_ACL_API_H_ | |||||
| #define MSPROF_ENGINE_PROF_ACL_API_H_ | |||||
| #define MSVP_MAX_DEV_NUM 64 | |||||
| #define MSVP_PROF_API __attribute__((visibility("default"))) | |||||
| #ifndef MSPROFILER_API_PROF_ACL_API_H_ | |||||
| #define MSPROFILER_API_PROF_ACL_API_H_ | |||||
| // DataTypeConfig | // DataTypeConfig | ||||
| #define PROF_ACL_API 0x0001 | |||||
| #define PROF_TASK_TIME 0x0002 | |||||
| #define PROF_AICORE_METRICS 0x0004 | |||||
| #define PROF_AICPU_TRACE 0x0008 | |||||
| #define PROF_MODEL_EXECUTE 0x0010 | |||||
| #define PROF_RUNTIME_API 0x0020 | |||||
| #define PROF_RUNTIME_TRACE 0x0040 | |||||
| #define PROF_SCHEDULE_TIMELINE 0x0080 | |||||
| #define PROF_SCHEDULE_TRACE 0x0100 | |||||
| #define PROF_AIVECTORCORE_METRICS 0x0200 | |||||
| #define PROF_SUBTASK_TIME 0x0400 | |||||
| #define PROF_TRAINING_TRACE 0x0800 | |||||
| #define PROF_HCCL_TRACE 0x1000 | |||||
| #define PROF_DATA_PROCESS 0x2000 | |||||
| #define PROF_TASK_TRACE 0x3842 | |||||
| #define PROF_ACL_API 0x00000001 | |||||
| #define PROF_TASK_TIME 0x00000002 | |||||
| #define PROF_AICORE_METRICS 0x00000004 | |||||
| #define PROF_AICPU_TRACE 0x00000008 | |||||
| #define PROF_MODEL_EXECUTE 0x00000010 | |||||
| #define PROF_RUNTIME_API 0x00000020 | |||||
| #define PROF_RUNTIME_TRACE 0x00000040 | |||||
| #define PROF_SCHEDULE_TIMELINE 0x00000080 | |||||
| #define PROF_SCHEDULE_TRACE 0x00000100 | |||||
| #define PROF_AIVECTORCORE_METRICS 0x00000200 | |||||
| #define PROF_SUBTASK_TIME 0x00000400 | |||||
| #define PROF_TRAINING_TRACE 0x00000800 | |||||
| #define PROF_HCCL_TRACE 0x00001000 | |||||
| #define PROF_TASK_TRACE 0x00001852 | |||||
| // system profiling switch | |||||
| #define PROF_CPU 0x00010000 | |||||
| #define PROF_HARDWARE_MEMORY 0x00020000 | |||||
| #define PROF_IO 0x00040000 | |||||
| #define PROF_INTER_CONNECTION 0x00080000 | |||||
| #define PROF_DVPP 0x00100000 | |||||
| #define PROF_SYS_AICORE_SAMPLE 0x00200000 | |||||
| #define PROF_AIVECTORCORE_SAMPLE 0x00400000 | |||||
| #define PROF_MODEL_LOAD 0x8000000000000000 | #define PROF_MODEL_LOAD 0x8000000000000000 | ||||
| // DataTypeConfig MASK | // DataTypeConfig MASK | ||||
| #define PROF_ACL_API_MASK 0x0001 | |||||
| #define PROF_TASK_TIME_MASK 0x0002 | |||||
| #define PROF_AICORE_METRICS_MASK 0x0004 | |||||
| #define PROF_AICPU_TRACE_MASK 0x0008 | |||||
| #define PROF_MODEL_EXECUTE_MASK 0x0010 | |||||
| #define PROF_RUNTIME_API_MASK 0x0020 | |||||
| #define PROF_RUNTIME_TRACE_MASK 0x0040 | |||||
| #define PROF_SCHEDULE_TIMELINE_MASK 0x0080 | |||||
| #define PROF_SCHEDULE_TRACE_MASK 0x0100 | |||||
| #define PROF_AIVECTORCORE_METRICS_MASK 0x0200 | |||||
| #define PROF_SUBTASK_TIME_MASK 0x0400 | |||||
| #define PROF_TRAINING_TRACE_MASK 0x0800 | |||||
| #define PROF_HCCL_TRACE_MASK 0x1000 | |||||
| #define PROF_DATA_PROCESS_MASK 0x2000 | |||||
| #define PROF_ACL_API_MASK 0x00000001 | |||||
| #define PROF_TASK_TIME_MASK 0x00000002 | |||||
| #define PROF_AICORE_METRICS_MASK 0x00000004 | |||||
| #define PROF_AICPU_TRACE_MASK 0x00000008 | |||||
| #define PROF_MODEL_EXECUTE_MASK 0x00000010 | |||||
| #define PROF_RUNTIME_API_MASK 0x00000020 | |||||
| #define PROF_RUNTIME_TRACE_MASK 0x00000040 | |||||
| #define PROF_SCHEDULE_TIMELINE_MASK 0x00000080 | |||||
| #define PROF_SCHEDULE_TRACE_MASK 0x00000100 | |||||
| #define PROF_AIVECTORCORE_METRICS_MASK 0x00000200 | |||||
| #define PROF_SUBTASK_TIME_MASK 0x00000400 | |||||
| #define PROF_TRAINING_TRACE_MASK 0x00000800 | |||||
| #define PROF_HCCL_TRACE_MASK 0x00001000 | |||||
| // system profiling mask | |||||
| #define PROF_CPU_MASK 0x00010000 | |||||
| #define PROF_HARDWARE_MEMORY_MASK 0x00020000 | |||||
| #define PROF_IO_MASK 0x00040000 | |||||
| #define PROF_INTER_CONNECTION_MASK 0x00080000 | |||||
| #define PROF_DVPP_MASK 0x00100000 | |||||
| #define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000 | |||||
| #define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000 | |||||
| #define PROF_MODEL_LOAD_MASK 0x8000000000000000 | #define PROF_MODEL_LOAD_MASK 0x8000000000000000 | ||||
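Reviewer note: the widened PROF_TASK_TRACE value 0x00001852 is exactly the union of the task-related switches, which can be checked at compile time (assuming the defines above are in scope):

    static_assert((PROF_TASK_TIME | PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE |
                   PROF_TRAINING_TRACE | PROF_HCCL_TRACE) == PROF_TASK_TRACE,
                  "PROF_TASK_TRACE == 0x2 | 0x10 | 0x40 | 0x800 | 0x1000 == 0x1852");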
| #include <cstdint> | |||||
| #include <string> | |||||
| /** | |||||
| * @name ProrErrorCode | |||||
| * @brief error code enum of prof_acl_apis | |||||
| */ | |||||
| enum ProfErrorCode { | |||||
| PROF_ERROR_NONE = 0, // ok | |||||
| PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr | |||||
| PROF_ERROR_REPEAT_INIT, // profiling has already been inited | |||||
| PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string | |||||
| PROF_ERROR_DIR_NO_ACCESS, // dir is not accessable | |||||
| PROF_ERROR_FAILURE, // failed to init or start profiling | |||||
| PROF_ERROR_NOT_INITED, // profiling has not been inited | |||||
| PROF_ERROR_DEVICE_INVALID, // device id invalid | |||||
| PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics | |||||
| PROF_ERROR_REPEAT_START, // profiilng has already been started | |||||
| PROF_ERROR_NOT_STARTED, // profiling has not been started | |||||
| }; | |||||
| /** | |||||
| * @brief transfer profiling config in acl.json to sample config | |||||
| * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} | |||||
| * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} | |||||
| * @return ProfErrorCode | |||||
| */ | |||||
| MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); | |||||
| #ifndef OS_TYPE | |||||
| #define OS_TYPE 0 | |||||
| #endif // OS_TYPE | |||||
| /** | |||||
| * @name ProfInit | |||||
| * @brief init profiling | |||||
| * @param profInitCfg [IN] config of init profiling of json format | |||||
| * @return ProfErrorCode | |||||
| */ | |||||
| MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); | |||||
| /** | |||||
| * @name ProfAicoreMetrics | |||||
| * @brief aicore metrics enum | |||||
| */ | |||||
| enum ProfAicoreMetrics { | |||||
| PROF_AICORE_ARITHMATIC_THROUGHPUT = 0, | |||||
| PROF_AICORE_PIPELINE = 1, | |||||
| PROF_AICORE_SYNCHRONIZATION = 2, | |||||
| PROF_AICORE_MEMORY = 3, | |||||
| PROF_AICORE_INTERNAL_MEMORY = 4, | |||||
| PROF_AICORE_STALL = 5, | |||||
| PROF_AICORE_EVENT = 255 | |||||
| }; | |||||
| #if (OS_TYPE != LINUX) | |||||
| #define MSVP_PROF_API __declspec(dllexport) | |||||
| #else | |||||
| #define MSVP_PROF_API __attribute__((visibility("default"))) | |||||
| #endif | |||||
| /** | |||||
| * @name ProfConfig | |||||
| * @brief struct of ProfStart | |||||
| */ | |||||
| struct ProfConfig { | |||||
| uint32_t devNums; // length of device id list | |||||
| uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list | |||||
| ProfAicoreMetrics aicoreMetrics; // aicore metric | |||||
| uint64_t dataTypeConfig; // data type to start profiling | |||||
| }; | |||||
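Filling the struct is a matter of setting the device list and OR-ing the desired masks; a minimal sketch, assuming MSVP_MAX_DEV_NUM >= 1 (all values are placeholders):

    ProfConfig cfg{};                          // zero-initializes the device list
    cfg.devNums = 1;                           // profile a single device
    cfg.devIdList[0] = 0;                      // physical device id 0
    cfg.aicoreMetrics = PROF_AICORE_PIPELINE;  // one of ProfAicoreMetrics
    cfg.dataTypeConfig = PROF_TASK_TIME_MASK | PROF_TRAINING_TRACE_MASK;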
| #include <cstdint> | |||||
| namespace Msprofiler { | |||||
| namespace Api { | |||||
| /** | /** | ||||
| * @name ProfStartProfiling | |||||
| * @brief start profiling | |||||
| * @param profStartCfg [IN] config to start profiling | |||||
| * @return ProfErrorCode | |||||
| * @name ProfGetOpExecutionTime | |||||
| * @brief get the op execution time for a specific part of the data | |||||
| * @param data [IN] data read from pipe | |||||
| * @param len [IN] data length | |||||
| * @param index [IN] index of part(op) | |||||
| * @return op execution time (us) | |||||
| */ | */ | ||||
| MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); | |||||
| MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); | |||||
| } | |||||
| } | |||||
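How the caller obtains the pipe data is outside this header; assuming a buffer has already been read, the per-op lookup is a single call (buf and len are placeholders):

    // buf/len hold one chunk read from the profiling pipe (acquisition not shown).
    uint64_t op0_us = Msprofiler::Api::ProfGetOpExecutionTime(buf, len, 0);
    uint64_t op1_us = Msprofiler::Api::ProfGetOpExecutionTime(buf, len, 1);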
| /** | |||||
| * @name ProfStopConfig | |||||
| * @brief struct of ProfStop | |||||
| */ | |||||
| struct ProfStopConfig { | |||||
| uint64_t padding; | |||||
| }; | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| /** | |||||
| * @name ProfStopProfiling | |||||
| * @brief stop profiling | |||||
| * @param profStopCfg [IN] config to stop profiling | |||||
| * @return ProfErrorCode | |||||
| */ | |||||
| MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); | |||||
| /** | |||||
| * @name ProfFinalize | |||||
| * @brief finalize profiling task | |||||
| * @return ProfErrorCode | |||||
| */ | |||||
| MSVP_PROF_API int32_t ProfFinalize(); | |||||
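Taken together, the entry points declared in this header form an init/start/stop/finalize lifecycle. A minimal sketch under two stated assumptions: the init JSON keys shown are placeholders, and ProfStopProfiling accepts the same ProfConfig used to start:

    int ProfileWorkload() {
      if (ProfInit(R"({"result_path":"/tmp/prof"})") != PROF_ERROR_NONE) {
        return -1;                      // init config keys are an assumption
      }
      ProfConfig cfg{};
      cfg.devNums = 1;
      cfg.devIdList[0] = 0;
      cfg.aicoreMetrics = PROF_AICORE_MEMORY;
      cfg.dataTypeConfig = PROF_TASK_TIME_MASK;
      if (ProfStartProfiling(&cfg) != PROF_ERROR_NONE) {
        ProfFinalize();                 // roll back the init on failure
        return -1;
      }
      // ... run the workload being profiled ...
      ProfStopProfiling(&cfg);          // stop with the same device list
      return ProfFinalize() == PROF_ERROR_NONE ? 0 : -1;
    }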
| MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); | |||||
| /** | |||||
| * @name ProfGetDataTypeConfig | |||||
| * @brief get the dataTypeConfig that profiling was started with on one device | |||||
| * @param deviceId [IN] deviceId to get dataTypeConfig | |||||
| * @param dataTypeConfig [OUT] result get | |||||
| * @return ProfErrorCode | |||||
| */ | |||||
| MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); | |||||
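The returned value can be tested against the PROF_*_MASK defines to see what a running session is collecting; a short sketch:

    uint64_t started = 0;
    if (ProfGetDataTypeConfig(0 /* deviceId */, started) == PROF_ERROR_NONE &&
        (started & PROF_AICPU_TRACE_MASK) != 0) {
      // AICPU trace was part of the config profiling started with on device 0
    }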
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // MSPROF_ENGINE_PROF_ACL_API_H_ | |||||
| #endif // MSPROFILER_API_PROF_ACL_API_H_ | |||||
| @@ -16,7 +16,16 @@ | |||||
| #ifndef MSPROF_ENGINE_PROF_MGR_CORE_H_ | #ifndef MSPROF_ENGINE_PROF_MGR_CORE_H_ | ||||
| #define MSPROF_ENGINE_PROF_MGR_CORE_H_ | #define MSPROF_ENGINE_PROF_MGR_CORE_H_ | ||||
| #ifndef OS_TYPE | |||||
| #define OS_TYPE 0 | |||||
| #endif // OS_TYPE | |||||
| #if (OS_TYPE != LINUX) | |||||
| #define MSVP_PROF_API __declspec(dllexport) | |||||
| #else | |||||
| #define MSVP_PROF_API __attribute__((visibility("default"))) | #define MSVP_PROF_API __attribute__((visibility("default"))) | ||||
| #endif | |||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| @@ -120,15 +120,15 @@ typedef struct tagKV { | |||||
| } KeyValue; | } KeyValue; | ||||
| typedef enum { | typedef enum { | ||||
| APPLICATION = 0, | |||||
| SYSTEM | |||||
| APPLICATION = 0, | |||||
| SYSTEM | |||||
| } ProcessType; | } ProcessType; | ||||
| typedef struct { | typedef struct { | ||||
| ProcessType type; | |||||
| unsigned int pid; | |||||
| unsigned int deviceId; | |||||
| char reserved[RESERVERD_LENGTH]; | |||||
| ProcessType type; | |||||
| unsigned int pid; | |||||
| unsigned int deviceId; | |||||
| char reserved[RESERVERD_LENGTH]; | |||||
| } LogAttr; | } LogAttr; | ||||
| /** | /** | ||||
| @@ -141,7 +141,7 @@ enum { | |||||
| IDEDD, /**< IDE daemon device */ | IDEDD, /**< IDE daemon device */ | ||||
| IDEDH, /**< IDE daemon host */ | IDEDH, /**< IDE daemon host */ | ||||
| HCCL, /**< HCCL */ | HCCL, /**< HCCL */ | ||||
| FMK, /**< Framework */ | |||||
| FMK, /**< Adapter */ | |||||
| HIAIENGINE, /**< Matrix */ | HIAIENGINE, /**< Matrix */ | ||||
| DVPP, /**< DVPP */ | DVPP, /**< DVPP */ | ||||
| RUNTIME, /**< Runtime */ | RUNTIME, /**< Runtime */ | ||||
| @@ -162,11 +162,11 @@ enum { | |||||
| MDCDEFAULT, /**< MDC undefine */ | MDCDEFAULT, /**< MDC undefine */ | ||||
| MDCSC, /**< MDC spatial cognition */ | MDCSC, /**< MDC spatial cognition */ | ||||
| MDCPNC, | MDCPNC, | ||||
| MLL, | |||||
| MLL, /**< abandoned */ | |||||
| DEVMM, /**< Dlog memory management */ | DEVMM, /**< Dlog memory management */ | ||||
| KERNEL, /**< Kernel */ | KERNEL, /**< Kernel */ | ||||
| LIBMEDIA, /**< Libmedia */ | LIBMEDIA, /**< Libmedia */ | ||||
| CCECPU, /**< ai cpu */ | |||||
| CCECPU, /**< aicpu schedule */ | |||||
| ASCENDDK, /**< AscendDK */ | ASCENDDK, /**< AscendDK */ | ||||
| ROS, /**< ROS */ | ROS, /**< ROS */ | ||||
| HCCP, | HCCP, | ||||
| @@ -179,7 +179,7 @@ enum { | |||||
| TSDUMP, /**< TSDUMP module */ | TSDUMP, /**< TSDUMP module */ | ||||
| AICPU, /**< AICPU module */ | AICPU, /**< AICPU module */ | ||||
| LP, /**< LP module */ | LP, /**< LP module */ | ||||
| TDT, | |||||
| TDT, /**< tsdaemon or aicpu schedule */ | |||||
| FE, | FE, | ||||
| MD, | MD, | ||||
| MB, | MB, | ||||
| @@ -381,13 +381,13 @@ DLL_EXPORT void DlogFlush(void); | |||||
| * @ingroup slog | * @ingroup slog | ||||
| * @brief Internal log interface, other modules are not allowed to call this interface | * @brief Internal log interface, other modules are not allowed to call this interface | ||||
| */ | */ | ||||
| void DlogErrorInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); | |||||
| void DlogWarnInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); | |||||
| void DlogInfoInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); | |||||
| void DlogDebugInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); | |||||
| void DlogEventInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); | |||||
| void DlogInner(int moduleId, int level, const char *fmt, ...) __attribute__((format(printf, 3, 4))); | |||||
| void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...) __attribute__((format(printf, 5, 6))); | |||||
| void DlogErrorInner(int moduleId, const char *fmt, ...); | |||||
| void DlogWarnInner(int moduleId, const char *fmt, ...); | |||||
| void DlogInfoInner(int moduleId, const char *fmt, ...); | |||||
| void DlogDebugInner(int moduleId, const char *fmt, ...); | |||||
| void DlogEventInner(int moduleId, const char *fmt, ...); | |||||
| void DlogInner(int moduleId, int level, const char *fmt, ...); | |||||
| void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| #ifndef LOG_CPP | #ifndef LOG_CPP | ||||
| @@ -500,8 +500,8 @@ DLL_EXPORT void DlogFlushForC(void); | |||||
| * @ingroup slog | * @ingroup slog | ||||
| * @brief Internal log interface, other modules are not allowed to call this interface | * @brief Internal log interface, other modules are not allowed to call this interface | ||||
| */ | */ | ||||
| void DlogInnerForC(int moduleId, int level, const char *fmt, ...) __attribute__((format(printf, 3, 4))); | |||||
| void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...) __attribute__((format(printf, 5, 6))); | |||||
| void DlogInnerForC(int moduleId, int level, const char *fmt, ...); | |||||
| void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); | |||||
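Dropping __attribute__((format(printf, ...))) from these declarations is not cosmetic: the compiler stops cross-checking format strings against their arguments at every call site. A sketch of the class of bug the attribute used to catch at compile time (the direct call is only illustrative, since the header forbids external callers; module id and level are placeholders):

    // With the format attribute, -Wformat flags the %d/char* mismatch below
    // at compile time; without it, the call compiles and misbehaves at run time.
    DlogInner(RUNTIME, 0 /* level, placeholder */, "task id %d", "not-an-int");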
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -1,72 +1,75 @@ | |||||
| /** | |||||
| * @file tune_api.h | |||||
| * | |||||
| * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n | |||||
| * | |||||
| * This program is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n | |||||
| * Description: mstune tuning API header file | |||||
| */ | |||||
| /** @defgroup mstune mstune tuning APIs */ | |||||
| #ifndef TUNE_API_H | |||||
| #define TUNE_API_H | |||||
| #include <vector> | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include "graph/graph.h" | |||||
| #include "ge/ge_api.h" | |||||
| /** | |||||
| * @ingroup mstune | |||||
| * | |||||
| * mstune status | |||||
| */ | |||||
| enum MsTuneStatus { | |||||
| MSTUNE_SUCCESS, /**< tune success */ | |||||
| MSTUNE_FAILED, /**< tune failed */ | |||||
| }; | |||||
| // Option keys: for the train option sets | |||||
| const std::string MSTUNE_SELF_KEY = "mstune"; | |||||
| const std::string MSTUNE_GEINIT_KEY = "initialize"; | |||||
| const std::string MSTUNE_GESESS_KEY = "session"; | |||||
| /** | |||||
| * @ingroup mstune | |||||
| * @par Description: command-line tuning | |||||
| * | |||||
| * @attention None | |||||
| * @param option [IN] tuning options | |||||
| * @param msg [OUT] message returned when tuning fails | |||||
| * @retval #MSTUNE_SUCCESS executed successfully | |||||
| * @retval #MSTUNE_FAILED execution failed | |||||
| * @par Dependencies: | |||||
| * @li tune_api.cpp: the development package this API belongs to. | |||||
| * @li tune_api.h: the header file where this API is declared. | |||||
| * @see None | |||||
| * @since | |||||
| */ | |||||
| MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg); | |||||
| /** | |||||
| * @ingroup mstune | |||||
| * @par Description: gradient tuning | |||||
| * | |||||
| * @attention None | |||||
| * @param tuningGraph [IN] graph to be tuned | |||||
| * @param dependGraph [IN] dependency graphs of the tuning graph | |||||
| * @param session [IN] GE session handle | |||||
| * @param option [IN] option set, containing tuning options and GE options | |||||
| * @retval #MSTUNE_SUCCESS executed successfully | |||||
| * @retval #MSTUNE_FAILED execution failed | |||||
| * @par Dependencies: | |||||
| * @li tune_api.cpp: the development package this API belongs to. | |||||
| * @li tune_api.h: the header file where this API is declared. | |||||
| * @see None | |||||
| * @since | |||||
| */ | |||||
| extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph, | |||||
| ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option); | |||||
| #endif | |||||
| /** | |||||
| * @file tune_api.h | |||||
| * | |||||
| * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.\n | |||||
| * | |||||
| * This program is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n | |||||
| * Description: aoe tuning API header file | |||||
| */ | |||||
| /** @defgroup aoe aoe tuning APIs */ | |||||
| #ifndef TUNE_API_H | |||||
| #define TUNE_API_H | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include "ge/ge_api.h" | |||||
| #include "aoe_types.h" | |||||
| /** | |||||
| * @ingroup aoe | |||||
| * @par Description: command-line tuning | |||||
| * | |||||
| * @attention None | |||||
| * @param option [IN] tuning options | |||||
| * @param msg [OUT] message returned when tuning fails | |||||
| * @retval #AOE_SUCCESS executed successfully | |||||
| * @retval #AOE_FAILED execution failed | |||||
| * @par Dependencies: | |||||
| * @li tune_api.cpp: the development package this API belongs to. | |||||
| * @li tune_api.h: the header file where this API is declared. | |||||
| * @see None | |||||
| * @since | |||||
| */ | |||||
| AoeStatus AoeOfflineTuning(const std::map<std::string, std::string> &option, std::string &msg); | |||||
| /** | |||||
| * @ingroup aoe | |||||
| * @par Description: gradient tuning | |||||
| * | |||||
| * @attention None | |||||
| * @param tuningGraph [IN] graph to be tuned | |||||
| * @param dependGraph [IN] dependency graphs of the tuning graph | |||||
| * @param session [IN] GE session handle | |||||
| * @param option [IN] option set, containing tuning options and GE options | |||||
| * @retval #MSTUNE_SUCCESS executed successfully | |||||
| * @retval #MSTUNE_FAILED execution failed | |||||
| * @par Dependencies: | |||||
| * @li tune_api.cpp: the development package this API belongs to. | |||||
| * @li tune_api.h: the header file where this API is declared. | |||||
| * @see None | |||||
| * @since | |||||
| */ | |||||
| extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph, | |||||
| ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option); | |||||
| /** | |||||
| * @ingroup aoe | |||||
| * @par Description: gradient tuning | |||||
| * | |||||
| * @attention None | |||||
| * @param tuningGraph [IN] graph to be tuned | |||||
| * @param dependGraph [IN] dependency graphs of the tuning graph | |||||
| * @param session [IN] GE session handle | |||||
| * @param option [IN] option set, containing tuning options and GE options | |||||
| * @retval #AOE_SUCCESS executed successfully | |||||
| * @retval #AOE_FAILED execution failed | |||||
| * @par Dependencies: | |||||
| * @li tune_api.cpp: the development package this API belongs to. | |||||
| * @li tune_api.h: the header file where this API is declared. | |||||
| * @see None | |||||
| * @since | |||||
| */ | |||||
| extern "C" AoeStatus AoeOnlineTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph, | |||||
| ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option); | |||||
| #endif | |||||
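A hedged sketch of calling the renamed entry points; AoeStatus and AOE_SUCCESS are expected to come from aoe_types.h, and the option key shown is a placeholder, not a documented value:

    #include <map>
    #include <string>

    std::map<std::string, std::string> opts = {{"work_path", "/tmp/aoe"}};  // key is a placeholder
    std::string msg;
    if (AoeOfflineTuning(opts, msg) != AOE_SUCCESS) {
      // msg carries the failure description returned by the tuner
    }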