diff --git a/CMakeLists.txt b/CMakeLists.txt
index 60509838..5e58eeba 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -125,7 +125,6 @@ else ()
                 message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
             endif()
         endif()
-
         set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef)
         set(PARSER_DIR ${CMAKE_CURRENT_LIST_DIR}/parser)
         set(GE_DEPEND_DIR ${CMAKE_CURRENT_LIST_DIR}/..)
@@ -158,6 +157,7 @@ else ()
     elseif(ENABLE_MS_TESTCASES)
         include(cmake/external_libs/protobuf_static.cmake)
         include(cmake/external_libs/protoc.cmake)
+        include(cmake/external_libs/json.cmake)
         include(cmake/external_libs/securec.cmake)
         include(cmake/FindModule.cmake)
         include(cmake/intf_pub_linux.cmake)
@@ -175,5 +175,4 @@ else ()
     endif()
 
     add_subdirectory(ge)
-
 endif ()
diff --git a/cmake/external_libs/json.cmake b/cmake/external_libs/json.cmake
index 3c1cd012..04659ebc 100755
--- a/cmake/external_libs/json.cmake
+++ b/cmake/external_libs/json.cmake
@@ -9,10 +9,6 @@ if (GE_PB_PKG)
     set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
     set(MD5 "0dc903888211db3a0f170304cd9f3a89")
     set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
-#elseif (ENABLE_GITEE)
-#    set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
-#    set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
-#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
 else()
     set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
     set(MD5 "0dc903888211db3a0f170304cd9f3a89")
diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt
index 3243766f..ffea784b 100644
--- a/ge/ge_runtime/CMakeLists.txt
+++ b/ge/ge_runtime/CMakeLists.txt
@@ -16,6 +16,7 @@ set(GE_SRC_LIST
     "task/label_goto_task.cc"
     "task/label_set_task.cc"
     "task/label_switch_task.cc"
+    "task/label_manager.cc"
 )
 
 add_library(ge_runtime SHARED ${GE_SRC_LIST})
diff --git a/ge/ge_runtime/task/hccl_task.cc b/ge/ge_runtime/task/hccl_task.cc
index b1c7158c..bfe0d0f3 100644
--- a/ge/ge_runtime/task/hccl_task.cc
+++ b/ge/ge_runtime/task/hccl_task.cc
@@ -53,15 +53,7 @@ HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<Hccl
   }
 }
 
-HcclTask::~HcclTask() {
-  if (workspace_mem_ != nullptr) {
-    rtError_t rt_ret = rtFree(workspace_mem_);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtFree workspace_mem_ failed! ret: 0x%X.", rt_ret);
-    }
-    workspace_mem_ = nullptr;
-  }
-}
+HcclTask::~HcclTask() = default;  // workspace_mem_ is taken from task_info_ in Distribute(); nothing to release here
 
 bool HcclTask::Distribute() {
   // Ops kernel info store
@@ -80,11 +72,7 @@ bool HcclTask::Distribute() {
   SetSecondaryStream();
 
   if (task_info_->workspace_size() > 0) {
-    rtError_t rt_ret = rtMalloc(&workspace_mem_, task_info_->workspace_size(), RT_MEMORYINFO_HBM);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
-      return false;
-    }
+    workspace_mem_ = task_info_->workspace_addr();
   }
 
   GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl.");
diff --git a/ge/ge_runtime/task/label_goto_task.cc b/ge/ge_runtime/task/label_goto_task.cc
index 7cb6d556..a3b70971 100644
--- a/ge/ge_runtime/task/label_goto_task.cc
+++ b/ge/ge_runtime/task/label_goto_task.cc
@@ -16,33 +16,46 @@
 
 #include "ge_runtime/task/label_goto_task.h"
 #include "ge_runtime/task/task_factory.h"
-#include "framework/common/util.h"
 
 namespace ge {
 namespace model_runner {
 LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr<LabelGotoTaskInfo> &task_info)
-    : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), task_info_(task_info) {
+    : TaskRepeater<LabelGotoTaskInfo>(model_context, task_info),
+      task_info_(task_info),
+      stream_(nullptr),
+      index_value_(nullptr) {  // allocated with rtMalloc later, the first time it is needed
   if (task_info_ == nullptr) {
     GELOGW("task_info_ is null!");
     return;
   }
   auto stream_list = model_context.stream_list();
   auto label_list = model_context.label_list();
+  rt_model_handle_ = model_context.rt_model_handle();
   uint32_t stream_id = task_info->stream_id();
-  uint32_t label_id = task_info->label_id();
+  label_id_ = task_info->label_id();
   GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id);
-  GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id);
-  if (stream_id >= stream_list.size() || label_id >= label_list.size()) {
+  GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id_);
+  if (stream_id >= stream_list.size() || label_id_ >= label_list.size()) {
     GELOGW("Stream/Label id invalid.");
     return;
   }
   stream_ = stream_list[stream_id];
-  label_ = label_list[label_id];
+  label_manager_ = LabelManager::GetInstance();  // owning reference; LabelManager keeps only a weak_ptr to itself
+  if (label_manager_ == nullptr) {
+    GELOGW("Get label manager instance failed.");
+    return;
+  }
+  label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, {label_id_}, label_list);  // single-entry label table
 }
 
 LabelGotoTask::~LabelGotoTask() {
-  GE_FREE_RT_LOG(label_info_);
-  GE_FREE_RT_LOG(index_value_);
+  if (index_value_ != nullptr) {  // label_info_ needs no explicit free: its LabelGuard releases the table
+    rtError_t rt_ret = rtFree(index_value_);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "rtFree index_value_ failed! ret: 0x%X.", rt_ret);  // logged only; destruction continues
+    }
+    index_value_ = nullptr;
+  }
 }
 
 bool LabelGotoTask::Distribute() {
@@ -94,21 +107,34 @@ bool LabelGotoTask::CheckParamValid() {
     return false;
   }
 
-  if (label_ == nullptr) {
-    GELOGE(PARAM_INVALID, "label is null!");
+  if (label_info_ == nullptr) {
+    GELOGE(PARAM_INVALID, "label info is null!");
     return false;
   }
 
-  if (label_info_ != nullptr) {
-    GELOGE(PARAM_INVALID, "label_info_ has dirty data.");
-    return false;
+  if (index_value_ == nullptr) {
+    rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+      return false;
+    }
+
+    uint64_t index = 0;
+    rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &index, sizeof(index), RT_MEMCPY_HOST_TO_DEVICE);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+      return false;
+    }
   }
 
-  if (index_value_ != nullptr) {
-    GELOGE(PARAM_INVALID, "index_value_ has dirty data.");
+  void *label_info = label_info_->GetLabelInfo();
+  rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, 1, label_info, stream_);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
     return false;
   }
 
+  GELOGI("DistributeTask end.");
   return true;
 }
 
diff --git a/ge/ge_runtime/task/label_goto_task.h b/ge/ge_runtime/task/label_goto_task.h
index addbb700..e579c683 100644
--- a/ge/ge_runtime/task/label_goto_task.h
+++ b/ge/ge_runtime/task/label_goto_task.h
@@ -18,7 +18,11 @@
 #define GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_
 
 #include <memory>
+#include <vector>
+#include <map>
+#include <mutex>
 #include "ge_runtime/task/task.h"
+#include "ge_runtime/task/label_manager.h"
 
 namespace ge {
 namespace model_runner {
@@ -31,13 +35,13 @@ class LabelGotoTask : public TaskRepeater<LabelGotoTaskInfo> {
   bool Distribute() override;
 
  private:
-  bool CheckParamValid();
-
   std::shared_ptr<LabelGotoTaskInfo> task_info_;
-  void *stream_{nullptr};
-  void *label_{nullptr};
-  void *label_info_{nullptr};
-  void *index_value_{nullptr};
+  void *stream_{nullptr};
+  std::shared_ptr<LabelGuard> label_info_;  // shared label table obtained from LabelManager
+  void *index_value_{nullptr};  // device buffer, released with rtFree in the destructor
+  uint32_t label_id_{0};  // default keeps the value defined when the constructor returns early
+  rtModel_t rt_model_handle_{nullptr};  // likewise defined even on an early constructor return
+  std::shared_ptr<LabelManager> label_manager_;
 };
 }  // namespace model_runner
 }  // namespace ge
diff --git a/ge/ge_runtime/task/label_manager.cc b/ge/ge_runtime/task/label_manager.cc
new file mode 100644
index 00000000..a2b0c3aa
--- /dev/null
+++ b/ge/ge_runtime/task/label_manager.cc
@@ -0,0 +1,119 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ge_runtime/task/label_manager.h"
+#include <algorithm>
+#include <string>
+#include "runtime/mem.h"
+#include "runtime/rt_model.h"
+#include "common/ge_inner_error_codes.h"
+#include "framework/common/debug/ge_log.h"
+
+namespace ge {
+namespace model_runner {
+std::weak_ptr<LabelManager> LabelManager::instance_;
+std::mutex LabelManager::instance_mutex_;
+
+template <class T>
+static std::string GetVectorString(const std::vector<T> &vec) {
+  std::string joined;  // elements rendered with std::to_string and separated by ',', e.g. {1, 2} -> "1,2"
+  const char *separator = "";
+  for (const T &item : vec) {
+    joined += separator;
+    joined += std::to_string(item);
+    separator = ",";
+  }
+  return joined;
+}
+
+// Releases the guarded device buffer, if any; an rtFree failure is only logged.
+LabelGuard::~LabelGuard() {
+  void *const device_table = GetLabelInfo();
+  // A null pointer means there is nothing to free, which is treated like a successful release.
+  const rtError_t free_ret = (device_table == nullptr) ? RT_ERROR_NONE : rtFree(device_table);
+  if (free_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "rtFree label_info failed! ret: 0x%X.", free_ret);
+  }
+}
+
+// Returns the shared LabelManager, creating a new one when no previous instance is still alive.
+std::shared_ptr<LabelManager> LabelManager::GetInstance() {
+  std::lock_guard<std::mutex> guard(instance_mutex_);
+  std::shared_ptr<LabelManager> manager = instance_.lock();
+  if (manager == nullptr) {
+    // Only a weak reference is stored here, so the manager is destroyed together with its last user.
+    manager = std::make_shared<LabelManager>();
+    instance_ = manager;
+  }
+  return manager;
+}
+
+// Returns the device-side label table for `label_ids` of `model`, reusing a still-alive cached table if present.
+// `all_label` is indexed by label id; returns nullptr on an out-of-range id or when a runtime call fails.
+std::shared_ptr<LabelGuard> LabelManager::GetLabelInfo(rtModel_t model, const std::vector<uint32_t> &label_ids,
+                                                       const std::vector<void *> &all_label) {
+  std::lock_guard<std::mutex> lock(model_info_mapping_mutex_);
+  // operator[] creates the per-model cache on first use.
+  auto &label_map = model_info_mapping_[model];
+
+  std::string label_id_str = GetVectorString(label_ids);
+  auto label_iter = label_map.find(label_id_str);
+  if (label_iter != label_map.end()) {
+    auto label_guard = label_iter->second.lock();
+    if (label_guard != nullptr) {
+      GELOGI("model %p find same label id %s.", model, label_id_str.c_str());
+      return label_guard;
+    }
+  }
+
+  GELOGI("Alloc label id %s for model %p.", label_id_str.c_str(), model);
+  void *label_info = nullptr;
+  std::vector<void *> label_list;
+  bool status = true;
+  // Translate label ids into label handles; any out-of-range id fails the whole request.
+  std::transform(label_ids.begin(), label_ids.end(), std::back_inserter(label_list),
+                 [&all_label, &status](uint32_t idx) -> void * {
+                   if (idx >= all_label.size()) {
+                     GELOGE(PARAM_INVALID, "Invalid label id %u, all label list size %zu.", idx, all_label.size());
+                     status = false;
+                     return nullptr;
+                   }
+                   return all_label[idx];
+                 });
+  if (!status) {
+    GELOGE(PARAM_INVALID, "Get label info failed.");
+    return nullptr;
+  }
+  uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size();
+  rtError_t rt_ret = rtMalloc(&label_info, label_info_size, RT_MEMORY_HBM);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+    return nullptr;
+  }
+
+  // The guard owns the buffer from here on, so it is freed if the copy below fails.
+  auto label_guard = std::make_shared<LabelGuard>(label_info);
+  rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info, label_info_size);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+    return nullptr;
+  }
+
+  // Assign instead of emplace: emplace would leave an expired weak_ptr stored under this key in place.
+  label_map[label_id_str] = label_guard;
+  return label_guard;
+}
+}  // namespace model_runner
+}  // namespace ge
diff --git a/ge/ge_runtime/task/label_manager.h b/ge/ge_runtime/task/label_manager.h
new file mode 100644
index 00000000..f2c42c29
--- /dev/null
+++ b/ge/ge_runtime/task/label_manager.h
@@ -0,0 +1,63 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_
+#define GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <vector>
+#include <runtime/base.h>
+
+namespace ge {
+namespace model_runner {
+// Owns one device-side label table and releases it with rtFree when destroyed.
+class LabelGuard {
+ public:
+  explicit LabelGuard(void *label_info) : label_info_(reinterpret_cast<uintptr_t>(label_info)) {}
+  ~LabelGuard();
+  // Non-copyable: a copy would make two guards rtFree the same table.
+  LabelGuard(const LabelGuard &) = delete;
+  LabelGuard &operator=(const LabelGuard &) = delete;
+  void *GetLabelInfo() { return reinterpret_cast<void *>(label_info_); }
+
+ private:
+  uintptr_t label_info_;
+};
+
+// Hands out label tables that are shared between requests for the same label ids of the same model.
+class LabelManager {
+ public:
+  // Returns the shared manager, creating it when no instance is currently alive.
+  static std::shared_ptr<LabelManager> GetInstance();
+  // Returns the label table for label_ids (indices into all_label) of model, or nullptr on failure.
+  std::shared_ptr<LabelGuard> GetLabelInfo(rtModel_t model, const std::vector<uint32_t> &label_ids,
+                                           const std::vector<void *> &all_label);
+
+ private:
+  std::mutex model_info_mapping_mutex_;  // guards model_info_mapping_
+  // model handle -> (comma-separated label ids -> weak reference to the corresponding label table)
+  std::map<rtModel_t, std::map<std::string, std::weak_ptr<LabelGuard>>> model_info_mapping_;
+
+  static std::weak_ptr<LabelManager> instance_;
+  static std::mutex instance_mutex_;  // guards instance_
+};
+
+}  // namespace model_runner
+}  // namespace ge
+#endif  // GE_GE_RUNTIME_TASK_LABEL_MANAGER_H_
diff --git a/ge/ge_runtime/task/label_switch_task.cc b/ge/ge_runtime/task/label_switch_task.cc
index 8c795da9..cde278d9 100644
--- a/ge/ge_runtime/task/label_switch_task.cc
+++ b/ge/ge_runtime/task/label_switch_task.cc
@@ -24,14 +24,14 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context,
     : TaskRepeater<LabelSwitchTaskInfo>(model_context, task_info),
       task_info_(task_info),
       stream_(nullptr),
-      all_label_resource_(),
       label_info_(nullptr) {
   if (task_info_ == nullptr) {
     GELOGW("task_info_ is null!");
     return;
   }
 
-  all_label_resource_ = model_context.label_list();
+  rt_model_handle_ = model_context.rt_model_handle();
+  auto all_label_resource = model_context.label_list();
   auto stream_list = model_context.stream_list();
   uint32_t stream_id = task_info->stream_id();
   GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id);
@@ -40,18 +40,16 @@ LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context,
     return;
   }
   stream_ = stream_list[stream_id];
-}
-
-LabelSwitchTask::~LabelSwitchTask() {
-  if (label_info_ != nullptr) {
-    rtError_t rt_ret = rtFree(label_info_);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtFree fwkOpBuf failed! ret: 0x%X.", rt_ret);
-    }
-    label_info_ = nullptr;
+  label_manager_ = LabelManager::GetInstance();
+  if (label_manager_ == nullptr) {
+    GELOGW("Get label manager instance failed.");
+    return;
   }
+  label_info_ = label_manager_->GetLabelInfo(rt_model_handle_, task_info_->label_list(), all_label_resource);
 }
 
+LabelSwitchTask::~LabelSwitchTask() = default;  // label_info_ is a shared LabelGuard and frees its table itself
+
 bool LabelSwitchTask::Distribute() {
   GELOGI("LabelSwitchTask Distribute start.");
   if (!CheckParamValid()) {
@@ -117,8 +115,8 @@ bool LabelSwitchTask::CheckParamValid() {
     return false;
   }
 
-  if (label_info_ != nullptr) {
-    GELOGE(PARAM_INVALID, "label_info_ has dirty data.");
+  if (label_info_ == nullptr) {
+    GELOGE(PARAM_INVALID, "CopyLabelList failed, label info is null.");
     return false;
   }
 
@@ -126,6 +124,5 @@ bool LabelSwitchTask::CheckParamValid() {
 }
 
 REGISTER_TASK(TaskInfoType::LABEL_SWITCH, LabelSwitchTask, LabelSwitchTaskInfo);
-
 }  // namespace model_runner
 }  // namespace ge
diff --git a/ge/ge_runtime/task/label_switch_task.h b/ge/ge_runtime/task/label_switch_task.h
index 463faa31..cfa6877c 100644
--- a/ge/ge_runtime/task/label_switch_task.h
+++ b/ge/ge_runtime/task/label_switch_task.h
@@ -19,6 +19,7 @@
 
 #include <memory>
 #include "ge_runtime/task/task.h"
+#include "ge_runtime/task/label_manager.h"
 
 namespace ge {
 namespace model_runner {
@@ -35,8 +36,9 @@ class LabelSwitchTask : public TaskRepeater<LabelSwitchTaskInfo> {
 
   std::shared_ptr<LabelSwitchTaskInfo> task_info_;
   void *stream_;
-  std::vector<void *> all_label_resource_;
-  void *label_info_;
+  rtModel_t rt_model_handle_{nullptr};  // default keeps the handle defined when the constructor returns early
+  std::shared_ptr<LabelGuard> label_info_;  // shared label table obtained from LabelManager
+  std::shared_ptr<LabelManager> label_manager_;
 };
 }  // namespace model_runner
 }  // namespace ge
diff --git a/inc/external/acl/acl.h b/inc/external/acl/acl.h
new file mode 100644
index 00000000..8d261201
--- /dev/null
+++ b/inc/external/acl/acl.h
@@ -0,0 +1,82 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_H_
+#define INC_EXTERNAL_ACL_ACL_H_
+
+#include "acl_rt.h"
+#include "acl_op.h"
+#include "acl_mdl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Current version is 1.0.0
+#define ACL_MAJOR_VERSION 1
+#define ACL_MINOR_VERSION 0
+#define ACL_PATCH_VERSION 0
+
+/**
+ * @ingroup AscendCL
+ * @brief acl initialize
+ *
+ * @par Restriction
+ * The aclInit interface can be called only once in a process
+ * @param configPath [IN]    the config path; it can be NULL
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath);
+
+/**
+ * @ingroup AscendCL
+ * @brief acl finalize
+ *
+ * @par Restriction
+ * Need to call aclFinalize before the process exits.
+ * After calling aclFinalize, the services cannot continue to be used normally.
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclFinalize();
+
+/**
+ * @ingroup AscendCL
+ * @brief query ACL interface version
+ *
+ * @param majorVersion[OUT] ACL interface major version
+ * @param minorVersion[OUT] ACL interface minor version
+ * @param patchVersion[OUT] ACL interface patch version
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *minorVersion, int32_t *patchVersion);
+
+/**
+ * @ingroup AscendCL
+ * @brief get recent error message
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY const char *aclGetRecentErrMsg();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_H_
diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h
new file mode 100644
index 00000000..64d4bd81
--- /dev/null
+++ b/inc/external/acl/acl_base.h
@@ -0,0 +1,638 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_BASE_H_
+#define INC_EXTERNAL_ACL_ACL_BASE_H_
+
+#include <stdint.h>
+#include <stddef.h>
+#include "error_codes/rt_error_codes.h"
+#include "error_codes/ge_error_codes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_MSC_VER)  // ACL_FUNC_VISIBILITY marks exported API symbols only when FUNC_VISIBILITY is defined
+#ifdef FUNC_VISIBILITY
+#define ACL_FUNC_VISIBILITY __declspec(dllexport)
+#else
+#define ACL_FUNC_VISIBILITY
+#endif
+#else  // not MSVC
+#ifdef FUNC_VISIBILITY
+#define ACL_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define ACL_FUNC_VISIBILITY
+#endif
+#endif
+
+#ifdef __GNUC__
+#define ACL_DEPRECATED __attribute__((deprecated))
+#define ACL_DEPRECATED_MESSAGE(message) __attribute__((deprecated(message)))
+#elif defined(_MSC_VER)
+#define ACL_DEPRECATED __declspec(deprecated)
+#define ACL_DEPRECATED_MESSAGE(message) __declspec(deprecated(message))
+#else
+#define ACL_DEPRECATED
+#define ACL_DEPRECATED_MESSAGE(message)
+#endif
+
+typedef void *aclrtStream;
+typedef void *aclrtEvent;
+typedef void *aclrtContext;
+typedef int aclError;
+typedef uint16_t aclFloat16;
+typedef struct aclDataBuffer aclDataBuffer;
+typedef struct aclTensorDesc aclTensorDesc;
+
+static const int ACL_ERROR_NONE = 0;
+static const int ACL_SUCCESS = 0;
+
+static const int ACL_ERROR_INVALID_PARAM = 100000;
+static const int ACL_ERROR_UNINITIALIZE = 100001;
+static const int ACL_ERROR_REPEAT_INITIALIZE = 100002;
+static const int ACL_ERROR_INVALID_FILE = 100003;
+static const int ACL_ERROR_WRITE_FILE = 100004;
+static const int ACL_ERROR_INVALID_FILE_SIZE = 100005;
+static const int ACL_ERROR_PARSE_FILE = 100006;
+static const int ACL_ERROR_FILE_MISSING_ATTR = 100007;
+static const int ACL_ERROR_FILE_ATTR_INVALID = 100008;
+static const int ACL_ERROR_INVALID_DUMP_CONFIG = 100009;
+static const int ACL_ERROR_INVALID_PROFILING_CONFIG = 100010;
+static const int ACL_ERROR_INVALID_MODEL_ID = 100011;
+static const int ACL_ERROR_DESERIALIZE_MODEL = 100012;
+static const int ACL_ERROR_PARSE_MODEL = 100013;
+static const int ACL_ERROR_READ_MODEL_FAILURE = 100014;
+static const int ACL_ERROR_MODEL_SIZE_INVALID = 100015;
+static const int ACL_ERROR_MODEL_MISSING_ATTR = 100016;
+static const int ACL_ERROR_MODEL_INPUT_NOT_MATCH = 100017;
+static const int ACL_ERROR_MODEL_OUTPUT_NOT_MATCH = 100018;
+static const int ACL_ERROR_MODEL_NOT_DYNAMIC = 100019;
+static const int ACL_ERROR_OP_TYPE_NOT_MATCH = 100020;
+static const int ACL_ERROR_OP_INPUT_NOT_MATCH = 100021;
+static const int ACL_ERROR_OP_OUTPUT_NOT_MATCH = 100022;
+static const int ACL_ERROR_OP_ATTR_NOT_MATCH = 100023;
+static const int ACL_ERROR_OP_NOT_FOUND = 100024;
+static const int ACL_ERROR_OP_LOAD_FAILED = 100025;
+static const int ACL_ERROR_UNSUPPORTED_DATA_TYPE = 100026;
+static const int ACL_ERROR_FORMAT_NOT_MATCH = 100027;
+static const int ACL_ERROR_BIN_SELECTOR_NOT_REGISTERED = 100028;
+static const int ACL_ERROR_KERNEL_NOT_FOUND = 100029;
+static const int ACL_ERROR_BIN_SELECTOR_ALREADY_REGISTERED = 100030;
+static const int ACL_ERROR_KERNEL_ALREADY_REGISTERED = 100031;
+static const int ACL_ERROR_INVALID_QUEUE_ID = 100032;
+static const int ACL_ERROR_REPEAT_SUBSCRIBE = 100033;
+static const int ACL_ERROR_STREAM_NOT_SUBSCRIBE = 100034;
+static const int ACL_ERROR_THREAD_NOT_SUBSCRIBE = 100035;
+static const int ACL_ERROR_WAIT_CALLBACK_TIMEOUT = 100036;
+static const int ACL_ERROR_REPEAT_FINALIZE = 100037;
+static const int ACL_ERROR_NOT_STATIC_AIPP = 100038;
+static const int ACL_ERROR_COMPILING_STUB_MODE = 100039;
+static const int ACL_ERROR_GROUP_NOT_SET = 100040;
+static const int ACL_ERROR_GROUP_NOT_CREATE = 100041;
+static const int ACL_ERROR_PROF_ALREADY_RUN = 100042;
+static const int ACL_ERROR_PROF_NOT_RUN = 100043;
+static const int ACL_ERROR_DUMP_ALREADY_RUN = 100044;
+static const int ACL_ERROR_DUMP_NOT_RUN = 100045;
+static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046;
+static const int ACL_ERROR_PROF_API_CONFLICT = 148047;
+static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048;
+static const int ACL_ERROR_INVALID_OPP_PATH = 148049;
+static const int ACL_ERROR_OP_UNSUPPORTED_DYNAMIC = 148050;
+
+static const int ACL_ERROR_BAD_ALLOC = 200000;
+static const int ACL_ERROR_API_NOT_SUPPORT = 200001;
+static const int ACL_ERROR_INVALID_DEVICE = 200002;
+static const int ACL_ERROR_MEMORY_ADDRESS_UNALIGNED = 200003;
+static const int ACL_ERROR_RESOURCE_NOT_MATCH = 200004;
+static const int ACL_ERROR_INVALID_RESOURCE_HANDLE = 200005;
+static const int ACL_ERROR_FEATURE_UNSUPPORTED = 200006;
+static const int ACL_ERROR_PROF_MODULES_UNSUPPORTED = 200007;
+
+static const int ACL_ERROR_STORAGE_OVER_LIMIT = 300000;
+
+static const int ACL_ERROR_INTERNAL_ERROR = 500000;
+static const int ACL_ERROR_FAILURE = 500001;
+static const int ACL_ERROR_GE_FAILURE = 500002;
+static const int ACL_ERROR_RT_FAILURE = 500003;
+static const int ACL_ERROR_DRV_FAILURE = 500004;
+static const int ACL_ERROR_PROFILING_FAILURE = 500005;
+
+#define ACL_TENSOR_SHAPE_RANGE_NUM 2
+#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE
+
+typedef enum {
+  ACL_DT_UNDEFINED = -1,
+  ACL_FLOAT = 0,
+  ACL_FLOAT16 = 1,
+  ACL_INT8 = 2,
+  ACL_INT32 = 3,
+  ACL_UINT8 = 4,
+  ACL_INT16 = 6,
+  ACL_UINT16 = 7,
+  ACL_UINT32 = 8,
+  ACL_INT64 = 9,
+  ACL_UINT64 = 10,
+  ACL_DOUBLE = 11,
+  ACL_BOOL = 12,
+  ACL_STRING = 13,
+} aclDataType;
+
+typedef enum {
+  ACL_FORMAT_UNDEFINED = -1,
+  ACL_FORMAT_NCHW = 0,
+  ACL_FORMAT_NHWC = 1,
+  ACL_FORMAT_ND = 2,
+  ACL_FORMAT_NC1HWC0 = 3,
+  ACL_FORMAT_FRACTAL_Z = 4,
+  ACL_FORMAT_NC1HWC0_C04 = 12,
+  ACL_FORMAT_NDHWC = 27,
+  ACL_FORMAT_FRACTAL_NZ = 29,
+  ACL_FORMAT_NCDHW = 30,
+  ACL_FORMAT_NDC1HWC0 = 32,
+  ACL_FRACTAL_Z_3D = 33
+} aclFormat;
+
+typedef enum {
+  ACL_DEBUG = 0,
+  ACL_INFO = 1,
+  ACL_WARNING = 2,
+  ACL_ERROR = 3,
+} aclLogLevel;
+
+typedef enum {
+  ACL_MEMTYPE_DEVICE = 0,
+  ACL_MEMTYPE_HOST = 1,
+} aclMemType;
+
+/**
+ * @ingroup AscendCL
+ * @brief Converts data of type aclFloat16 to data of type float
+ *
+ * @param value [IN]   Data to be converted
+ *
+ * @retval Transformed data
+ */
+ACL_FUNC_VISIBILITY float aclFloat16ToFloat(aclFloat16 value);
+
+/**
+ * @ingroup AscendCL
+ * @brief Converts data of type float to data of type aclFloat16
+ *
+ * @param value [IN]   Data to be converted
+ *
+ * @retval Transformed data
+ */
+ACL_FUNC_VISIBILITY aclFloat16 aclFloatToFloat16(float value);
+
+/**
+ * @ingroup AscendCL
+ * @brief create data of aclDataBuffer
+ *
+ * @param data [IN]    pointer to data
+ * @li Need to be managed by the user,
+ *  call aclrtMalloc interface to apply for memory,
+ *  call aclrtFree interface to release memory
+ *
+ * @param size [IN]    size of data in bytes
+ *
+ * @retval pointer to created instance. nullptr if run out of memory
+ *
+ * @see aclrtMalloc | aclrtFree
+ */
+ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of aclDataBuffer
+ *
+ * @par Function
+ *  Only the aclDataBuffer type data is destroyed here.
+ *  The memory of the data passed in when the aclCreateDataBuffer interface
+ *  is called to create aclDataBuffer type data must be released by the user
+ *
+ * @param  dataBuffer [IN]   pointer to the aclDataBuffer
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclCreateDataBuffer
+ */
+ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief update new data of aclDataBuffer
+ *
+ * @param dataBuffer [OUT]    pointer to aclDataBuffer
+ * @li The old data needs to be released by the user, otherwise a memory leak may occur
+ *  call aclGetDataBufferAddr interface to get old data address
+ *  call aclrtFree interface to release memory
+ *
+ * @param data [IN]    pointer to new data
+ * @li Need to be managed by the user,
+ *  call aclrtMalloc interface to apply for memory,
+ *  call aclrtFree interface to release memory
+ *
+ * @param size [IN]    size of data in bytes
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtMalloc | aclrtFree | aclGetDataBufferAddr
+ */
+ACL_FUNC_VISIBILITY aclError aclUpdateDataBuffer(aclDataBuffer *dataBuffer, void *data, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data address from aclDataBuffer
+ *
+ * @param dataBuffer [IN]    pointer to the data of aclDataBuffer
+ *
+ * @retval data address
+ */
+ACL_FUNC_VISIBILITY void *aclGetDataBufferAddr(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data size of aclDataBuffer
+ *
+ * @param  dataBuffer [IN]    pointer to the data of aclDataBuffer
+ *
+ * @retval data size
+ */
+ACL_DEPRECATED_MESSAGE("aclGetDataBufferSize is deprecated, use aclGetDataBufferSizeV2 instead")
+ACL_FUNC_VISIBILITY uint32_t aclGetDataBufferSize(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data size of aclDataBuffer to replace aclGetDataBufferSize
+ *
+ * @param  dataBuffer [IN]    pointer to the data of aclDataBuffer
+ *
+ * @retval data size
+ */
+ACL_FUNC_VISIBILITY size_t aclGetDataBufferSizeV2(const aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief get size of aclDataType
+ *
+ * @param  dataType [IN]    aclDataType data the size to get
+ *
+ * @retval size of the aclDataType
+ */
+ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType);
+
+// interfaces of tensor desc
+/**
+ * @ingroup AscendCL
+ * @brief create data aclTensorDesc
+ *
+ * @param  dataType [IN]    Data types described by tensor
+ * @param  numDims [IN]     the number of dimensions of the shape
+ * @param  dims [IN]        the size of the specified dimension
+ * @param  format [IN]      tensor format
+ *
+ * @retval aclTensorDesc pointer.
+ * @retval nullptr if param is invalid or run out of memory
+ */
+ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims,
+                                                       aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data aclTensorDesc
+ *
+ * @param desc [IN]     pointer to the data of aclTensorDesc to destroy
+ */
+ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief set tensor shape range for aclTensorDesc
+ *
+ * @param  desc [OUT]     pointer to the data of aclTensorDesc
+ * @param  dimsCount [IN]     the number of dimensions of the shape
+ * @param  dimsRange [IN]     the range of dimensions of the shape
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount,
+                                                    int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data type specified by the tensor description
+ *
+ * @param desc [IN]        pointer to the instance of aclTensorDesc
+ *
+ * @retval data type specified by the tensor description.
+ * @retval ACL_DT_UNDEFINED if description is null
+ */
+ACL_FUNC_VISIBILITY aclDataType aclGetTensorDescType(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get data format specified by the tensor description
+ *
+ * @param  desc [IN]        pointer to the instance of aclTensorDesc
+ *
+ * @retval data format specified by the tensor description.
+ * @retval ACL_FORMAT_UNDEFINED if description is null
+ */
+ACL_FUNC_VISIBILITY aclFormat aclGetTensorDescFormat(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get tensor size specified by the tensor description
+ *
+ * @param  desc [IN]        pointer to the instance of aclTensorDesc
+ *
+ * @retval data size specified by the tensor description.
+ * @retval 0 if description is null
+ */
+ACL_FUNC_VISIBILITY size_t aclGetTensorDescSize(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get element count specified by the tensor description
+ *
+ * @param  desc [IN]        pointer to the instance of aclTensorDesc
+ *
+ * @retval element count specified by the tensor description.
+ * @retval 0 if description is null
+ */
+ACL_FUNC_VISIBILITY size_t aclGetTensorDescElementCount(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief get number of dims specified by the tensor description
+ *
+ * @param  desc [IN]        pointer to the instance of aclTensorDesc
+ *
+ * @retval number of dims specified by the tensor description.
+ * @retval 0 if description is null
+ * @retval ACL_UNKNOWN_RANK if the tensor dim is -2
+ */
+ACL_FUNC_VISIBILITY size_t aclGetTensorDescNumDims(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of the specified dim in the tensor description
+ *
+ * @param  desc [IN]        pointer to the instance of aclTensorDesc
+ * @param  index [IN]       index of dims, start from 0.
+ *
+ * @retval dim specified by the tensor description and index.
+ * @retval -1 if description or index is invalid
+ */
+ACL_DEPRECATED_MESSAGE("aclGetTensorDescDim is deprecated, use aclGetTensorDescDimV2 instead")
+ACL_FUNC_VISIBILITY int64_t aclGetTensorDescDim(const aclTensorDesc *desc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of the specified dim in the tensor description
+ *
+ * @param  desc [IN]        pointer to the instance of aclTensorDesc
+ * @param  index [IN]       index of dims, start from 0.
+ * @param  dimSize [OUT]    size of the specified dim.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, size_t index, int64_t *dimSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the range of the specified dim in the tensor description
+ *
+ * @param  desc [IN]        pointer to the instance of aclTensorDesc
+ * @param  index [IN]       index of dims, start from 0.
+ * @param  dimRangeNum [IN]     number of dimRange.
+ * @param  dimRange [OUT]       range of the specified dim.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum,
+                                                      int64_t *dimRange);
+
+/**
+ * @ingroup AscendCL
+ * @brief set tensor description name
+ *
+ * @param desc [OUT]       pointer to the instance of aclTensorDesc
+ * @param name [IN]        tensor description name
+ */
+ACL_FUNC_VISIBILITY void aclSetTensorDescName(aclTensorDesc *desc, const char *name);
+
+/**
+ * @ingroup AscendCL
+ * @brief get tensor description name
+ *
+ * @param  desc [IN]        pointer to the instance of aclTensorDesc
+ *
+ * @retval tensor description name.
+ * @retval empty string if description is null
+ */
+ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Convert the format in the source aclTensorDesc according to
+ * the specified dstFormat to generate a new target aclTensorDesc.
+ * The format in the source aclTensorDesc remains unchanged.
+ *
+ * @param  srcDesc [IN]     pointer to the source tensor desc
+ * @param  dstFormat [IN]   destination format
+ * @param  dstDesc [OUT]    pointer to the pointer to the destination tensor desc
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat,
+                                                      aclTensorDesc **dstDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the storage format specified by the tensor description
+ *
+ * @param  desc [OUT]     pointer to the instance of aclTensorDesc
+ * @param  format [IN]    the storage format
+ *
+ * @retval ACL_SUCCESS    The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_DEPRECATED_MESSAGE("aclSetTensorStorageFormat is deprecated, use aclSetTensorFormat instead")
+ACL_FUNC_VISIBILITY aclError aclSetTensorStorageFormat(aclTensorDesc *desc, aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the storage shape specified by the tensor description
+ *
+ * @param  desc [OUT]      pointer to the instance of aclTensorDesc
+ * @param  numDims [IN]    the number of dimensions of the shape
+ * @param  dims [IN]       the size of the specified dimension
+ *
+ * @retval ACL_SUCCESS     The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_DEPRECATED_MESSAGE("aclSetTensorStorageShape is deprecated, use aclSetTensorShape instead")
+ACL_FUNC_VISIBILITY aclError aclSetTensorStorageShape(aclTensorDesc *desc, int numDims, const int64_t *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the format specified by the tensor description
+ *
+ * @param  desc [OUT]     pointer to the instance of aclTensorDesc
+ * @param  format [IN]    the storage format
+ *
+ * @retval ACL_SUCCESS    The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorFormat(aclTensorDesc *desc, aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the shape specified by the tensor description
+ *
+ * @param  desc [OUT]      pointer to the instance of aclTensorDesc
+ * @param  numDims [IN]    the number of dimensions of the shape
+ * @param  dims [IN]       the size of the specified dimension
+ *
+ * @retval ACL_SUCCESS     The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorShape(aclTensorDesc *desc, int numDims, const int64_t *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the original format specified by the tensor description
+ *
+ * @param  desc [OUT]     pointer to the instance of aclTensorDesc
+ * @param  format [IN]    the original format
+ *
+ * @retval ACL_SUCCESS    The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorOriginFormat(aclTensorDesc *desc, aclFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the original shape specified by the tensor description
+ *
+ * @param  desc [OUT]      pointer to the instance of aclTensorDesc
+ * @param  numDims [IN]    the number of dimensions of the shape
+ * @param  dims [IN]       the size of the specified dimension
+ *
+ * @retval ACL_SUCCESS     The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int numDims, const int64_t *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op description info
+ *
+ * @param desc [IN]     pointer to tensor description
+ * @param index [IN]    index of tensor
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get address of tensor
+ *
+ * @param desc [IN]    pointer to tensor description
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the dynamic input name specified by the tensor description
+ *
+ * @param  desc [OUT]      pointer to the instance of aclTensorDesc
+ * @param  dynamicInputName [IN]       pointer to the dynamic input name
+ *
+ * @retval ACL_SUCCESS     The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const char *dynamicInputName);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set const data specified by the tensor description
+ *
+ * @param  desc [OUT]      pointer to the instance of aclTensorDesc
+ * @param  dataBuffer [IN]       pointer to the const databuffer
+ * @param  length [IN]       the length of const databuffer
+ *
+ * @retval ACL_SUCCESS     The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set tensor memory type specified by the tensor description
+ *
+ * @param  desc [OUT]      pointer to the instance of aclTensorDesc
+ * @param  memType [IN]       ACL_MEMTYPE_DEVICE means device, ACL_MEMTYPE_HOST means host
+ *
+ * @retval ACL_SUCCESS     The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemType memType);
+
+/**
+ * @ingroup AscendCL
+ * @brief an interface for users to output APP logs
+ *
+ * @param logLevel [IN]    the level of current log
+ * @param func [IN]        the function where the log is located
+ * @param file [IN]        the file where the log is located
+ * @param line [IN]        line number in the source file where the log is located
+ * @param fmt [IN]         the format of current log
+ * @param ... [IN]         the value of current log
+ */
+ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
+                                   const char *fmt, ...);
+
+/**
+ * @ingroup AscendCL
+ * @brief get soc name (the function takes no parameters)
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY const char *aclrtGetSocName(void);
+
+#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_BASE_H_
diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h
new file mode 100644
index 00000000..2bf85e29
--- /dev/null
+++ b/inc/external/acl/acl_mdl.h
@@ -0,0 +1,1225 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_MODEL_H_
+#define INC_EXTERNAL_ACL_ACL_MODEL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "acl_base.h"
+#include "acl_rt.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ACL_MAX_DIM_CNT 128
+#define ACL_MAX_TENSOR_NAME_LEN 128
+#define ACL_MAX_BATCH_NUM 128
+#define ACL_MAX_HW_NUM 128
+#define ACL_MAX_SHAPE_COUNT 128
+#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF
+
+#define ACL_MDL_LOAD_FROM_FILE 1
+#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2
+#define ACL_MDL_LOAD_FROM_MEM 3
+#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4
+#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5
+#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6
+
+#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data"
+#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data"
+#define ACL_ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES "_datadump_original_op_names"
+
+typedef struct aclmdlDataset aclmdlDataset;
+typedef struct aclmdlDesc aclmdlDesc;
+typedef struct aclmdlAIPP aclmdlAIPP;
+typedef struct aclAippExtendInfo aclAippExtendInfo;
+typedef struct aclmdlConfigHandle aclmdlConfigHandle;
+
+typedef enum {
+  ACL_YUV420SP_U8 = 1,
+  ACL_XRGB8888_U8,
+  ACL_RGB888_U8,
+  ACL_YUV400_U8,
+  ACL_NC1HWC0DI_FP16,
+  ACL_NC1HWC0DI_S8,
+  ACL_ARGB8888_U8,
+  ACL_YUYV_U8,
+  ACL_YUV422SP_U8,
+  ACL_AYUV444_U8,
+  ACL_RAW10,
+  ACL_RAW12,
+  ACL_RAW16,
+  ACL_RAW24,
+  ACL_AIPP_RESERVED = 0xffff,
+} aclAippInputFormat;
+
+typedef enum {
+  ACL_MDL_PRIORITY_INT32 = 0,
+  ACL_MDL_LOAD_TYPE_SIZET,
+  ACL_MDL_PATH_PTR,     /**< pointer to model load path with deep copy */
+  ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */
+  ACL_MDL_MEM_SIZET,
+  ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */
+  ACL_MDL_WEIGHT_SIZET,
+  ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to workspace memory of model with shallow copy */
+  ACL_MDL_WORKSPACE_SIZET,
+  ACL_MDL_INPUTQ_NUM_SIZET,
+  ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */
+  ACL_MDL_OUTPUTQ_NUM_SIZET,
+  ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */
+} aclmdlConfigAttr;
+
+typedef enum {
+  ACL_DATA_WITHOUT_AIPP = 0,
+  ACL_DATA_WITH_STATIC_AIPP,
+  ACL_DATA_WITH_DYNAMIC_AIPP,
+  ACL_DYNAMIC_AIPP_NODE
+} aclmdlInputAippType;
+
+typedef struct aclmdlIODims {
+  char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */
+  size_t dimCount;                    /**< dim array count */
+  int64_t dims[ACL_MAX_DIM_CNT];      /**< dim data array */
+} aclmdlIODims;
+
+typedef struct aclAippDims {
+  aclmdlIODims srcDims;     /**< input dims before model transform */
+  size_t srcSize;           /**< input size before model transform */
+  aclmdlIODims aippOutdims; /**< aipp output dims */
+  size_t aippOutSize;       /**< aipp output size */
+} aclAippDims;
+
+typedef struct aclmdlBatch {
+  size_t batchCount;                 /**< batch array count */
+  uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */
+} aclmdlBatch;
+
+typedef struct aclmdlHW {
+  size_t hwCount;                 /**< height&width array count */
+  uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */
+} aclmdlHW;
+
+typedef struct aclAippInfo {
+  aclAippInputFormat inputFormat;
+  int32_t srcImageSizeW;
+  int32_t srcImageSizeH;
+  int8_t cropSwitch;
+  int32_t loadStartPosW;
+  int32_t loadStartPosH;
+  int32_t cropSizeW;
+  int32_t cropSizeH;
+  int8_t resizeSwitch;
+  int32_t resizeOutputW;
+  int32_t resizeOutputH;
+  int8_t paddingSwitch;
+  int32_t leftPaddingSize;
+  int32_t rightPaddingSize;
+  int32_t topPaddingSize;
+  int32_t bottomPaddingSize;
+  int8_t cscSwitch;
+  int8_t rbuvSwapSwitch;
+  int8_t axSwapSwitch;
+  int8_t singleLineMode;
+  int32_t matrixR0C0;
+  int32_t matrixR0C1;
+  int32_t matrixR0C2;
+  int32_t matrixR1C0;
+  int32_t matrixR1C1;
+  int32_t matrixR1C2;
+  int32_t matrixR2C0;
+  int32_t matrixR2C1;
+  int32_t matrixR2C2;
+  int32_t outputBias0;
+  int32_t outputBias1;
+  int32_t outputBias2;
+  int32_t inputBias0;
+  int32_t inputBias1;
+  int32_t inputBias2;
+  int32_t meanChn0;
+  int32_t meanChn1;
+  int32_t meanChn2;
+  int32_t meanChn3;
+  float minChn0;
+  float minChn1;
+  float minChn2;
+  float minChn3;
+  float varReciChn0;
+  float varReciChn1;
+  float varReciChn2;
+  float varReciChn3;
+  aclFormat srcFormat;
+  aclDataType srcDatatype;
+  size_t srcDimNum;
+  size_t shapeCount;
+  aclAippDims outDims[ACL_MAX_SHAPE_COUNT];
+  aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */
+} aclAippInfo;
+
+/**
+ * @ingroup AscendCL
+ * @brief Create data of type aclmdlDesc (the function takes no parameters)
+ *
+ * @retval the aclmdlDesc pointer
+ */
+ACL_FUNC_VISIBILITY aclmdlDesc *aclmdlCreateDesc(void);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of type aclmdlDesc
+ *
+ * @param modelDesc [IN]   Pointer to aclmdlDesc to be destroyed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlDestroyDesc(aclmdlDesc *modelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get aclmdlDesc data of the model according to the model ID
+ *
+ * @param  modelDesc [OUT]   aclmdlDesc pointer
+ * @param  modelId [IN]      model id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetDesc(aclmdlDesc *modelDesc, uint32_t modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the number of the inputs of
+ *        the model according to data of aclmdlDesc
+ *
+ * @param  modelDesc [IN]   aclmdlDesc pointer
+ *
+ * @retval input size with aclmdlDesc
+ */
+ACL_FUNC_VISIBILITY size_t aclmdlGetNumInputs(aclmdlDesc *modelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the number of the output of
+ *        the model according to data of aclmdlDesc
+ *
+ * @param  modelDesc [IN]   aclmdlDesc pointer
+ *
+ * @retval output size with aclmdlDesc
+ */
+ACL_FUNC_VISIBILITY size_t aclmdlGetNumOutputs(aclmdlDesc *modelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of the specified input according to
+ *        the data of type aclmdlDesc
+ *
+ * @param  modelDesc [IN]  aclmdlDesc pointer
+ * @param  index [IN] index of the input whose size is to be obtained,
+ *         the index value starts from 0
+ *
+ * @retval the size of the specified input
+ */
+ACL_FUNC_VISIBILITY size_t aclmdlGetInputSizeByIndex(aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of the specified output according to
+ *        the data of type aclmdlDesc
+ *
+ * @param modelDesc [IN]   aclmdlDesc pointer
+ * @param index [IN]  index of the output whose size is to be obtained,
+ *        the index value starts from 0
+ *
+ * @retval the size of the specified output
+ */
+ACL_FUNC_VISIBILITY size_t aclmdlGetOutputSizeByIndex(aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create data of type aclmdlDataset (the function takes no parameters)
+ *
+ * @retval the aclmdlDataset pointer
+ */
+ACL_FUNC_VISIBILITY aclmdlDataset *aclmdlCreateDataset(void);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of type aclmdlDataset
+ *
+ * @param  dataset [IN]  Pointer to aclmdlDataset to be destroyed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlDestroyDataset(const aclmdlDataset *dataset);
+
+/**
+ * @ingroup AscendCL
+ * @brief Add aclDataBuffer to aclmdlDataset
+ *
+ * @param dataset [OUT]    aclmdlDataset address of aclDataBuffer to be added
+ * @param dataBuffer [IN]  aclDataBuffer address to be added
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclDataBuffer *dataBuffer);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set aclTensorDesc to aclmdlDataset
+ *
+ * @param dataset [OUT]    aclmdlDataset address in which the aclTensorDesc is to be set
+ * @param tensorDesc [IN]  aclTensorDesc address to be set
+ * @param index [IN]       index of the tensorDesc to be set
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, aclTensorDesc *tensorDesc,
+                                                        size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the number of aclDataBuffer in aclmdlDataset
+ *
+ * @param dataset [IN]   aclmdlDataset pointer
+ *
+ * @retval the number of aclDataBuffer
+ */
+ACL_FUNC_VISIBILITY size_t aclmdlGetDatasetNumBuffers(const aclmdlDataset *dataset);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the aclDataBuffer in aclmdlDataset by index
+ *
+ * @param dataset [IN]   aclmdlDataset pointer
+ * @param index [IN]     the index of aclDataBuffer
+ *
+ * @retval Get successfully, return the address of aclDataBuffer
+ * @retval Failure return NULL
+ */
+ACL_FUNC_VISIBILITY aclDataBuffer *aclmdlGetDatasetBuffer(const aclmdlDataset *dataset, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load offline model data from files
+ * and manage memory internally by the system
+ *
+ * @par Function
+ * After the system finishes loading the model,
+ * the model ID returned is used as a mark to identify the model
+ * during subsequent operations
+ *
+ * @param modelPath [IN]   Storage path for offline model files
+ * @param modelId [OUT]    Model ID generated after
+ *        the system finishes loading the model
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t *modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load offline model data from memory and manage the memory of
+ * model running internally by the system
+ *
+ * @par Function
+ * After the system finishes loading the model,
+ * the model ID returned is used as a mark to identify the model
+ * during subsequent operations
+ *
+ * @param model [IN]      Model data stored in memory
+ * @param modelSize [IN]  model data size
+ * @param modelId [OUT]   Model ID generated after
+ *        the system finishes loading the model
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load offline model data from a file,
+ * and the user manages the memory of the model run by itself
+ *
+ * @par Function
+ * After the system finishes loading the model,
+ * the model ID returned is used as a mark to identify the model
+ * during subsequent operations.
+ * @param modelPath [IN]   Storage path for offline model files
+ * @param modelId [OUT]    Model ID generated after finishes loading the model
+ * @param workPtr [IN]     A pointer to the working memory
+ *                         required by the model on the Device,can be null
+ * @param workSize [IN]    The amount of working memory required by the model
+ * @param weightPtr [IN]   Pointer to model weight memory on Device
+ * @param weightSize [IN]  The amount of weight memory required by the model
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr,
+                                                       size_t workSize, void *weightPtr, size_t weightSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load offline model data from memory,
+ * and the user can manage the memory of model running
+ *
+ * @par Function
+ * After the system finishes loading the model,
+ * the model ID returned is used as a mark to identify the model
+ * during subsequent operations
+ * @param model [IN]      Model data stored in memory
+ * @param modelSize [IN]  model data size
+ * @param modelId [OUT]   Model ID generated after finishes loading the model
+ * @param workPtr [IN]    A pointer to the working memory
+ *                        required by the model on the Device,can be null
+ * @param workSize [IN]   work memory size
+ * @param weightPtr [IN]  Pointer to model weight memory on Device,can be null
+ * @param weightSize [IN] The amount of weight memory required by the model
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId,
+                                                      void *workPtr, size_t workSize, void *weightPtr,
+                                                      size_t weightSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief load model from file with async queue
+ *
+ * @param modelPath  [IN] model path
+ * @param modelId [OUT]   return model id if load success
+ * @param inputQ [IN]     input queue pointer
+ * @param inputQNum [IN]  input queue num
+ * @param outputQ [IN]    output queue pointer
+ * @param outputQNum [IN] output queue num
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint32_t *modelId, const uint32_t *inputQ,
+                                                     size_t inputQNum, const uint32_t *outputQ, size_t outputQNum);
+
+/**
+ * @ingroup AscendCL
+ * @brief load model from memory with async queue
+ *
+ * @param model [IN]      model memory which user manages
+ * @param modelSize [IN]  model size
+ * @param modelId [OUT]   return model id if load success
+ * @param inputQ [IN]     input queue pointer
+ * @param inputQNum [IN]  input queue num
+ * @param outputQ [IN]    output queue pointer
+ * @param outputQNum [IN] output queue num
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId,
+                                                    const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ,
+                                                    size_t outputQNum);
+
+/**
+ * @ingroup AscendCL
+ * @brief Execute model synchronous inference until the inference result is returned
+ *
+ * @param  modelId [IN]   ID of the model to perform inference
+ * @param  input [IN]     Input data for model inference
+ * @param  output [OUT]   Output data for model inference
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output);
+
+/**
+ * @ingroup AscendCL
+ * @brief Execute model inference asynchronously on the given stream
+ *
+ * @param  modelId [IN]   ID of the model to perform inference
+ * @param  input [IN]     Input data for model inference
+ * @param  output [OUT]   Output data for model inference
+ * @param  stream [IN]    stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output,
+                                                aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief unload model with model id
+ *
+ * @param  modelId [IN]   model id to be unloaded
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlUnload(uint32_t modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the weight memory size and working memory size
+ * required for model execution according to the model file
+ *
+ * @param  fileName [IN]     Model path to get memory information
+ * @param  workSize [OUT]    The amount of working memory for model executed
+ * @param  weightSize [OUT]  The amount of weight memory for model executed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlQuerySize(const char *fileName, size_t *workSize, size_t *weightSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the weight memory size and working memory size
+ * required for model execution according to the model data in memory
+ *
+ * @par Restriction
+ * The execution and weight memory is Device memory,
+ * and requires user application and release.
+ * @param  model [IN]        model memory which user manages
+ * @param  modelSize [IN]    model data size
+ * @param  workSize [OUT]    The amount of working memory for model executed
+ * @param  weightSize [OUT]  The amount of weight memory for model executed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlQuerySizeFromMem(const void *model, size_t modelSize, size_t *workSize,
+                                                    size_t *weightSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief In dynamic batch scenarios,
+ * it is used to set the number of images processed
+ * at one time during model inference
+ *
+ * @param  modelId [IN]     model id
+ * @param  dataset [IN|OUT] data for model inference
+ * @param  index [IN]       index of dynamic tensor
+ * @param  batchSize [IN]   Number of images processed at a time during model inference
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicBatchSize(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+                                                       uint64_t batchSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Sets the H and W of the specified input of the model
+ *
+ * @param  modelId [IN]     model id
+ * @param  dataset [IN|OUT] data for model inference
+ * @param  index [IN]       index of dynamic tensor
+ * @param  height [IN]      model height
+ * @param  width [IN]       model width
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicHWSize(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+                                                    uint64_t height, uint64_t width);
+
+/**
+ * @ingroup AscendCL
+ * @brief Sets the dynamic dims of the specified input of the model
+ *
+ * @param  modelId [IN]     model id
+ * @param  dataset [IN|OUT] data for model inference
+ * @param  index [IN]       index of dynamic dims
+ * @param  dims [IN]        value of dynamic dims
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetInputDynamicDims(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+                                                       const aclmdlIODims *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input dims info
+ *
+ * @param modelDesc [IN]  model description
+ * @param index [IN]  input tensor index
+ * @param dims [OUT]  dims info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlGetInputDimsV2
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetInputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input dims info (version 2), especially for static aipp;
+ * it is the same as aclmdlGetInputDims when the model has no static aipp
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN]     input tensor index
+ * @param dims [OUT]     dims info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlGetInputDims
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetInputDimsV2(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output dims info
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN]     output tensor index
+ * @param dims [OUT]     dims info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get current output dims info
+ *
+ * @par Function
+ * The following use cases are supported:
+ * @li Get current output shape when model is dynamic and
+ * dynamic shape info is set
+ * @li Get max output shape when model is dynamic and
+ * dynamic shape info is not set
+ * @li Get actual output shape when model is static
+ *
+ * @param modelDesc [IN] model description
+ * @param index [IN]     output tensor index
+ * @param dims [OUT]     dims info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);
+
+/**
+ * @ingroup AscendCL
+ * @brief get attr value by op name
+ *
+ * @param modelDesc [IN]   model description
+ * @param opName [IN]      op name
+ * @param attr [IN]        attr name
+ *
+ * @retval the attr value
+ */
+ACL_FUNC_VISIBILITY const char *aclmdlGetOpAttr(aclmdlDesc *modelDesc, const char *opName, const char *attr);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input name by index
+ *
+ * @param modelDesc [IN]  model description
+ * @param index [IN]      input tensor index
+ *
+ * @retval input tensor name, with the same life cycle as modelDesc
+ */
+ACL_FUNC_VISIBILITY const char *aclmdlGetInputNameByIndex(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output name by index
+ *
+ * @param modelDesc [IN]  model description
+ * @param index [IN]      output tensor index
+ *
+ * @retval output tensor name, with the same life cycle as modelDesc
+ */
+ACL_FUNC_VISIBILITY const char *aclmdlGetOutputNameByIndex(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input format by index
+ *
+ * @param modelDesc [IN]  model description
+ * @param index [IN]      input tensor index
+ *
+ * @retval input tensor format
+ */
+ACL_FUNC_VISIBILITY aclFormat aclmdlGetInputFormat(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output format by index
+ *
+ * @param modelDesc [IN]  model description
+ * @param index [IN]      output tensor index
+ *
+ * @retval output tensor format
+ */
+ACL_FUNC_VISIBILITY aclFormat aclmdlGetOutputFormat(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input data type by index
+ *
+ * @param modelDesc [IN]  model description
+ * @param index [IN]  input tensor index
+ *
+ * @retval input tensor data type
+ */
+ACL_FUNC_VISIBILITY aclDataType aclmdlGetInputDataType(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output data type by index
+ *
+ * @param modelDesc [IN]  model description
+ * @param index [IN]  output tensor index
+ *
+ * @retval output tensor data type
+ */
+ACL_FUNC_VISIBILITY aclDataType aclmdlGetOutputDataType(const aclmdlDesc *modelDesc, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input tensor index by name
+ *
+ * @param modelDesc [IN]  model description
+ * @param name [IN]    input tensor name
+ * @param index [OUT]  input tensor index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetInputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get output tensor index by name
+ *
+ * @param modelDesc [IN]  model description
+ * @param name [IN]  output tensor name
+ * @param index [OUT]  output tensor index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetOutputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get dynamic batch info
+ *
+ * @param modelDesc [IN]  model description
+ * @param batch [OUT]  dynamic batch info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicBatch(const aclmdlDesc *modelDesc, aclmdlBatch *batch);
+
+/**
+ * @ingroup AscendCL
+ * @brief get dynamic height&width info
+ *
+ * @param modelDesc [IN]  model description
+ * @param index [IN]  input tensor index
+ * @param hw [OUT]  dynamic height&width info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicHW(const aclmdlDesc *modelDesc, size_t index, aclmdlHW *hw);
+
+/**
+ * @ingroup AscendCL
+ * @brief get dynamic gear count
+ *
+ * @param modelDesc [IN]  model description
+ * @param index [IN]  unused, must be -1
+ * @param gearCount [OUT]  dynamic gear count
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetInputDynamicGearCount(const aclmdlDesc *modelDesc, size_t index,
+                                                            size_t *gearCount);
+
+/**
+ * @ingroup AscendCL
+ * @brief get dynamic dims info
+ *
+ * @param modelDesc [IN]  model description
+ * @param index [IN]  unused, must be -1
+ * @param dims [OUT]  value of dynamic dims
+ * @param gearCount [IN]  dynamic gear count
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetInputDynamicDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims,
+                                                       size_t gearCount);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create data of type aclmdlAIPP
+ *
+ * @param batchSize [IN]    batch size of model
+ *
+ * @retval the aclmdlAIPP pointer
+ */
+ACL_FUNC_VISIBILITY aclmdlAIPP *aclmdlCreateAIPP(uint64_t batchSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of type aclmdlAIPP
+ *
+ * @param aippParmsSet [IN]    Pointer for aclmdlAIPP to be destroyed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlDestroyAIPP(const aclmdlAIPP *aippParmsSet);
+
+/**
+ * @ingroup AscendCL
+ * @brief set InputFormat of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT]  Pointer for aclmdlAIPP
+ * @param inputFormat [IN]    The inputFormat of aipp
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, aclAippInputFormat inputFormat);
+
+/**
+ * @ingroup AscendCL
+ * @brief set cscParms of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT]    Pointer for aclmdlAIPP
+ * @param csc_switch [IN]       Csc switch
+ * @param cscMatrixR0C0 [IN]    Csc_matrix_r0_c0
+ * @param cscMatrixR0C1 [IN]    Csc_matrix_r0_c1
+ * @param cscMatrixR0C2 [IN]    Csc_matrix_r0_c2
+ * @param cscMatrixR1C0 [IN]    Csc_matrix_r1_c0
+ * @param cscMatrixR1C1 [IN]    Csc_matrix_r1_c1
+ * @param cscMatrixR1C2 [IN]    Csc_matrix_r1_c2
+ * @param cscMatrixR2C0 [IN]    Csc_matrix_r2_c0
+ * @param cscMatrixR2C1 [IN]    Csc_matrix_r2_c1
+ * @param cscMatrixR2C2 [IN]    Csc_matrix_r2_c2
+ * @param cscOutputBiasR0 [IN]  Output Bias for RGB to YUV, element of row 0, unsigned number
+ * @param cscOutputBiasR1 [IN]  Output Bias for RGB to YUV, element of row 1, unsigned number
+ * @param cscOutputBiasR2 [IN]  Output Bias for RGB to YUV, element of row 2, unsigned number
+ * @param cscInputBiasR0 [IN]   Input Bias for YUV to RGB, element of row 0, unsigned number
+ * @param cscInputBiasR1 [IN]   Input Bias for YUV to RGB, element of row 1, unsigned number
+ * @param cscInputBiasR2 [IN]   Input Bias for YUV to RGB, element of row 2, unsigned number
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0,
+                                                    int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0,
+                                                    int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0,
+                                                    int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
+                                                    uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1,
+                                                    uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0,
+                                                    uint8_t cscInputBiasR1, uint8_t cscInputBiasR2);
+
+/**
+ * @ingroup AscendCL
+ * @brief set rb/uv swap switch of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT]  Pointer for aclmdlAIPP
+ * @param rbuvSwapSwitch [IN] rb/uv swap switch
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch);
+
+/**
+ * @ingroup AscendCL
+ * @brief set RGBA->ARGB, YUVA->AYUV swap switch of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT]  Pointer for aclmdlAIPP
+ * @param axSwapSwitch [IN]   RGBA->ARGB, YUVA->AYUV swap switch
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch);
+
+/**
+ * @ingroup AscendCL
+ * @brief set source image of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT]  Pointer for aclmdlAIPP
+ * @param srcImageSizeW [IN]  Source image width
+ * @param srcImageSizeH [IN]  Source image height
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW,
+                                                       int32_t srcImageSizeH);
+
+/**
+ * @ingroup AscendCL
+ * @brief set resize switch of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT]  Pointer for aclmdlAIPP
+ * @param scfSwitch [IN]      Resize switch
+ * @param scfInputSizeW [IN]  Input width of scf
+ * @param scfInputSizeH [IN]  Input height of scf
+ * @param scfOutputSizeW [IN] Output width of scf
+ * @param scfOutputSizeH [IN] Output height of scf
+ * @param batchIndex [IN]     Batch parameter index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW,
+                                                    int32_t scfInputSizeH, int32_t scfOutputSizeW,
+                                                    int32_t scfOutputSizeH, uint64_t batchIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief set cropParams of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT]  Pointer for aclmdlAIPP
+ * @param cropSwitch [IN]     Crop switch
+ * @param cropStartPosW [IN]  The start horizontal position of cropping
+ * @param cropStartPosH [IN]  The start vertical position of cropping
+ * @param cropSizeW [IN]      Crop width
+ * @param cropSizeH [IN]      Crop height
+ * @param batchIndex [IN]     Batch parameter index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW,
+                                                     int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH,
+                                                     uint64_t batchIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief set paddingParams of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT]      Pointer for aclmdlAIPP
+ * @param paddingSwitch [IN]      Padding switch
+ * @param paddingSizeTop [IN]     Top padding size
+ * @param paddingSizeBottom [IN]  Bottom padding size
+ * @param paddingSizeLeft [IN]    Left padding size
+ * @param paddingSizeRight [IN]   Right padding size
+ * @param batchIndex [IN]         Batch parameter index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch,
+                                                        int32_t paddingSizeTop, int32_t paddingSizeBottom,
+                                                        int32_t paddingSizeLeft, int32_t paddingSizeRight,
+                                                        uint64_t batchIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief set DtcPixelMean of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT]      Pointer for aclmdlAIPP
+ * @param dtcPixelMeanChn0 [IN]   Mean value of channel 0
+ * @param dtcPixelMeanChn1 [IN]   Mean value of channel 1
+ * @param dtcPixelMeanChn2 [IN]   Mean value of channel 2
+ * @param dtcPixelMeanChn3 [IN]   Mean value of channel 3
+ * @param batchIndex [IN]         Batch parameter index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0,
+                                                       int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2,
+                                                       int16_t dtcPixelMeanChn3, uint64_t batchIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief set DtcPixelMin of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT]    Pointer for aclmdlAIPP
+ * @param dtcPixelMinChn0 [IN]  Min value of channel 0
+ * @param dtcPixelMinChn1 [IN]  Min value of channel 1
+ * @param dtcPixelMinChn2 [IN]  Min value of channel 2
+ * @param dtcPixelMinChn3 [IN]  Min value of channel 3
+ * @param batchIndex [IN]       Batch parameter index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0,
+                                                      float dtcPixelMinChn1, float dtcPixelMinChn2,
+                                                      float dtcPixelMinChn3, uint64_t batchIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief set PixelVarReci of type aclmdlAIPP
+ *
+ * @param aippParmsSet [OUT]       Pointer for aclmdlAIPP
+ * @param dtcPixelVarReciChn0 [IN] sfr_dtc_pixel_variance_reci_ch0
+ * @param dtcPixelVarReciChn1 [IN] sfr_dtc_pixel_variance_reci_ch1
+ * @param dtcPixelVarReciChn2 [IN] sfr_dtc_pixel_variance_reci_ch2
+ * @param dtcPixelVarReciChn3 [IN] sfr_dtc_pixel_variance_reci_ch3
+ * @param batchIndex [IN]          Batch parameter index
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0,
+                                                       float dtcPixelVarReciChn1, float dtcPixelVarReciChn2,
+                                                       float dtcPixelVarReciChn3, uint64_t batchIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief set aipp parameters to model
+ *
+ * @param modelId [IN]        model id
+ * @param dataset [IN]        Pointer of dataset
+ * @param index [IN]          index of input for aipp data(ACL_DYNAMIC_AIPP_NODE)
+ * @param aippParmsSet [IN]   Pointer for aclmdlAIPP
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+                                                const aclmdlAIPP *aippParmsSet);
+
+/**
+ * @ingroup AscendCL
+ * @brief set aipp parameters to model
+ *
+ * @param modelId [IN]        model id
+ * @param dataset [IN]        Pointer of dataset
+ * @param index [IN]          index of input data that is linked to dynamic aipp(ACL_DATA_WITH_DYNAMIC_AIPP)
+ * @param aippParmsSet [IN]   Pointer for aclmdlAIPP
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index,
+                                                       const aclmdlAIPP *aippParmsSet);
+
+/**
+ * @ingroup AscendCL
+ * @brief get input aipp type
+ *
+ * @param modelId [IN]        model id
+ * @param index [IN]          index of input
+ * @param type [OUT]          aipp type for input. refer to aclmdlInputAippType(enum)
+ * @param dynamicAttachedDataIndex [OUT]     index for dynamic attached data(ACL_DYNAMIC_AIPP_NODE)
+ *        valid when type is ACL_DATA_WITH_DYNAMIC_AIPP, invalid value is ACL_INVALID_NODE_INDEX
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type,
+                                               size_t *dynamicAttachedDataIndex);
+
+/**
+ * @ingroup AscendCL
+ * @brief get static aipp parameters from model
+ *
+ * @param modelId [IN]        model id
+ * @param index [IN]          index of tensor
+ * @param aippinfo [OUT]      Pointer for static aipp info
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval ACL_ERROR_MODEL_AIPP_NOT_EXIST The tensor of index is not configured with aipp
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
+ * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op description info
+ *
+ * @param deviceId [IN]       device id
+ * @param streamId [IN]       stream id
+ * @param taskId [IN]         task id
+ * @param opName [OUT]        pointer to op name
+ * @param opNameLen [IN]      the length of op name
+ * @param inputDesc [OUT]     pointer to input description
+ * @param numInputs [OUT]     the number of input tensor
+ * @param outputDesc [OUT]    pointer to output description
+ * @param numOutputs [OUT]    the number of output tensor
+ *
+ * @retval ACL_SUCCESS The function is successfully executed
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId,
+                                                      char *opName, size_t opNameLen, aclTensorDesc **inputDesc,
+                                                      size_t *numInputs, aclTensorDesc **outputDesc,
+                                                      size_t *numOutputs);
+
+/**
+ * @ingroup AscendCL
+ * @brief init dump
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlInitDump();
+
+/**
+ * @ingroup AscendCL
+ * @brief set param of dump
+ *
+ * @param dumpCfgPath [IN]   the path of dump config
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath);
+
+/**
+ * @ingroup AscendCL
+ * @brief finalize dump.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump();
+
+/**
+ * @ingroup AscendCL
+ * @brief load model with config
+ *
+ * @param handle [IN]    pointer to model config handle
+ * @param modelId [OUT]  pointer to model id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief create model config handle of type aclmdlConfigHandle
+ *
+ * @retval the aclmdlConfigHandle pointer
+ *
+ * @see aclmdlDestroyConfigHandle
+ */
+ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of type aclmdlConfigHandle
+ *
+ * @param handle [IN]   pointer to model config handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclmdlCreateConfigHandle
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief set config for model load
+ *
+ * @param handle [OUT]    pointer to model config handle
+ * @param attr [IN]       config attr in model config handle to be set
+ * @param attrValue [IN]  pointer to model config value
+ * @param valueSize [IN]  memory size of attrValue
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr,
+                                                const void *attrValue, size_t valueSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief get real tensor name from modelDesc
+ *
+ * @param modelDesc [IN]  pointer to modelDesc
+ * @param name [IN]       tensor name
+ *
+ * @retval the pointer of real tensor name
+ * @retval Failure return NULL
+ */
+ACL_FUNC_VISIBILITY const char *aclmdlGetTensorRealName(const aclmdlDesc *modelDesc, const char *name);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_MODEL_H_
diff --git a/inc/external/acl/acl_op.h b/inc/external/acl/acl_op.h
new file mode 100644
index 00000000..d2e59bfb
--- /dev/null
+++ b/inc/external/acl/acl_op.h
@@ -0,0 +1,504 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_OP_H_
+#define INC_EXTERNAL_ACL_ACL_OP_H_
+
+#include "acl_base.h"
+#include "acl_rt.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct aclopHandle aclopHandle;
+typedef struct aclopAttr aclopAttr;
+typedef struct aclopKernelDesc aclopKernelDesc;
+
+typedef void (*aclDataDeallocator)(void *data, size_t length);
+
+static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1;
+
+typedef enum aclEngineType {
+  ACL_ENGINE_SYS,
+  ACL_ENGINE_AICORE,
+  ACL_ENGINE_VECTOR,
+} aclopEngineType;
+
+/**
+ * @ingroup AscendCL
+ * @brief Set base directory that contains single op models
+ *
+ * @par Restriction
+ * The aclopSetModelDir interface can be called only once in a process.
+ * @param  modelDir [IN]   path of the directory
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetModelDir(const char *modelDir);
+
+/**
+ * @ingroup AscendCL
+ * @brief load single op models from memory
+ *
+ * @par Restriction
+ * The aclopLoad interface can be called more than once in a process.
+ * @param model [IN]        address of single op models
+ * @param modelSize [IN]    size of single op models
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopLoad(const void *model, size_t modelSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief create data of type aclopAttr
+ *
+ * @retval pointer to created instance.
+ * @retval nullptr if run out of memory
+ */
+ACL_FUNC_VISIBILITY aclopAttr *aclopCreateAttr();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy data of type aclopAttr
+ *
+ * @param attr [IN]   pointer to the instance of aclopAttr
+ */
+ACL_FUNC_VISIBILITY void aclopDestroyAttr(const aclopAttr *attr);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is bool
+ *
+ * @param attr [OUT]       pointer to the instance of aclopAttr
+ * @param attrName [IN]    attribute name
+ * @param attrValue [IN]   attribute value
+ *                         false if attrValue is 0, true otherwise.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrBool(aclopAttr *attr, const char *attrName, uint8_t attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is int64_t
+ *
+ * @param attr [OUT]       pointer to the instance of aclopAttr
+ * @param attrName [IN]    attribute name
+ * @param attrValue [IN]   attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrInt(aclopAttr *attr, const char *attrName, int64_t attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is float
+ *
+ * @param attr [OUT]       pointer to the instance of aclopAttr
+ * @param attrName [IN]    attribute name
+ * @param attrValue [IN]   attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrFloat(aclopAttr *attr, const char *attrName, float attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is string
+ *
+ * @param attr [OUT]       pointer to the instance of aclopAttr
+ * @param attrName [IN]    attribute name
+ * @param attrValue [IN]   attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *attrName, const char *attrValue);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of bools
+ *
+ * @param attr [OUT]       pointer to the instance of aclopAttr
+ * @param attrName [IN]    attribute name
+ * @param numValues [IN]   number of values
+ * @param values [IN]      pointer to values. each value is false if 0, true otherwise.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues,
+                                                  const uint8_t *values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of ints
+ *
+ * @param attr [OUT]       pointer to the instance of aclopAttr
+ * @param attrName [IN]    attribute name
+ * @param numValues [IN]   number of values
+ * @param values [IN]      pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues,
+                                                 const int64_t *values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of floats
+ *
+ * @param attr [OUT]       pointer to the instance of aclopAttr
+ * @param attrName [IN]    attribute name
+ * @param numValues [IN]   number of values
+ * @param values [IN]      pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues,
+                                                   const float *values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of strings
+ *
+ * @param attr [OUT]       pointer to the instance of aclopAttr
+ * @param attrName [IN]    attribute name
+ * @param numValues [IN]   number of values
+ * @param values [IN]      pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues,
+                                                    const char **values);
+
+/**
+ * @ingroup AscendCL
+ * @brief set an attribute. the type of the attribute is list of list of ints
+ *
+ * @param attr [OUT]       pointer to the instance of aclopAttr
+ * @param attrName [IN]    attribute name
+ * @param numLists [IN]    number of lists
+ * @param numValues [IN]   pointer to number of values of each list
+ * @param values [IN]      pointer to values
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists,
+                                                     const int *numValues, const int64_t *const values[]);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load and execute the specified operator asynchronously
+ *
+ * @par Restriction
+ * @li The input and output organization of each operator is different,
+ * and the application needs to organize the operator strictly
+ * according to the operator input and output parameters when calling.
+ * @li When the user calls aclopExecute,
+ * the ACL finds the corresponding task according to the optype,
+ * the description of the input tensor,
+ * the description of the output tensor, and attr, and issues the execution.
+ *
+ * @param opType [IN]      type of op
+ * @param numInputs [IN]   number of inputs
+ * @param inputDesc [IN]   pointer to array of input tensor descriptions
+ * @param inputs [IN]      pointer to array of input buffers
+ * @param numOutputs [IN]  number of outputs
+ * @param outputDesc [IN]  pointer to array of output tensor descriptions
+ * @param outputs [OUT]    pointer to array of output buffers
+ * @param attr [IN]        pointer to instance of aclopAttr.
+ *                         may pass nullptr if the op has no attribute
+ * @param stream [IN]      stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead")
+ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
+                                          const aclDataBuffer *const inputs[], int numOutputs,
+                                          const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[],
+                                          const aclopAttr *attr, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Load and execute the specified operator
+ *        The difference with aclopExecute is that aclopExecuteV2 will refresh outputDesc
+ *
+ * @par Restriction
+ * @li The input and output organization of each operator is different,
+ * and the application needs to organize the operator strictly
+ * according to the operator input and output parameters when calling.
+ * @li When the user calls aclopExecuteV2,
+ * the ACL finds the corresponding task according to the optype,
+ * the description of the input tensor,
+ * the description of the output tensor, and attr, and issues the execution.
+ *
+ * @param opType [IN]      type of op
+ * @param numInputs [IN]   number of inputs
+ * @param inputDesc [IN]   pointer to array of input tensor descriptions
+ * @param inputs [IN]      pointer to array of input buffers
+ * @param numOutputs [IN]  number of outputs
+ * @param outputDesc [IN|OUT]  pointer to array of output tensor descriptions
+ * @param outputs [OUT]    pointer to array of output buffers
+ * @param attr [IN]        pointer to instance of aclopAttr.
+ *                         may pass nullptr if the op has no attribute
+ * @param stream [IN]      stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
+                                            aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
+                                            aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create an instance of aclopHandle.
+ *
+ * @param opType [IN]      type of op
+ * @param numInputs [IN]   number of inputs
+ * @param inputDesc [IN]   pointer to array of input tensor descriptions
+ * @param numOutputs [IN]  number of outputs
+ * @param outputDesc [IN]  pointer to array of output tensor descriptions
+ * @param opAttr [IN]      pointer to instance of aclopAttr.
+ *                         may pass nullptr if the op has no attribute
+ * @param handle [OUT]     pointer to the pointer to the handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs,
+                                               const aclTensorDesc *const inputDesc[], int numOutputs,
+                                               const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
+                                               aclopHandle **handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy aclopHandle instance
+ *
+ * @param handle [IN]   pointer to the instance of aclopHandle
+ */
+ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief execute an op with the handle.
+ *        can save op model matching cost compared with aclopExecute
+ *
+ * @param handle [IN]      pointer to the instance of aclopHandle.
+ *                         The aclopCreateHandle interface has been called
+ *                         in advance to create aclopHandle type data.
+ * @param numInputs [IN]   number of inputs
+ * @param inputs [IN]      pointer to array of input buffers.
+ *                         The aclCreateDataBuffer interface has been called
+ *                         in advance to create aclDataBuffer type data.
+ * @param numOutputs [IN]  number of outputs
+ * @param outputs [OUT]    pointer to array of output buffers
+ * @param stream [IN]      stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclopCreateHandle | aclCreateDataBuffer
+ */
+ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs,
+                                                 const aclDataBuffer *const inputs[], int numOutputs,
+                                                 aclDataBuffer *const outputs[], aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief cast data type
+ *
+ * @param srcDesc [IN]     source tensor desc
+ * @param srcBuffer [IN]   source tensor buffer
+ * @param dstDesc [IN]     destination tensor desc
+ * @param dstBuffer [OUT]  destination tensor buffer
+ * @param truncate [IN]    do not truncate if value is 0, truncate otherwise
+ * @param stream [IN]      stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer,
+                                       const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate,
+                                       aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create a handle for casting datatype
+ *
+ * @param srcDesc [IN]    source tensor desc
+ * @param dstDesc [IN]    destination tensor desc
+ * @param truncate [IN]   do not truncate if value is 0, truncate otherwise
+ * @param handle [OUT]    pointer to the pointer to the handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate,
+                                                      aclopHandle **handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief create kernel
+ *
+ * @param opType [IN]           op type
+ * @param kernelId [IN]         kernel id
+ * @param kernelName [IN]       kernel name
+ * @param binData [IN]          kernel bin data
+ * @param binSize [IN]          kernel bin size
+ * @param enginetype [IN]       engine type
+ * @param deallocator [IN]      callback function for deallocating bin data,
+ *                              null if bin data to be deallocated by caller
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclopCompile
+ */
+ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName,
+                                               void *binData, int binSize, aclopEngineType enginetype,
+                                               aclDataDeallocator deallocator);
+
+/**
+ * @ingroup AscendCL
+ * @brief prototype of the op compile function registered through aclopRegisterCompileFunc
+ *
+ * @param numInputs [IN]            number of inputs
+ * @param inputDesc [IN]            pointer to array of input tensor descriptions
+ * @param numOutputs [IN]           number of outputs
+ * @param outputDesc [IN]           pointer to array of output tensor descriptions
+ * @param opAttr [IN]               pointer to instance of aclopAttr
+ * @param aclopKernelDesc [IN]      pointer to instance of aclopKernelDesc
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs,
+                                     const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr,
+                                     aclopKernelDesc *aclopKernelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief register compile function
+ *
+ * @param opType [IN]         op type
+ * @param func [IN]           compile function
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclopUnregisterCompileFunc
+ */
+ACL_FUNC_VISIBILITY aclError aclopRegisterCompileFunc(const char *opType, aclopCompileFunc func);
+
+/**
+ * @ingroup AscendCL
+ * @brief unregister compile function
+ *
+ * @param opType [IN]         op type
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType);
+
+/**
+ * @ingroup AscendCL
+ * @brief set kernel args
+ *
+ * @param kernelDesc [IN]               pointer to instance of aclopKernelDesc
+ * @param kernelId [IN]                 kernel id
+ * @param blockDim [IN]                 block dim
+ * @param args [IN]                     args
+ * @param argSize [IN]                  size in bytes of args
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim,
+                                                const void *args, uint32_t argSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief set workspace sizes
+ *
+ * @param kernelDesc [IN]               pointer to instance of aclopKernelDesc
+ * @param numWorkspaces [IN]            number of workspaces
+ * @param workspaceSizes [IN]           pointer to array of sizes of workspaces
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kernelDesc, int numWorkspaces,
+                                                          size_t *workspaceSizes);
+
+/**
+ * @ingroup AscendCL
+ * @brief compile op with dynamic shape
+ *
+ * @param opType [IN]       op type
+ * @param numInputs [IN]    number of inputs
+ * @param inputDesc [IN]    pointer to array of input tensor descriptions
+ * @param numOutputs [IN]   number of outputs
+ * @param outputDesc [IN]   pointer to array of output tensor descriptions
+ * @param attr [IN]         pointer to instance of aclopAttr.
+ *                          may pass nullptr if the op has no attribute
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs,
+                                               const aclTensorDesc *const inputDesc[], int numOutputs,
+                                               const aclTensorDesc *const outputDesc[], const aclopAttr *attr);
+
+/**
+ * @ingroup AscendCL
+ * @brief inferShape the specified operator synchronously
+ *
+ * @param opType [IN]       type of op
+ * @param numInputs [IN]    number of inputs
+ * @param inputDesc [IN]    pointer to array of input tensor descriptions
+ * @param inputs [IN]       pointer to array of input buffers
+ * @param numOutputs [IN]   number of outputs
+ * @param outputDesc [OUT]  pointer to array of output tensor descriptions
+ * @param attr [IN]         pointer to instance of aclopAttr.
+ *                          may pass nullptr if the op has no attribute
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[],
+                                             aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[],
+                                             aclopAttr *attr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_OP_H_
diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h
new file mode 100644
index 00000000..d9d1b3da
--- /dev/null
+++ b/inc/external/acl/acl_op_compiler.h
@@ -0,0 +1,121 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
+#define INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
+
+#include "acl_base.h"
+#include "acl_op.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType;
+
+typedef enum {
+  ACL_PRECISION_MODE,
+  ACL_AICORE_NUM,
+  ACL_AUTO_TUNE_MODE,
+  ACL_OP_SELECT_IMPL_MODE,
+  ACL_OPTYPELIST_FOR_IMPLMODE,
+  ACL_OP_DEBUG_LEVEL,
+  ACL_DEBUG_DIR,
+  ACL_OP_COMPILER_CACHE_MODE,
+  ACL_OP_COMPILER_CACHE_DIR,
+  ACL_OP_PERFORMANCE_MODE
+} aclCompileOpt;
+
+typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag;
+
+/**
+ * @ingroup AscendCL
+ * @brief compile op
+ *
+ * @param opType [IN]           op type
+ * @param numInputs [IN]        number of inputs
+ * @param inputDesc [IN]        pointer to array of input tensor descriptions
+ * @param numOutputs [IN]       number of outputs
+ * @param outputDesc [IN]       pointer to array of output tensor descriptions
+ * @param attr [IN]           pointer to instance of aclopAttr.
+ *                              may pass nullptr if the op has no attribute
+ * @param engineType [IN]       engine type
+ * @param compileFlag [IN]      compile flag
+ * @param opPath [IN]           path of op
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[],
+                                          int numOutputs, const aclTensorDesc *const outputDesc[],
+                                          const aclopAttr *attr, aclopEngineType engineType,
+                                          aclopCompileType compileFlag, const char *opPath);
+
+/**
+ * @ingroup AscendCL
+ * @brief compile and execute op
+ *
+ * @param opType [IN]           op type
+ * @param numInputs [IN]        number of inputs
+ * @param inputDesc [IN]        pointer to array of input tensor descriptions
+ * @param inputs [IN]           pointer to array of input buffers
+ * @param numOutputs [IN]       number of outputs
+ * @param outputDesc [IN]       pointer to array of output tensor descriptions
+ * @param outputs [OUT]         pointer to array of output buffers
+ * @param attr [IN]             pointer to instance of aclopAttr.
+ *                              may pass nullptr if the op has no attribute
+ * @param engineType [IN]       engine type
+ * @param compileFlag [IN]      compile flag
+ * @param opPath [IN]           path of op
+ * @param stream [IN]           stream handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(
+  const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[],
+  int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr,
+  aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief set compile option
+ *
+ * @param opt [IN]                compile option (one of the aclCompileOpt values)
+ * @param value [IN]              pointer for the option value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *value);
+
+/**
+ * @ingroup AscendCL
+ * @brief set compile flag
+ *
+ * @param flag [IN]    compile flag, ACL_OP_COMPILE_DEFAULT means compile with default mode
+ *                     ACL_OP_COMPILE_FUZZ means compile with fuzz mode
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_
diff --git a/inc/external/acl/acl_prof.h b/inc/external/acl/acl_prof.h
new file mode 100644
index 00000000..3784d8c6
--- /dev/null
+++ b/inc/external/acl/acl_prof.h
@@ -0,0 +1,329 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_PROF_H_
+#define INC_EXTERNAL_ACL_PROF_H_
+
+#include "acl_base.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ACL_PROF_ACL_API 0x0001
+#define ACL_PROF_TASK_TIME 0x0002
+#define ACL_PROF_AICORE_METRICS 0x0004
+#define ACL_PROF_AICPU 0x0008
+
+/**
+ * @deprecated please use aclprofGetOpTypeLen and aclprofGetOpNameLen instead
+ */
+#define ACL_PROF_MAX_OP_NAME_LEN 257
+#define ACL_PROF_MAX_OP_TYPE_LEN 65
+
+typedef enum {
+  ACL_AICORE_ARITHMETIC_UTILIZATION = 0,
+  ACL_AICORE_PIPE_UTILIZATION = 1,
+  ACL_AICORE_MEMORY_BANDWIDTH = 2,
+  ACL_AICORE_L0B_AND_WIDTH = 3,
+  ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
+  ACL_AICORE_NONE = 0xFF
+} aclprofAicoreMetrics;
+
+typedef struct aclprofConfig aclprofConfig;
+typedef struct aclprofStopConfig aclprofStopConfig;
+typedef struct aclprofAicoreEvents aclprofAicoreEvents;
+typedef struct aclprofSubscribeConfig aclprofSubscribeConfig;
+
+/**
+ * @ingroup AscendCL
+ * @brief profiling initialize
+ *
+ * @param  profilerResultPath [IN]  path of profiling result
+ * @param  length [IN]              length of profilerResultPath
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofFinalize
+ */
+ACL_FUNC_VISIBILITY aclError aclprofInit(const char *profilerResultPath, size_t length);
+
+/**
+ * @ingroup AscendCL
+ * @brief profiling finalize
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofInit
+ */
+ACL_FUNC_VISIBILITY aclError aclprofFinalize();
+
+/**
+ * @ingroup AscendCL
+ * @brief Start profiling modules by profilerConfig
+ *
+ * @param  profilerConfig [IN]  config of profiling
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofStop
+ */
+ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create data of type aclprofConfig
+ *
+ * @param  deviceIdList [IN]      list of device id
+ * @param  deviceNums [IN]        number of devices
+ * @param  aicoreMetrics [IN]     type of aicore metrics
+ * @param  aicoreEvents [IN]      pointer to aicore events, only support NULL now
+ * @param  dataTypeConfig [IN]    config modules need profiling
+ *
+ * @retval the aclprofConfig pointer
+ *
+ * @see aclprofDestroyConfig
+ */
+ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums,
+                                                       aclprofAicoreMetrics aicoreMetrics,
+                                                       aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy data of type aclprofConfig
+ *
+ * @param  profilerConfig [IN]  config of profiling
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofCreateConfig
+ */
+ACL_FUNC_VISIBILITY aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief stop profiling modules by profilerConfig
+ *
+ * @param  profilerConfig [IN]  pointer to stop config of profiling
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofStart
+ */
+ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief subscribe profiling data of model
+ *
+ * @param  modelId [IN]              the model id subscribed
+ * @param  profSubscribeConfig [IN]  pointer to config of model subscribe
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofModelUnSubscribe
+ */
+ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief unsubscribe profiling data of model
+ *
+ * @param  modelId [IN]  the model id unsubscribed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofModelSubscribe
+ */
+ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief create subscribe config
+ *
+ * @param  timeInfoSwitch [IN] switch whether get time info from model
+ * @param  aicoreMetrics [IN]  aicore metrics
+ * @param  fd [IN]             pointer to write pipe
+ *
+ * @retval the aclprofSubscribeConfig pointer
+ *
+ * @see aclprofDestroySubscribeConfig
+ */
+ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch,
+                                                                         aclprofAicoreMetrics aicoreMetrics, void *fd);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy subscribe config
+ *
+ * @param  profSubscribeConfig [IN]  subscribe config
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclprofCreateSubscribeConfig
+ */
+ACL_FUNC_VISIBILITY aclError aclprofDestroySubscribeConfig(const aclprofSubscribeConfig *profSubscribeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief get size of op desc
+ *
+ * @param  opDescSize [OUT]  size of op desc
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclprofGetOpDescSize(size_t *opDescSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op number from subscription data
+ *
+ * @param  opInfo [IN]     pointer to subscription data
+ * @param  opInfoLen [IN]  memory size of subscription data
+ * @param  opNumber [OUT]  op number of subscription data
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLen, uint32_t *opNumber);
+
+/**
+ * @ingroup AscendCL
+ * @brief get length of op type from subscription data
+ *
+ * @param  opInfo [IN]      pointer to subscription data
+ * @param  opInfoLen [IN]   memory size of subscription data
+ * @param  index [IN]       index of op array in opInfo
+ * @param  opTypeLen [OUT]  actual length of op type string
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opInfoLen, uint32_t index,
+                                                 size_t *opTypeLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op type from subscription data
+ *
+ * @param  opInfo [IN]      pointer to subscription data
+ * @param  opInfoLen [IN]   memory size of subscription data
+ * @param  index [IN]       index of op array in opInfo
+ * @param  opType [OUT]     obtained op type string
+ * @param  opTypeLen [IN]   obtained length of op type string
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType,
+                                              size_t opTypeLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief get length of op name from subscription data
+ *
+ * @param  opInfo [IN]      pointer to subscription data
+ * @param  opInfoLen [IN]   memory size of subscription data
+ * @param  index [IN]       index of op array in opInfo
+ * @param  opNameLen [OUT]  actual length of op name string
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opInfoLen, uint32_t index,
+                                                 size_t *opNameLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief get op name from subscription data
+ *
+ * @param  opInfo [IN]      pointer to subscription data
+ * @param  opInfoLen [IN]   memory size of subscription data
+ * @param  index [IN]       index of op array in opInfo
+ * @param  opName [OUT]     obtained op name string
+ * @param  opNameLen [IN]   obtained length of op name string
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName,
+                                              size_t opNameLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief get start time of specified op from subscription data
+ *
+ * @param  opInfo [IN]     pointer to subscription data
+ * @param  opInfoLen [IN]  memory size of subscription data
+ * @param  index [IN]      index of op array in opInfo
+ *
+ * @retval start time(us) of specified op with timestamp
+ * @retval 0 for failed
+ */
+ACL_FUNC_VISIBILITY uint64_t aclprofGetOpStart(const void *opInfo, size_t opInfoLen, uint32_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get end time of specified op from subscription data
+ *
+ * @param  opInfo [IN]     pointer to subscription data
+ * @param  opInfoLen [IN]  memory size of subscription data
+ * @param  index [IN]      index of op array in opInfo
+ *
+ * @retval end time(us) of specified op with timestamp
+ * @retval 0 for failed
+ */
+ACL_FUNC_VISIBILITY uint64_t aclprofGetOpEnd(const void *opInfo, size_t opInfoLen, uint32_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get execution time of specified op from subscription data
+ *
+ * @param  opInfo [IN]     pointer to subscription data
+ * @param  opInfoLen [IN]  memory size of subscription data
+ * @param  index [IN]      index of op array in opInfo
+ *
+ * @retval execution time(us) of specified op with timestamp
+ * @retval 0 for failed
+ */
+ACL_FUNC_VISIBILITY uint64_t aclprofGetOpDuration(const void *opInfo, size_t opInfoLen, uint32_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief get model id from subscription data
+ *
+ * @param  opInfo [IN]     pointer to subscription data
+ * @param  opInfoLen [IN]  memory size of subscription data
+ * @param  index [IN]      index of op array in opInfo
+ * @retval model id of subscription data
+ * @retval 0 for failed
+ */
+ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLen, uint32_t index);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_PROF_H_
diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h
new file mode 100644
index 00000000..5ee70724
--- /dev/null
+++ b/inc/external/acl/acl_rt.h
@@ -0,0 +1,958 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_RT_H_
+#define INC_EXTERNAL_ACL_ACL_RT_H_
+
+#include <stdint.h>
+#include <stddef.h>
+#include "acl_base.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ACL_EVENT_TIME_LINE 0x00000008u
+
+typedef enum aclrtRunMode {
+  ACL_DEVICE,
+  ACL_HOST,
+} aclrtRunMode;
+
+typedef enum aclrtTsId {
+  ACL_TS_ID_AICORE = 0,
+  ACL_TS_ID_AIVECTOR = 1,
+  ACL_TS_ID_RESERVED = 2,
+} aclrtTsId;
+
+typedef enum aclrtEventStatus {
+  ACL_EVENT_STATUS_COMPLETE = 0,
+  ACL_EVENT_STATUS_NOT_READY = 1,
+  ACL_EVENT_STATUS_RESERVED = 2,
+} aclrtEventStatus;
+
+typedef enum aclrtCallbackBlockType {
+  ACL_CALLBACK_NO_BLOCK,
+  ACL_CALLBACK_BLOCK,
+} aclrtCallbackBlockType;
+
+typedef enum aclrtMemcpyKind {
+  ACL_MEMCPY_HOST_TO_HOST,
+  ACL_MEMCPY_HOST_TO_DEVICE,
+  ACL_MEMCPY_DEVICE_TO_HOST,
+  ACL_MEMCPY_DEVICE_TO_DEVICE,
+} aclrtMemcpyKind;
+
+typedef enum aclrtMemMallocPolicy {
+  ACL_MEM_MALLOC_HUGE_FIRST,
+  ACL_MEM_MALLOC_HUGE_ONLY,
+  ACL_MEM_MALLOC_NORMAL_ONLY,
+  ACL_MEM_MALLOC_HUGE_FIRST_P2P,
+  ACL_MEM_MALLOC_HUGE_ONLY_P2P,
+  ACL_MEM_MALLOC_NORMAL_ONLY_P2P,
+} aclrtMemMallocPolicy;
+
+typedef enum aclrtMemAttr {
+  ACL_DDR_MEM,
+  ACL_HBM_MEM,
+  ACL_DDR_MEM_HUGE,
+  ACL_DDR_MEM_NORMAL,
+  ACL_HBM_MEM_HUGE,
+  ACL_HBM_MEM_NORMAL,
+  ACL_DDR_MEM_P2P_HUGE,
+  ACL_DDR_MEM_P2P_NORMAL,
+  ACL_HBM_MEM_P2P_HUGE,
+  ACL_HBM_MEM_P2P_NORMAL,
+} aclrtMemAttr;
+
+typedef enum aclrtGroupAttr {
+  ACL_GROUP_AICORE_INT,
+  ACL_GROUP_AIV_INT,
+  ACL_GROUP_AIC_INT,
+  ACL_GROUP_SDMANUM_INT,
+  ACL_GROUP_ASQNUM_INT,
+  ACL_GROUP_GROUPID_INT
+} aclrtGroupAttr;
+
+typedef struct tagRtGroupInfo aclrtGroupInfo;
+
+typedef struct rtExceptionInfo aclrtExceptionInfo;
+
+typedef void (*aclrtCallback)(void *userData);
+
+typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set a callback function to handle exception information
+ *
+ * @param callback [IN] callback function to handle exception information
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get task id from exception information
+ *
+ * @param info [IN]   pointer of exception information
+ *
+ * @retval The task id from exception information
+ * @retval 0xFFFFFFFF if info is null
+ */
+ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream id from exception information
+ *
+ * @param info [IN]   pointer of exception information
+ *
+ * @retval The stream id from exception information
+ * @retval 0xFFFFFFFF if info is null
+ */
+ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get thread id from exception information
+ *
+ * @param info [IN]   pointer of exception information
+ *
+ * @retval The thread id of fail task
+ * @retval 0xFFFFFFFF if info is null
+ */
+ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get device id from exception information
+ *
+ * @param info [IN]   pointer of exception information
+ *
+ * @retval The device id of fail task
+ * @retval 0xFFFFFFFF if info is null
+ */
+ACL_FUNC_VISIBILITY uint32_t aclrtGetDeviceIdFromExceptionInfo(const aclrtExceptionInfo *info);
+
+/**
+ * @ingroup AscendCL
+ * @brief The thread that handles the callback function on the Stream
+ *
+ * @param threadId [IN] thread ID
+ * @param stream [IN]   stream handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Add a callback function to be executed on the host
+ *        to the task queue of the Stream
+ *
+ * @param fn [IN]   Specify the callback function to be added
+ *                  The function prototype of the callback function is:
+ *                  typedef void (*aclrtCallback)(void *userData);
+ * @param userData [IN]   User data to be passed to the callback function
+ * @param blockType [IN]  callback block type
+ * @param stream [IN]     stream handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType,
+                                                 aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief After waiting for a specified time, trigger callback processing
+ *
+ * @par Function
+ *  The thread processing callback specified by
+ *  the aclrtSubscribeReport interface
+ *
+ * @param timeout [IN]   timeout value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtSubscribeReport
+ */
+ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout);
+
+/**
+ * @ingroup AscendCL
+ * @brief Cancel thread registration,
+ *        the callback function on the specified Stream
+ *        is no longer processed by the specified thread
+ *
+ * @param threadId [IN]   thread ID
+ * @param stream [IN]     stream handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create context and associates it with the calling thread
+ *
+ * @par Function
+ * The following use cases are supported:
+ * @li If you don't call the aclrtCreateContext interface
+ * to explicitly create the context,
+ * the system will use the default context, which is implicitly created
+ * when the aclrtSetDevice interface is called.
+ * @li If multiple contexts are created in a process
+ * (there is no limit on the number of contexts),
+ * the current thread can only use one of them at the same time.
+ * It is recommended to explicitly specify the context of the current thread
+ * through the aclrtSetCurrentContext interface to increase
+ * the maintainability of the program.
+ *
+ * @param  context [OUT]    point to the created context
+ * @param  deviceId [IN]    device to create context on
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtSetDevice | aclrtSetCurrentContext
+ */
+ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy context instance
+ *
+ * @par Function
+ * Can only destroy context created through aclrtCreateContext interface
+ *
+ * @param  context [IN]   the context to destroy
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtCreateContext
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context);
+
+/**
+ * @ingroup AscendCL
+ * @brief set the context of the thread
+ *
+ * @par Function
+ * The following scenarios are supported:
+ * @li If the aclrtCreateContext interface is called in a thread to explicitly
+ * create a Context (for example: ctx1), the thread's Context can be specified
+ * without calling the aclrtSetCurrentContext interface.
+ * The system uses ctx1 as the context of thread1 by default.
+ * @li If the aclrtCreateContext interface is not explicitly created,
+ * the system uses the default context as the context of the thread.
+ * At this time, the aclrtDestroyContext interface cannot be used to release
+ * the default context.
+ * @li If the aclrtSetCurrentContext interface is called multiple times to
+ * set the thread's Context, the last one prevails.
+ *
+ * @par Restriction
+ * @li If the device corresponding to the context set for the thread
+ * has been reset, you cannot set the context as the context of the thread,
+ * otherwise a business exception will result.
+ * @li It is recommended to use the context created in a thread.
+ * If the aclrtCreateContext interface is called in thread A to create a context,
+ * and the context is used in thread B,
+ * the user must guarantee the execution order of tasks in the same stream
+ * under the same context in two threads.
+ *
+ * @param  context [IN]   the current context of the thread
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtCreateContext | aclrtDestroyContext
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context);
+
+/**
+ * @ingroup AscendCL
+ * @brief get the context of the thread
+ *
+ * @par Function
+ * If the user calls the aclrtSetCurrentContext interface
+ * multiple times to set the context of the current thread,
+ * then the last set context is obtained
+ *
+ * @param  context [OUT]   the current context of the thread
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtSetCurrentContext
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context);
+
+/**
+ * @ingroup AscendCL
+ * @brief Specify the device to use for the operation
+ * implicitly create the default context and the default stream
+ *
+ * @par Function
+ * The following use cases are supported:
+ * @li Device can be specified in the process or thread.
+ * If you call the aclrtSetDevice interface multiple
+ * times to specify the same device,
+ * you only need to call the aclrtResetDevice interface to reset the device.
+ * @li The same device can be specified for operation
+ *  in different processes or threads.
+ * @li Device is specified in a process,
+ * and multiple threads in the process can share this device to explicitly
+ * create a Context (aclrtCreateContext interface).
+ * @li In multi-device scenarios, you can switch to other devices
+ * through the aclrtSetDevice interface in the process.
+ *
+ * @param  deviceId [IN]  the device id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtResetDevice | aclrtCreateContext
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Reset the current operating Device and free resources on the device,
+ * including the default context, the default stream,
+ * and all streams created under the default context,
+ * and synchronizes the interface.
+ * If the task under the default context or stream has not been completed,
+ * the system will wait for the task to complete before releasing it.
+ *
+ * @par Restriction
+ * @li For the Context, Stream, and Event that are explicitly created
+ * on the device to be reset, it is recommended to release them
+ * in the following interface calling sequence before resetting,
+ * otherwise business abnormalities may be caused.
+ * @li Interface calling sequence:
+ * call aclrtDestroyEvent interface to release Event or
+ * call aclrtDestroyStream interface to release explicitly created Stream->
+ * call aclrtDestroyContext to release explicitly created Context->
+ * call aclrtResetDevice interface
+ *
+ * @param  deviceId [IN]   the device id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId);
+
+/**
+ * @ingroup AscendCL
+ * @brief get target device of current thread
+ *
+ * @param deviceId [OUT]  the device id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId);
+
+/**
+ * @ingroup AscendCL
+ * @brief get target side
+ *
+ * @param runMode [OUT]    the run mode
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode);
+
+/**
+ * @ingroup AscendCL
+ * @brief Wait for compute device to finish
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set Scheduling TS
+ *
+ * @param tsId [IN]   the ts id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId);
+
+/**
+ * @ingroup AscendCL
+ * @brief get total device number.
+ *
+ * @param count [OUT]    the device number
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count);
+
+/**
+ * @ingroup AscendCL
+ * @brief create event instance
+ *
+ * @param event [OUT]   created event
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event);
+
+/**
+ * @ingroup AscendCL
+ * @brief create event instance with flag
+ *
+ * @param event [OUT]   created event
+ * @param flag [IN]     event flag
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtCreateEventWithFlag(aclrtEvent *event, uint32_t flag);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy event instance
+ *
+ * @par Function
+ *  Only events created through the aclrtCreateEvent interface can be
+ *  destroyed. This is a synchronous interface. When destroying an event,
+ *  the user must ensure that the tasks involved in the aclrtSynchronizeEvent
+ *  interface or the aclrtStreamWaitEvent interface are completed before
+ *  they are destroyed.
+ *
+ * @param  event [IN]   event to destroy
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event);
+
+/**
+ * @ingroup AscendCL
+ * @brief Record an Event in the Stream
+ *
+ * @param event [IN]    event to record
+ * @param stream [IN]   stream handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Reset an event
+ *
+ * @par Function
+ *  Users need to make sure to wait for the tasks in the Stream
+ *  to complete before resetting the Event
+ *
+ * @param event [IN]    event to reset
+ * @param stream [IN]   stream handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Queries an event's status
+ *
+ * @param  event [IN]    event to query
+ * @param  status [OUT]  event status
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status);
+
+/**
+ * @ingroup AscendCL
+ * @brief Block Host Running, wait event to be complete
+ *
+ * @param  event [IN]   event to wait
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event);
+
+/**
+ * @ingroup AscendCL
+ * @brief computes the elapsed time between events.
+ *
+ * @param ms [OUT]     time between start and end in ms
+ * @param start [IN]   starting event
+ * @param end [IN]     ending event
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream
+ */
+ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end);
+
+/**
+ * @ingroup AscendCL
+ * @brief alloc memory on device
+ *
+ * @par Function
+ *  alloc for size linear memory on device
+ *  and return a pointer to allocated memory by *devPtr
+ *
+ * @par Restriction
+ * @li The memory requested by the aclrtMalloc interface needs to be released
+ * through the aclrtFree interface.
+ * @li Before calling the media data processing interface,
+ * if you need to apply memory on the device to store input or output data,
+ * you need to call acldvppMalloc to apply for memory.
+ *
+ * @param devPtr [OUT]  pointer to pointer to allocated memory on device
+ * @param size [IN]     alloc memory size
+ * @param policy [IN]   memory alloc policy
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtFree | acldvppMalloc | aclrtMallocCached
+ */
+ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
+
+/**
+ * @ingroup AscendCL
+ * @brief allocate memory on device with cache
+ *
+ * @par Function
+ *  alloc for size linear memory on device
+ *  and return a pointer to allocated memory by *devPtr
+ *
+ * @par Restriction
+ * @li The memory requested by the aclrtMallocCached interface needs to be released
+ * through the aclrtFree interface.
+ *
+ * @param devPtr [OUT]  pointer to pointer to allocated memory on device
+ * @param size [IN]     alloc memory size
+ * @param policy [IN]   memory alloc policy
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtFree | aclrtMalloc
+ */
+ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy);
+
+/**
+ * @ingroup AscendCL
+ * @brief flush cache data to ddr
+ *
+ * @param devPtr [IN]  the pointer that flush data to ddr
+ * @param size [IN]    flush size
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtMemFlush(void *devPtr, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief invalidate cache data
+ *
+ * @param devPtr [IN]  pointer to invalidate cache data
+ * @param size [IN]    invalidate size
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtMemInvalidate(void *devPtr, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief free device memory
+ *
+ * @par Function
+ *  can only free memory allocated through the aclrtMalloc interface
+ *
+ * @param  devPtr [IN]  Pointer to memory to be freed
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtMalloc
+ */
+ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr);
+
+/**
+ * @ingroup AscendCL
+ * @brief alloc memory on host
+ *
+ * @par Restriction
+ * @li The requested memory cannot be used in the Device
+ * and needs to be explicitly copied to the Device.
+ * @li The memory requested by the aclrtMallocHost interface
+ * needs to be released through the aclrtFreeHost interface.
+ *
+ * @param  hostPtr [OUT] pointer to pointer to allocated memory on the host
+ * @param  size [IN]     alloc memory size
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtFreeHost
+ */
+ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief free host memory
+ *
+ * @par Function
+ *  can only free memory allocated through the aclrtMallocHost interface
+ *
+ * @param  hostPtr [IN]   free memory pointer
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtMallocHost
+ */
+ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr);
+
+/**
+ * @ingroup AscendCL
+ * @brief synchronous memory replication between host and device
+ *
+ * @param dst [IN]       destination address pointer
+ * @param destMax [IN]   Max length of the destination address memory
+ * @param src [IN]       source address pointer
+ * @param count [IN]     the length of byte to copy
+ * @param kind [IN]      memcpy type
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count,
+                                         aclrtMemcpyKind kind);
+
+/**
+ * @ingroup AscendCL
+ * @brief Initialize memory and set contents of memory to specified value
+ *
+ * @par Function
+ *  The memory to be initialized is on the Host or device side,
+ *  and the system determines whether
+ *  it is host or device according to the address
+ *
+ * @param devPtr [IN]    Starting address of memory
+ * @param maxCount [IN]  Max length of destination address memory
+ * @param value [IN]     Set value
+ * @param count [IN]     The length of memory
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count);
+
+/**
+ * @ingroup AscendCL
+ * @brief  Asynchronous memory replication between Host and Device
+ *
+ * @par Function
+ *  After calling this interface,
+ *  be sure to call the aclrtSynchronizeStream interface to ensure that
+ *  the task of memory replication has been completed
+ *
+ * @par Restriction
+ * @li For on-chip Device-to-Device memory copy,
+ *     both the source and destination addresses must be 64-byte aligned
+ *
+ * @param dst [IN]     destination address pointer
+ * @param destMax [IN] Max length of destination address memory
+ * @param src [IN]     source address pointer
+ * @param count [IN]   the number of byte to copy
+ * @param kind [IN]    memcpy type
+ * @param stream [IN]  asynchronized task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtSynchronizeStream
+ */
+ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count,
+                                              aclrtMemcpyKind kind, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Asynchronous initialize memory
+ * and set contents of memory to specified value async
+ *
+ * @par Function
+ *  The memory to be initialized is on the Host or device side,
+ *  and the system determines whether
+ *  it is host or device according to the address
+ *
+ * @param devPtr [IN]      destination address pointer
+ * @param maxCount [IN]    Max length of destination address memory
+ * @param value [IN]       set value
+ * @param count [IN]       the number of byte to set
+ * @param stream [IN]      asynchronized task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtSynchronizeStream
+ */
+ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count,
+                                              aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief  create stream instance
+ *
+ * @param  stream [OUT]   the created stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy stream instance
+ *
+ * @par Function
+ * Can only destroy streams created through the aclrtCreateStream interface
+ *
+ * @par Restriction
+ * Before calling the aclrtDestroyStream interface to destroy
+ * the specified Stream, you need to call the aclrtSynchronizeStream interface
+ * to ensure that the tasks in the Stream have been completed.
+ *
+ * @param stream [IN]  the stream to destroy
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtCreateStream | aclrtSynchronizeStream
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief block the host until all tasks
+ * in the specified stream have completed
+ *
+ * @param  stream [IN]   the stream to wait
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Blocks the operation of the specified Stream until
+ * the specified Event is completed.
+ * Support for multiple streams waiting for the same event.
+ *
+ * @param  stream [IN]   the wait stream. If using the default Stream, set NULL
+ * @param  event [IN]    the event to wait
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event);
+
+/**
+ * @ingroup AscendCL
+ * @brief set group
+ *
+ * @par Function
+ *  set the task to the corresponding group
+ *
+ * @param groupId [IN]   group id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | aclrtGetGroupInfoDetail
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSetGroup(int32_t groupId);
+
+/**
+ * @ingroup AscendCL
+ * @brief get the number of group
+ *
+ * @par Function
+ *  get the number of group. if the number of group is zero,
+ *  it means that group is not supported or group is not created.
+ *
+ * @param count [OUT]   the number of group
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetGroupCount(uint32_t *count);
+
+/**
+ * @ingroup AscendCL
+ * @brief create group information (takes no arguments)
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ *
+ * @see aclrtDestroyGroupInfo
+ */
+ACL_FUNC_VISIBILITY aclrtGroupInfo *aclrtCreateGroupInfo(void);
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy group information
+ *
+ * @param groupInfo [IN]   pointer to group information
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtCreateGroupInfo
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDestroyGroupInfo(aclrtGroupInfo *groupInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief get all group information
+ *
+ * @param groupInfo [OUT]   pointer to group information
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtGetGroupCount
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief get detail information of group
+ *
+ * @param groupInfo [IN]    pointer to group information
+ * @param groupIndex [IN]   group index value
+ * @param attr [IN]         group attribute
+ * @param attrValue [OUT]   pointer to attribute value
+ * @param valueLen [IN]     length of attribute value
+ * @param paramRetSize [OUT]   pointer to real length of attribute value
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtGetGroupCount | aclrtGetAllGroupInfo
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupIndex,
+                                                     aclrtGroupAttr attr, void *attrValue, size_t valueLen,
+                                                     size_t *paramRetSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief checking whether current device and peer device support the p2p feature
+ *
+ * @param canAccessPeer [OUT]   pointer to save the checking result
+ * @param deviceId [IN]         current device id
+ * @param peerDeviceId [IN]     peer device id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtDeviceEnablePeerAccess | aclrtDeviceDisablePeerAccess
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDeviceCanAccessPeer(int32_t *canAccessPeer, int32_t deviceId, int32_t peerDeviceId);
+
+/**
+ * @ingroup AscendCL
+ * @brief enable the peer device to support the p2p feature
+ *
+ * @param peerDeviceId [IN]   the peer device id
+ * @param flags [IN]   reserved field, now it must be zero
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtDeviceCanAccessPeer | aclrtDeviceDisablePeerAccess
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDeviceEnablePeerAccess(int32_t peerDeviceId, uint32_t flags);
+
+/**
+ * @ingroup AscendCL
+ * @brief disable the peer device to support the p2p function
+ *
+ * @param peerDeviceId [IN]   the peer device id
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclrtDeviceCanAccessPeer | aclrtDeviceEnablePeerAccess
+ */
+ACL_FUNC_VISIBILITY aclError aclrtDeviceDisablePeerAccess(int32_t peerDeviceId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Obtain the free memory and total memory of specified attribute.
+ * The specified memory includes normal memory and huge memory.
+ *
+ * @param attr [IN]    the memory attribute of specified device
+ * @param free [OUT]   the free memory of specified device
+ * @param total [OUT]  the total memory of specified device.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, size_t *total);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set the timeout interval for waiting of op
+ *
+ * @param timeout [IN]   op wait timeout
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclrtSetOpWaitTimeout(uint32_t timeout);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_RT_H_
diff --git a/inc/external/acl/acl_tdt.h b/inc/external/acl/acl_tdt.h
new file mode 100644
index 00000000..c357518d
--- /dev/null
+++ b/inc/external/acl/acl_tdt.h
@@ -0,0 +1,276 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_ACL_TDT_H_
+#define INC_EXTERNAL_ACL_ACL_TDT_H_
+
+#include "acl/acl_base.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum acltdtTensorType {
+  ACL_TENSOR_DATA_UNDEFINED = -1,
+  ACL_TENSOR_DATA_TENSOR,
+  ACL_TENSOR_DATA_END_OF_SEQUENCE,
+  ACL_TENSOR_DATA_ABNORMAL
+};
+
+typedef struct acltdtDataItem acltdtDataItem;
+typedef struct acltdtDataset acltdtDataset;
+typedef struct acltdtChannelHandle acltdtChannelHandle;
+
+/**
+ * @ingroup AscendCL
+ * @brief Get tensor type from item
+ *
+ * @param dataItem [IN] pointer to the data item
+ *
+ * @retval Tensor type.
+ * @retval ACL_TENSOR_DATA_UNDEFINED if dataItem is null
+ */
+ACL_FUNC_VISIBILITY acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get data type from item
+ *
+ * @param dataItem [IN] pointer to the data item
+ *
+ * @retval Data type.
+ * @retval ACL_DT_UNDEFINED if dataItem is null
+ */
+ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get data address from item
+ *
+ * @param dataItem [IN] pointer to data item
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get data size from item
+ *
+ * @param dataItem [IN] pointer to data item
+ *
+ * @retval 0 for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dim's number from item
+ *
+ * @param dataItem [IN] pointer to data item
+ *
+ * @retval 0 for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dims from item
+ *
+ * @param  dataItem [IN]      the struct of data item
+ * @param  dims [IN|OUT]      pointer to the dims of dataItem
+ * @param  dimNum [IN]        the size of the dims
+ *
+ * @retval ACL_SUCCESS  The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create the struct of data item
+ *
+ * @param tdtType [IN]  Tdt tensor type
+ * @param dims [IN]     pointer of tdtDataItem's dims
+ * @param dimNum [IN]   Dim number
+ * @param dataType [IN] Data type
+ * @param data [IN]     Data pointer
+ * @param size [IN]     Data size
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ *
+ * @see acltdtDestroyDataItem
+ */
+ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum,
+                                                         aclDataType dataType, void *data, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy the struct of data item
+ *
+ * @param dataItem [IN]  pointer to the data item
+ *
+ * @retval ACL_SUCCESS  The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtCreateDataItem
+ */
+ACL_FUNC_VISIBILITY aclError acltdtDestroyDataItem(acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create the tdt dataset (takes no arguments)
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ *
+ * @see acltdtDestroyDataset
+ */
+ACL_FUNC_VISIBILITY acltdtDataset *acltdtCreateDataset(void);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy the tdt dataset
+ *
+ * @param dataset [IN]  pointer to the dataset
+ *
+ * @retval ACL_SUCCESS  The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtCreateDataset
+ */
+ACL_FUNC_VISIBILITY aclError acltdtDestroyDataset(acltdtDataset *dataset);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the data item
+ *
+ * @param dataset [IN] pointer to the dataset
+ * @param index [IN]   index of the dataset
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ *
+ * @see acltdtAddDataItem
+ */
+ACL_FUNC_VISIBILITY acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Add the data item to the dataset
+ *
+ * @param dataset [OUT] pointer to the dataset
+ * @param dataItem [IN] pointer to the data item
+ *
+ * @retval ACL_SUCCESS  The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtGetDataItem
+ */
+ACL_FUNC_VISIBILITY aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get the size of dataset
+ *
+ * @param dataset [IN]  pointer to the dataset
+ *
+ * @retval 0 for failed
+ * @retval OtherValues success
+ */
+ACL_FUNC_VISIBILITY size_t acltdtGetDatasetSize(const acltdtDataset *dataset);
+
+/**
+ * @ingroup AscendCL
+ * @brief Stop the channel
+ *
+ * @param handle [IN]  pointer to the channel handle
+ *
+ * @retval ACL_SUCCESS  The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtCreateChannel | acltdtDestroyChannel
+ */
+ACL_FUNC_VISIBILITY aclError acltdtStopChannel(acltdtChannelHandle *handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create the channel
+ *
+ * @param deviceId [IN]  the device id
+ * @param name [IN]      the channel's name
+ *
+ * @retval null for failed
+ * @retval OtherValues success
+ *
+ * @see acltdtStopChannel | acltdtDestroyChannel
+ */
+ACL_FUNC_VISIBILITY acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy the channel
+ *
+ * @param handle [IN]  pointer to the channel handle
+ *
+ * @retval ACL_SUCCESS  The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtCreateChannel | acltdtStopChannel
+ */
+ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief Send tensor to device
+ *
+ * @param handle [IN]  pointer to the channel handle
+ * @param dataset [IN] pointer to the dataset
+ * @param timeout [IN] to be reserved, now it must be -1
+ *
+ * @retval ACL_SUCCESS  The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtReceiveTensor
+ */
+ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset,
+                                              int32_t timeout);
+
+/**
+ * @ingroup AscendCL
+ * @brief Receive tensor from device
+ *
+ * @param handle [IN]      pointer to the channel handle
+ * @param dataset [OUT]    pointer to the dataset
+ * @param timeout [IN]     to be reserved, now it must be -1
+ *
+ * @retval ACL_SUCCESS  The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acltdtSendTensor
+ */
+ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset,
+                                                 int32_t timeout);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_ACL_TDT_H_
diff --git a/inc/external/acl/error_codes/ge_error_codes.h b/inc/external/acl/error_codes/ge_error_codes.h
new file mode 100644
index 00000000..cafc5a64
--- /dev/null
+++ b/inc/external/acl/error_codes/ge_error_codes.h
@@ -0,0 +1,75 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_
+#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_
+
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY _declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#else
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#endif
+
+#include <stddef.h>
+#include <stdint.h>  // uint32_t, used by every ACL_ERROR_GE_* constant below
+#ifdef __cplusplus
+extern "C" {
+#endif
+static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000;
+static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007;
+static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009;
+static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011;
+static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012;
+static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013;
+static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014;
+static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015;
+static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016;
+static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017;
+static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018;
+static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019;
+static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020;
+static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021;
+static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022;
+static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000;
+static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001;
+static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000;
+static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004;
+static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005;
+static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006;
+static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007;
+static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008;
+static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009;
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+#endif  // INC_EXTERNAL_GE_GE_ERROR_CODES_H_
diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h
new file mode 100644
index 00000000..a1392cc6
--- /dev/null
+++ b/inc/external/acl/error_codes/rt_error_codes.h
@@ -0,0 +1,109 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__
+#define __INC_EXTERNEL_RT_ERROR_CODES_H__
+
+#include <stddef.h>
+#include <stdint.h>  // int32_t, used by every ACL_ERROR_RT_* constant below
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static const int32_t ACL_RT_SUCCESS = 0;  // success
+
+static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000;             // param invalid
+static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001;          // invalid device id
+static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002;              // current context null
+static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003;            // stream not in current context
+static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004;             // model not in current context
+static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005;              // stream not in model
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006;   // event timestamp invalid
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007;  // event timestamp reversal
+static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008;            // memory address unaligned
+static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009;                 // open file failed
+static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010;                // write file failed
+static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011;          // error subscribe stream
+static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012;          // error subscribe thread
+static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013;             // group not set
+static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014;          // group not create
+static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015;          // callback not register to stream
+static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016;       // invalid memory type
+static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017;            // invalid handle
+static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018;       // invalid malloc type
+static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019;              // wait timeout
+
+static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000;  // feature not support
+static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001;    // memory allocation error
+static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002;          // memory free error
+static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003;     // aicore over flow
+static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004;            // no device
+static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005;  // resource alloc fail
+static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006;        // no permission
+static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007;    // no event resource
+static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008;   // no stream resource
+static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009;   // no notify resource
+static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010;    // no model resource
+static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011;      // no cdq resource
+
+static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;              // runtime internal error
+static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;                    // ts internal error
+static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002;            // task full in stream
+static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003;           // task empty in stream
+static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004;         // stream not complete
+static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005;             // end of sequence
+static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006;          // event not complete
+static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007;       // context release error
+static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008;                 // soc version error
+static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009;       // task type not support
+static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010;              // ts lost heartbeat
+static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011;               // model execute failed
+static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012;              // report timeout
+static const int32_t ACL_ERROR_RT_SYS_DMA = 507013;                     // sys dma error
+static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014;              // aicore timeout
+static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015;            // aicore exception
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016;       // aicore trap exception
+static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017;               // aicpu timeout
+static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018;             // aicpu exception
+static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019;      // aicpu datadump response error
+static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020;         // aicpu model operate response error
+static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021;             // profiling error
+static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022;                   // ipc error
+static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023;          // model abort normal
+static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024;        // kernel unregistering
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025;         // ringbuffer not init
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026;          // ringbuffer no data
+static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027;               // kernel lookup error
+static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028;            // kernel register duplicate
+static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029;         // debug register failed
+static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030;       // debug unregister failed
+static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031;               // label not in current context
+static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032;             // program register num use out
+static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033;             // device setup error
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034;         // vector core timeout
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035;       // vector core exception
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036;  // vector core trap exception
+static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037;          // cdq alloc batch abnormal
+
+static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899;    // drv internal error
+static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900;  // aicpu internal error
+static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901;          // hdc disconnect
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // __INC_EXTERNEL_RT_ERROR_CODES_H__
diff --git a/inc/external/acl/ops/acl_cblas.h b/inc/external/acl/ops/acl_cblas.h
new file mode 100644
index 00000000..3d81eb2b
--- /dev/null
+++ b/inc/external/acl/ops/acl_cblas.h
@@ -0,0 +1,334 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_
+#define INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_
+
+#include "acl/acl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType;
+
+typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType;
+
+/**
+ * @ingroup AscendCL
+ * @brief perform the matrix-vector multiplication
+ *
+ * @param transA [IN]      transpose type of matrix A
+ * @param m [IN]           number of rows of matrix A
+ * @param n [IN]           number of columns of matrix A
+ * @param alpha [IN]       pointer to scalar used for multiplication,
+ *                         of the same type as dataTypeY
+ * @param a [IN]           pointer to matrix A
+ * @param lda [IN]         leading dimension used to store the matrix A
+ * @param dataTypeA [IN]   datatype of matrix A
+ * @param x [IN]           pointer to vector x
+ * @param incx [IN]        stride between consecutive elements of vector x
+ * @param dataTypeX [IN]   datatype of vector x
+ * @param beta [IN]        pointer to scalar used for multiplication,
+ *                         of the same type as dataTypeY. If beta == 0,
+ *                         then y does not have to be a valid input
+ * @param y [IN|OUT]       pointer to vector y
+ * @param incy [IN]        stride between consecutive elements of vector y
+ * @param dataTypeY [IN]   datatype of vector y
+ * @param type [IN]        computation type
+ * @param stream [IN]      stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda,
+                                           aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX,
+                                           const void *beta, void *y, int incy, aclDataType dataTypeY,
+                                           aclComputeType type, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create a handle for performing the matrix-vector multiplication
+ *
+ * @param transA [IN]      transpose type of matrix A
+ * @param m [IN]           number of rows of matrix A
+ * @param n [IN]           number of columns of matrix A
+ * @param dataTypeA [IN]   datatype of matrix A
+ * @param dataTypeX [IN]   datatype of vector x
+ * @param dataTypeY [IN]   datatype of vector y
+ * @param type [IN]        computation type
+ * @param handle [OUT]     pointer to the pointer to the handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA,
+                                                          aclDataType dataTypeX, aclDataType dataTypeY,
+                                                          aclComputeType type, aclopHandle **handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief perform the matrix-vector multiplication
+ *
+ * @param transA [IN]      transpose type of matrix A
+ * @param m [IN]           number of rows of matrix A
+ * @param n [IN]           number of columns of matrix A
+ * @param alpha [IN]       pointer to scalar used for multiplication
+ * @param a [IN]           pointer to matrix A
+ * @param lda [IN]         leading dimension used to store the matrix A
+ * @param x [IN]           pointer to vector x
+ * @param incx [IN]        stride between consecutive elements of vector x
+ * @param beta [IN]        pointer to scalar used for multiplication.
+ *                         If beta value == 0,
+ *                         then y does not have to be a valid input
+ * @param y [IN|OUT]       pointer to vector y
+ * @param incy [IN]        stride between consecutive elements of vector y
+ * @param type [IN]        computation type
+ * @param stream [IN]      stream
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha,
+                                          const aclFloat16 *a, int lda, const aclFloat16 *x, int incx,
+                                          const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type,
+                                          aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create a handle for performing the matrix-vector multiplication
+ *
+ * @param transA [IN]      transpose type of matrix A
+ * @param m [IN]           number of rows of matrix A
+ * @param n [IN]           number of columns of matrix A
+ * @param type [IN]        computation type
+ * @param handle [OUT]     pointer to the pointer to the handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type,
+                                                         aclopHandle **handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief perform the matrix-vector multiplication
+ *
+ * @param transA [IN]      transpose type of matrix A
+ * @param m [IN]           number of rows of matrix A
+ * @param n [IN]           number of columns of matrix A
+ * @param alpha [IN]       pointer to scalar used for multiplication
+ * @param a [IN]           pointer to matrix A
+ * @param lda [IN]         leading dimension used to store the matrix A
+ * @param x [IN]           pointer to vector x
+ * @param incx [IN]        stride between consecutive elements of vector x
+ * @param beta [IN]        pointer to scalar used for multiplication.
+ *                         If beta value == 0,
+ *                         then y does not have to be a valid input
+ * @param y [IN|OUT]       pointer to vector y
+ * @param incy [IN]        stride between consecutive elements of vector y
+ * @param type [IN]        computation type
+ * @param stream [IN]      stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a,
+                                           int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y,
+                                           int incy, aclComputeType type, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create a handle for performing the matrix-vector multiplication
+ *
+ * @param transA [IN]      transpose type of matrix A
+ * @param m [IN]           number of rows of matrix A
+ * @param n [IN]           number of columns of matrix A
+ * @param type [IN]        computation type
+ * @param handle [OUT]     pointer to the pointer to the handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type,
+                                                          aclopHandle **handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief perform the matrix-matrix multiplication
+ *
+ * @param transA [IN]      transpose type of matrix A
+ * @param transB [IN]      transpose type of matrix B
+ * @param transC [IN]      transpose type of matrix C
+ * @param m [IN]           number of rows of matrix A and matrix C
+ * @param n [IN]           number of columns of matrix B and matrix C
+ * @param k [IN]           number of columns of matrix A and rows of matrix B
+ * @param alpha [IN]       pointer to scalar used for multiplication, of the same type as dataTypeC
+ * @param matrixA [IN]     pointer to matrix A
+ * @param lda [IN]         leading dimension used to store the matrix A
+ * @param dataTypeA [IN]   datatype of matrix A
+ * @param matrixB [IN]     pointer to matrix B
+ * @param ldb [IN]         leading dimension used to store the matrix B
+ * @param dataTypeB [IN]   datatype of matrix B
+ * @param beta [IN]        pointer to scalar used for multiplication,
+ *                         of the same type as dataTypeC. If beta == 0,
+ *                         then matrixC does not have to be a valid input
+ * @param matrixC [IN|OUT] pointer to matrix C
+ * @param ldc [IN]         leading dimension used to store the matrix C
+ * @param dataTypeC [IN]   datatype of matrix C
+ * @param type [IN]        computation type
+ * @param stream [IN]      stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
+                                           int k, const void *alpha, const void *matrixA, int lda,
+                                           aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB,
+                                           const void *beta, void *matrixC, int ldc, aclDataType dataTypeC,
+                                           aclComputeType type, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create a handle for performing the matrix-matrix multiplication
+ *
+ * @param transA [IN]      transpose type of matrix A
+ * @param transB [IN]      transpose type of matrix B
+ * @param transC [IN]      transpose type of matrix C
+ * @param m [IN]           number of rows of matrix A and matrix C
+ * @param n [IN]           number of columns of matrix B and matrix C
+ * @param k [IN]           number of columns of matrix A and rows of matrix B
+ * @param dataTypeA [IN]   datatype of matrix A
+ * @param dataTypeB [IN]   datatype of matrix B
+ * @param dataTypeC [IN]   datatype of matrix C
+ * @param type [IN]        computation type
+ * @param handle [OUT]     pointer to the pointer to the handle
+ *                         created for the matrix-matrix multiplication
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC,
+                                                          int m, int n, int k, aclDataType dataTypeA,
+                                                          aclDataType dataTypeB, aclDataType dataTypeC,
+                                                          aclComputeType type, aclopHandle **handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief perform the matrix-matrix multiplication
+ *
+ * @param transA [IN]      transpose type of matrix A
+ * @param transB [IN]      transpose type of matrix B
+ * @param transC [IN]      transpose type of matrix C
+ * @param m [IN]           number of rows of matrix A and matrix C
+ * @param n [IN]           number of columns of matrix B and matrix C
+ * @param k [IN]           number of columns of matrix A and rows of matrix B
+ * @param alpha [IN]       pointer to scalar used for multiplication
+ * @param matrixA [IN]     pointer to matrix A
+ * @param lda [IN]         leading dimension used to store the matrix A
+ * @param matrixB [IN]     pointer to matrix B
+ * @param ldb [IN]         leading dimension used to store the matrix B
+ * @param beta [IN]        pointer to scalar used for multiplication.
+ *                         If beta value == 0,
+ *                         then matrixC does not have to be a valid input
+ * @param matrixC [IN|OUT] pointer to matrix C
+ * @param ldc [IN]         leading dimension used to store the matrix C
+ * @param type [IN]        computation type
+ * @param stream [IN]      stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
+                                          int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda,
+                                          const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta,
+                                          aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create a handle for performing the matrix-matrix multiplication
+ *
+ * @param transA [IN]      transpose type of matrix A
+ * @param transB [IN]      transpose type of matrix B
+ * @param transC [IN]      transpose type of matrix C
+ * @param m [IN]           number of rows of matrix A and matrix C
+ * @param n [IN]           number of columns of matrix B and matrix C
+ * @param k [IN]           number of columns of matrix A and rows of matrix B
+ * @param type [IN]        computation type
+ * @param handle [OUT]     pointer to the pointer to the handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC,
+                                                         int m, int n, int k, aclComputeType type,
+                                                         aclopHandle **handle);
+
+/**
+ * @ingroup AscendCL
+ * @brief perform the matrix-matrix multiplication
+ *
+ * @param transA [IN]      transpose type of matrix A
+ * @param transB [IN]      transpose type of matrix B
+ * @param transC [IN]      transpose type of matrix C
+ * @param m [IN]           number of rows of matrix A and matrix C
+ * @param n [IN]           number of columns of matrix B and matrix C
+ * @param k [IN]           number of columns of matrix A and rows of matrix B
+ * @param alpha [IN]       pointer to scalar used for multiplication
+ * @param matrixA [IN]     pointer to matrix A
+ * @param lda [IN]         leading dimension used to store the matrix A
+ * @param matrixB [IN]     pointer to matrix B
+ * @param ldb [IN]         leading dimension used to store the matrix B
+ * @param beta [IN]        pointer to scalar used for multiplication.
+ *                         If beta value == 0,
+ *                         then matrixC does not have to be a valid input
+ * @param matrixC [IN|OUT] pointer to matrix C
+ * @param ldc [IN]         leading dimension used to store the matrix C
+ * @param type [IN]        computation type
+ * @param stream [IN]      stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n,
+                                           int k, const int32_t *alpha, const int8_t *matrixA, int lda,
+                                           const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC,
+                                           int ldc, aclComputeType type, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief create a handle for performing the matrix-matrix multiplication
+ *
+ * @param transA [IN]      transpose type of matrix A
+ * @param transB [IN]      transpose type of matrix B
+ * @param transC [IN]      transpose type of matrix C
+ * @param m [IN]           number of rows of matrix A and matrix C
+ * @param n [IN]           number of columns of matrix B and matrix C
+ * @param k [IN]           number of columns of matrix A and rows of matrix B
+ * @param type [IN]        computation type
+ * @param handle [OUT]     pointer to the pointer to the handle
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC,
+                                                          int m, int n, int k, aclComputeType type,
+                                                          aclopHandle **handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_
diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h
new file mode 100644
index 00000000..dcaa3936
--- /dev/null
+++ b/inc/external/acl/ops/acl_dvpp.h
@@ -0,0 +1,2568 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if !defined(ENABLE_DVPP_INTERFACE)  // opt-in guard: including this header without the macro is a hard error
+#if defined(_MSC_VER)
+#error message("if you want to use dvpp functions, please use the macro definition (ENABLE_DVPP_INTERFACE).")
+#else
+#error "if you want to use dvpp functions, please use the macro definition (ENABLE_DVPP_INTERFACE)."
+#endif  // defined(_MSC_VER)
+#endif  // !defined(ENABLE_DVPP_INTERFACE)
+
+#ifndef INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_
+#define INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_
+
+#include <stdint.h>
+#include <stddef.h>
+#include "acl/acl.h"
+#include "acl/acl_base.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct acldvppPicDesc acldvppPicDesc;
+typedef struct acldvppBatchPicDesc acldvppBatchPicDesc;
+typedef struct acldvppRoiConfig acldvppRoiConfig;
+typedef struct acldvppResizeConfig acldvppResizeConfig;
+typedef struct acldvppBorderConfig acldvppBorderConfig;
+typedef struct acldvppLutMap acldvppLutMap;
+typedef struct acldvppChannelDesc acldvppChannelDesc;
+typedef struct acldvppJpegeConfig acldvppJpegeConfig;
+typedef struct aclvdecChannelDesc aclvdecChannelDesc;
+typedef struct acldvppStreamDesc acldvppStreamDesc;
+typedef struct aclvdecFrameConfig aclvdecFrameConfig;
+typedef struct aclvencChannelDesc aclvencChannelDesc;
+typedef struct aclvencFrameConfig aclvencFrameConfig;
+typedef struct acldvppHist acldvppHist;
+typedef void (*aclvdecCallback)(acldvppStreamDesc *input, acldvppPicDesc *output, void *userData);
+typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output, void *userdata);
+
+// Supported Pixel Format (trailing comments give each enumerator's numeric value)
+enum acldvppPixelFormat {
+  PIXEL_FORMAT_YUV_400 = 0,                      // 0
+  PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1,           // 1
+  PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2,           // 2
+  PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3,           // 3
+  PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4,           // 4
+  PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5,           // 5
+  PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6,           // 6
+  PIXEL_FORMAT_YUYV_PACKED_422 = 7,              // 7
+  PIXEL_FORMAT_UYVY_PACKED_422 = 8,              // 8
+  PIXEL_FORMAT_YVYU_PACKED_422 = 9,              // 9
+  PIXEL_FORMAT_VYUY_PACKED_422 = 10,             // 10
+  PIXEL_FORMAT_YUV_PACKED_444 = 11,              // 11
+  PIXEL_FORMAT_RGB_888 = 12,                     // 12
+  PIXEL_FORMAT_BGR_888 = 13,                     // 13
+  PIXEL_FORMAT_ARGB_8888 = 14,                   // 14
+  PIXEL_FORMAT_ABGR_8888 = 15,                   // 15
+  PIXEL_FORMAT_RGBA_8888 = 16,                   // 16
+  PIXEL_FORMAT_BGRA_8888 = 17,                   // 17
+  PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18,  // 18
+  PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19,  // 19
+  PIXEL_FORMAT_YVU_PLANAR_420 = 20,              // 20
+  PIXEL_FORMAT_YVU_PLANAR_422,                   // 21
+  PIXEL_FORMAT_YVU_PLANAR_444,                   // 22
+  PIXEL_FORMAT_RGB_444 = 23,                     // 23
+  PIXEL_FORMAT_BGR_444,                          // 24
+  PIXEL_FORMAT_ARGB_4444,                        // 25
+  PIXEL_FORMAT_ABGR_4444,                        // 26
+  PIXEL_FORMAT_RGBA_4444,                        // 27
+  PIXEL_FORMAT_BGRA_4444,                        // 28
+  PIXEL_FORMAT_RGB_555,                          // 29
+  PIXEL_FORMAT_BGR_555,                          // 30
+  PIXEL_FORMAT_RGB_565,                          // 31
+  PIXEL_FORMAT_BGR_565,                          // 32
+  PIXEL_FORMAT_ARGB_1555,                        // 33
+  PIXEL_FORMAT_ABGR_1555,                        // 34
+  PIXEL_FORMAT_RGBA_1555,                        // 35
+  PIXEL_FORMAT_BGRA_1555,                        // 36
+  PIXEL_FORMAT_ARGB_8565,                        // 37
+  PIXEL_FORMAT_ABGR_8565,                        // 38
+  PIXEL_FORMAT_RGBA_8565,                        // 39
+  PIXEL_FORMAT_BGRA_8565,                        // 40
+  PIXEL_FORMAT_RGB_BAYER_8BPP = 50,              // 50 (values 41-49 are unassigned)
+  PIXEL_FORMAT_RGB_BAYER_10BPP,                  // 51
+  PIXEL_FORMAT_RGB_BAYER_12BPP,                  // 52
+  PIXEL_FORMAT_RGB_BAYER_14BPP,                  // 53
+  PIXEL_FORMAT_RGB_BAYER_16BPP,                  // 54
+  PIXEL_FORMAT_BGR_888_PLANAR = 70,              // 70 (values 55-69 are unassigned)
+  PIXEL_FORMAT_HSV_888_PACKAGE,                  // 71
+  PIXEL_FORMAT_HSV_888_PLANAR,                   // 72
+  PIXEL_FORMAT_LAB_888_PACKAGE,                  // 73
+  PIXEL_FORMAT_LAB_888_PLANAR,                   // 74
+  PIXEL_FORMAT_S8C1,                             // 75
+  PIXEL_FORMAT_S8C2_PACKAGE,                     // 76
+  PIXEL_FORMAT_S8C2_PLANAR,                      // 77
+  PIXEL_FORMAT_S16C1,                            // 78
+  PIXEL_FORMAT_U8C1,                             // 79
+  PIXEL_FORMAT_U16C1,                            // 80
+  PIXEL_FORMAT_S32C1,                            // 81
+  PIXEL_FORMAT_U32C1,                            // 82
+  PIXEL_FORMAT_U64C1,                            // 83
+  PIXEL_FORMAT_S64C1,                            // 84
+  PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000,        // 1000
+  PIXEL_FORMAT_YVU_SEMIPLANAR_440,               // 1001
+  PIXEL_FORMAT_FLOAT32,                          // 1002
+  PIXEL_FORMAT_BUTT,                             // 1003
+  PIXEL_FORMAT_UNKNOWN = 10000                   // 10000
+};
+
+// Stream Format
+enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL };
+
+// Supported Channel Mode
+enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 };
+
+// Supported Border Type
+enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 };
+
+// Venc parameter type
+enum aclvencChannelDescParamType {
+  ACL_VENC_THREAD_ID_UINT64 = 0,
+  ACL_VENC_CALLBACK_PTR,
+  ACL_VENC_PIXEL_FORMAT_UINT32,
+  ACL_VENC_ENCODE_TYPE_UINT32,
+  ACL_VENC_PIC_WIDTH_UINT32,
+  ACL_VENC_PIC_HEIGHT_UINT32,
+  ACL_VENC_KEY_FRAME_INTERVAL_UINT32,
+  ACL_VENC_BUF_ADDR_PTR,
+  ACL_VENC_BUF_SIZE_UINT32,
+  ACL_VENC_RC_MODE_UINT32,
+  ACL_VENC_SRC_RATE_UINT32,
+  ACL_VENC_MAX_BITRATE_UINT32,
+  ACL_VENC_MAX_IP_PROP_UINT32
+};
+
+// Jpeg picture format
+enum acldvppJpegFormat {
+  ACL_JPEG_CSS_444 = 0,
+  ACL_JPEG_CSS_422,
+  ACL_JPEG_CSS_420,
+  ACL_JPEG_CSS_GRAY,
+  ACL_JPEG_CSS_440,
+  ACL_JPEG_CSS_411,
+  ACL_JPEG_CSS_UNKNOWN = 1000
+};
+
+/**
+ * @ingroup AscendCL
+ * @brief alloc device memory for dvpp.
+ *
+ * @par Function
+ * @li It's mainly used for allocating memory to device media data processing.
+ * The requested memory meets the data processing requirements.
+ * After calling this interface to request memory,
+ * you must release the memory using the acldvppFree interface.
+ * @li When calling the acldvppMalloc interface to apply for memory,
+ * the size entered by the user is aligned up to an integer multiple of 32,
+ * and an additional 32 bytes are allocated.
+ *
+ * @par Restriction
+ * If the user uses the acldvppMalloc interface to apply for a large block of
+ * memory and then divides and manages that memory themselves, the region
+ * reserved for each picture must be sized by aligning the picture's actual
+ * data size up to an integer multiple of 32 and then adding 32 bytes
+ * (ALIGN_UP[len] + 32 bytes).
+ *
+ * @param devPtr [OUT]    memory pointer.
+ * @param size [IN]       memory size.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppFree
+ */
+ACL_FUNC_VISIBILITY aclError acldvppMalloc(void **devPtr, size_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief free device memory for dvpp.
+ *
+ * @par Function
+ * Free the memory requested through the acldvppMalloc interface
+ * @param devPtr [IN]      memory pointer to free.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppMalloc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppFree(void *devPtr);
+
+/**
+ * @ingroup AscendCL
+ * @brief create DvppChannelDesc.
+ *
+ * @par Function
+ * Create a channel for image data processing.
+ * The same channel can be reused
+ * and is no longer available after destruction
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY acldvppChannelDesc *acldvppCreateChannelDesc();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy dvppChannelDesc.
+ *
+ * @par Function
+ * Can only destroy channel descriptions created by the acldvppCreateChannelDesc interface
+ * @param channelDesc [IN]     the channel description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannelDesc | acldvppDestroyChannel
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyChannelDesc(acldvppChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp channel Id.
+ *
+ * @par Restriction
+ * Interface calling sequence:
+ * acldvppCreateChannelDesc --> acldvppCreateChannel -->
+ * acldvppGetChannelDescChannelId
+ *
+ * @param channelDesc [IN]     the channel description.
+ *
+ * @retval channel id.
+ *
+ * @see acldvppCreateChannelDesc | acldvppCreateChannel
+ */
+ACL_FUNC_VISIBILITY uint64_t acldvppGetChannelDescChannelId(const acldvppChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp picture description.
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY acldvppPicDesc *acldvppCreatePicDesc();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp picture description.
+ *
+ * @par Function
+ * Can only destroy picture description information created
+ * through acldvppCreatePicDesc interface.
+ * @param picDesc [IN]     dvpp picture description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreatePicDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyPicDesc(acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's data.
+ *
+ * @param picDesc [OUT]   dvpp picture description.
+ * @param dataDev [IN]    dvpp picture dataDev. Must be the memory
+ *                        requested using the acldvppMalloc interface.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppMalloc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescData(acldvppPicDesc *picDesc, void *dataDev);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's size.
+ *
+ * @param picDesc [OUT]      dvpp picture description.
+ * @param size [IN]          dvpp picture size.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescSize(acldvppPicDesc *picDesc, uint32_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's format.
+ *
+ * @param picDesc [OUT]    dvpp picture description.
+ * @param format [IN]      dvpp picture format.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescFormat(acldvppPicDesc *picDesc, acldvppPixelFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's width.
+ *
+ * @param picDesc [OUT]   dvpp picture description.
+ * @param width [IN]      dvpp picture width.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescWidth(acldvppPicDesc *picDesc, uint32_t width);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's height.
+ *
+ * @param picDesc [OUT]  dvpp picture description.
+ * @param height [IN]    dvpp picture height.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescHeight(acldvppPicDesc *picDesc, uint32_t height);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's widthStride.
+ *
+ * @par Restriction
+ * Width alignment requirements:
+ * @li The minimum stride is 32 and the maximum is 4096 * 4
+ * (that is, an image in argb format with a width of 4096);
+ * @li For 8K scaling, widthStride is required to be aligned to 2;
+ * @li For non 8K scaling, the calculation formula for widthStride
+ * is different for different image formats:
+ *   @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16
+ *   @li yuv422packed: input image width * 2 and then align to 16
+ *   @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16
+ *   @li xrgb8888: input image width * 4, align to 16
+ *   @li HFBC:input image width
+ *
+ * @param picDesc [OUT]      dvpp picture description.
+ * @param widthStride [IN]   dvpp picture widthStride.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescWidthStride(acldvppPicDesc *picDesc, uint32_t widthStride);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's heightStride.
+ *
+ * @par Restriction
+ * Height alignment requirements:
+ * @li The height of the input image is aligned to 2.
+ * The height stride has a minimum of 6 and a maximum of 4096.
+ *
+ * @param picDesc [OUT]        dvpp picture description.
+ * @param heightStride [IN]    dvpp picture heightStride.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescHeightStride(acldvppPicDesc *picDesc, uint32_t heightStride);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp picture description's retcode.
+ *
+ * @param picDesc [OUT]    dvpp picture description.
+ * @param retCode [IN]     dvpp picture retcode.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetPicDescRetCode(acldvppPicDesc *picDesc, uint32_t retCode);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get picture data.
+ *
+ * @param picDesc [IN]    dvpp picture description.
+ *
+ * @retval picture data addr.
+ * @retval default nullptr.
+ */
+ACL_FUNC_VISIBILITY void *acldvppGetPicDescData(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get picture data size.
+ *
+ * @param picDesc [IN]    dvpp picture description.
+ *
+ * @retval picture data size.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescSize(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture desc's format.
+ *
+ * @param picDesc [IN]    dvpp picture description.
+ *
+ * @retval format
+ * @retval default PIXEL_FORMAT_YUV_400.
+ */
+ACL_FUNC_VISIBILITY acldvppPixelFormat acldvppGetPicDescFormat(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture desc's width.
+ *
+ * @param picDesc [IN]    dvpp picture description.
+ *
+ * @retval width.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescWidth(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture desc's height.
+ *
+ * @param picDesc [IN]    dvpp picture description.
+ *
+ * @retval height.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescHeight(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture desc's widthStride.
+ *
+ * @par Restriction
+ * Width alignment requirements:
+ * @li The minimum stride is 32 and the maximum is 4096 * 4
+ * (that is, an image in argb format with a width of 4096);
+ * @li For 8K scaling, widthStride is required to be aligned to 2;
+ * @li For non 8K scaling, the calculation formula for widthStride
+ * is different for different image formats:
+ *   @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16
+ *   @li yuv422packed: input image width * 2 and then align to 16
+ *   @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16
+ *   @li xrgb8888: input image width * 4, align to 16
+ *   @li HFBC:input image width
+ *
+ * @param picDesc [IN]    dvpp picture description.
+ *
+ * @retval stride width.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescWidthStride(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture desc's heightStride.
+ *
+ * @par Restriction
+ * Height alignment requirements:
+ * @li The height of the input image is aligned to 2.
+ * The height stride has a minimum of 6 and a maximum of 4096.
+ *
+ * @param picDesc [IN]    dvpp picture description.
+ *
+ * @retval stride height.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescHeightStride(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture desc's retcode.
+ *
+ * @param picDesc [IN]    dvpp picture description.
+ *
+ * @retval ret code.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp roi config.
+ *
+ * @param left [IN]    the left offset, must be even
+ * @param right [IN]   the right offset, must be odd
+ * @param top [IN]     the top offset, must be even
+ * @param bottom [IN]  the bottom offset, must be odd
+ *
+ * @retval null for failed.
+ * @retval other success
+ */
+ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top,
+                                                             uint32_t bottom);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp roi config.
+ *
+ * @par Function
+ * Destroys data created through the acldvppCreateRoiConfig interface
+ * @param roiConfig [IN]    dvpp roi config.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateRoiConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyRoiConfig(acldvppRoiConfig *roiConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set left of RoiConfig.
+ *
+ * @param config [OUT]  RoiConfig
+ * @param left [IN]     left offset
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigLeft(acldvppRoiConfig *config, uint32_t left);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set right of RoiConfig.
+ *
+ * @param config [OUT]  RoiConfig
+ * @param right [IN]    right offset
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigRight(acldvppRoiConfig *config, uint32_t right);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set top of RoiConfig.
+ *
+ * @param config [OUT]  RoiConfig
+ * @param top [IN]      top offset
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigTop(acldvppRoiConfig *config, uint32_t top);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set bottom of RoiConfig.
+ *
+ * @param config [OUT]   RoiConfig
+ * @param bottom [IN]    bottom offset
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config, uint32_t bottom);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set RoiConfig.
+ *
+ * @param config [OUT]    RoiConfig
+ * @param left [IN]       left offset
+ * @param right [IN]      right offset
+ * @param top [IN]        top offset
+ * @param bottom [IN]     bottom offset
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top,
+                                                 uint32_t bottom);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp resize config.
+ * Specifying a scaling algorithm is not supported.
+ * The default scaling algorithm is "nearest neighbor interpolation".
+ *
+ * @retval null for failed.
+ * @retval other success.
+ */
+ACL_FUNC_VISIBILITY acldvppResizeConfig *acldvppCreateResizeConfig();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp resize config.
+ *
+ * @par Function
+ * Destroys the scaling configuration data created by
+ * the acldvppCreateResizeConfig interface
+ *
+ * @param resizeConfig [IN]    resize config.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateResizeConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyResizeConfig(acldvppResizeConfig *resizeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create jpege config.
+ *
+ * @retval null for failed.
+ * @retval other success.
+ */
+ACL_FUNC_VISIBILITY acldvppJpegeConfig *acldvppCreateJpegeConfig();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy jpege config.
+ *
+ * @par Function
+ * Destroys the encoding configuration data created by
+ * the acldvppCreateJpegeConfig interface
+ * @param jpegeConfig [IN] config pointer to destroy.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateJpegeConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyJpegeConfig(acldvppJpegeConfig *jpegeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set jpege config's level.
+ *
+ * @param jpegeConfig [OUT]    Call the acldvppCreateJpegeConfig
+ *                             interface to create acldvppJpegeConfig data
+ * @param level [IN]   Encoding quality range [0, 100],
+ *                     where level 0 encoding quality is similar to level 100,
+ *                     and the smaller the value in [1, 100],
+ *                     the worse the quality of the output picture.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetJpegeConfigLevel(acldvppJpegeConfig *jpegeConfig, uint32_t level);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get jpege config's level.
+ *
+ * @param jpegeConfig [IN]    jpege config.
+ *
+ * @retval compression level.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetJpegeConfigLevel(const acldvppJpegeConfig *jpegeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief create vdecChannelDesc, the channel description information
+ * used when creating a video data processing channel.
+ *
+ * @retval null for failed.
+ * @retval other success
+ */
+ACL_FUNC_VISIBILITY aclvdecChannelDesc *aclvdecCreateChannelDesc();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy vdecChannelDesc.
+ *
+ * @par Function
+ * Can only destroy aclvdecChannelDesc type created
+ * through aclvdecCreateChannelDesc interface
+ * @param channelDesc [IN]    channel description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecCreateChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannelDesc(aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's channel id.
+ *
+ * @param channelDesc [OUT]  vdec channel description.
+ * @param channelId [IN]     decoding channel id: 0~15.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescChannelId(aclvdecChannelDesc *channelDesc, uint32_t channelId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's thread id.
+ *
+ * @param channelDesc [OUT]    vdec channel description.
+ * @param threadId [IN]        thread id.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescThreadId(aclvdecChannelDesc *channelDesc, uint64_t threadId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's callback function.
+ *
+ * @param channelDesc [OUT]  vdec channel description.
+ * @param callback [IN]      function callback. Function prototype:
+ * void (* aclvdecCallback)
+ * (acldvppStreamDesc * input, acldvppPicDesc * output, void* userdata)
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecCallback
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescCallback(aclvdecChannelDesc *channelDesc, aclvdecCallback callback);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's video encoding type.
+ *
+ * @param channelDesc [OUT]  vdec channel description.
+ * @param enType [IN]        video encoding type.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescEnType(aclvdecChannelDesc *channelDesc, acldvppStreamFormat enType);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's out picture format.
+ *
+ * @param channelDesc [OUT]     vdec channel description.
+ * @param outPicFormat [IN]     out picture format (acldvppPixelFormat).
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicFormat(aclvdecChannelDesc *channelDesc,
+                                                               acldvppPixelFormat outPicFormat);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's out picture width.
+ *
+ * @param channelDesc [OUT]    vdec channel description.
+ * @param outPicWidth [IN]     out picture width.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicWidth(aclvdecChannelDesc *channelDesc, uint32_t outPicWidth);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's out picture height.
+ *
+ * @param channelDesc [OUT]     vdec channel description.
+ * @param outPicHeight [IN]     out picture height.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutPicHeight(aclvdecChannelDesc *channelDesc, uint32_t outPicHeight);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's reference frame num.
+ *
+ * @param channelDesc [OUT]    vdec channel description.
+ * @param refFrameNum [IN]     reference frame num.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescRefFrameNum(aclvdecChannelDesc *channelDesc, uint32_t refFrameNum);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel description's bit depth.
+ *
+ * @param channelDesc [OUT]  vdec channel description.
+ * @param bitDepth [IN]      bit depth.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescBitDepth(aclvdecChannelDesc *channelDesc, uint32_t bitDepth);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get vdec channel description's channel id.
+ *
+ * @param channelDesc [IN]     vdec channel description.
+ *
+ * @retval decoding channel id: 0~15.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescChannelId(const aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get vdec channel description's thread id.
+ *
+ * @param channelDesc [IN]     vdec channel description.
+ *
+ * @retval thread id.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint64_t aclvdecGetChannelDescThreadId(const aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get vdec channel description's callback function.
+ *
+ * @param channelDesc [IN]    vdec channel description.
+ *
+ * @retval function callback. Function prototype:
+ * void (* aclvdecCallback)
+ * (acldvppStreamDesc * input, acldvppPicDesc * output, void* userdata)
+ * @retval default null.
+ *
+ * @see aclvdecCallback
+ */
+ACL_FUNC_VISIBILITY aclvdecCallback aclvdecGetChannelDescCallback(const aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get vdec channel description's video encoding type.
+ *
+ * @param channelDesc [IN]    vdec channel description.
+ *
+ * @retval video encoding type.
+ * @retval default H265_MAIN_LEVEL.
+ */
+ACL_FUNC_VISIBILITY acldvppStreamFormat aclvdecGetChannelDescEnType(const aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get vdec channel description's out picture format.
+ *
+ * @param channelDesc [IN]    vdec channel description.
+ *
+ * @retval out picture format.
+ * @retval default DVPP_OUTPUT_YUV420SP_UV.
+ */
+ACL_FUNC_VISIBILITY acldvppPixelFormat aclvdecGetChannelDescOutPicFormat(const aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get vdec channel description's out picture width.
+ *
+ * @param channelDesc [IN]    vdec channel description.
+ *
+ * @retval out picture width.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutPicWidth(const aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get vdec channel description's out picture height.
+ *
+ * @param channelDesc [IN]    vdec channel description.
+ *
+ * @retval out picture height (for vdec malloc memory).
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutPicHeight(const aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get vdec channel description's bit depth.
+ *
+ * @param channelDesc [IN]    vdec channel description.
+ *
+ * @retval bit depth.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescBitDepth(const aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get vdec channel description's reference frame num.
+ *
+ * @param channelDesc [IN]    vdec channel description.
+ *
+ * @retval reference frame num.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescRefFrameNum(const aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief create vencChannelDesc.
+ *
+ * @retval null for failed, other success
+ */
+ACL_FUNC_VISIBILITY aclvencChannelDesc *aclvencCreateChannelDesc();
+
+/**
+ * @ingroup AscendCL
+ * @brief destroy vencChannelDesc.
+ *
+ * @param channelDesc [IN] channel desc.
+ *
+ * @retval ACL_SUCCESS:success, other:failed
+ */
+ACL_FUNC_VISIBILITY aclError aclvencDestroyChannelDesc(aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set thread id for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param threadId [IN] thread id
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescThreadId(aclvencChannelDesc *channelDesc, uint64_t threadId);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set func callback for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param callback [IN]     func callback
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescCallback(aclvencChannelDesc *channelDesc, aclvencCallback callback);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set video encoding type for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param enType [IN]       video encoding type
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescEnType(aclvencChannelDesc *channelDesc, acldvppStreamFormat enType);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set pic format for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param picFormat [IN]    pic format
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicFormat(aclvencChannelDesc *channelDesc,
+                                                            acldvppPixelFormat picFormat);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set out pic width for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param picWidth [IN]     pic width
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicWidth(aclvencChannelDesc *channelDesc, uint32_t picWidth);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set pic height for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param picHeight [IN]    pic height
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescPicHeight(aclvencChannelDesc *channelDesc, uint32_t picHeight);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set key frame interval for venc channel desc.
+ *
+ * @param channelDesc [OUT]     venc channel desc
+ * @param keyFrameInterval [IN] Interval of key frame
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescKeyFrameInterval(aclvencChannelDesc *channelDesc,
+                                                                   uint32_t keyFrameInterval);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set output buffer address for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param bufAddr [IN]      output buffer address
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescBufAddr(aclvencChannelDesc *channelDesc, void *bufAddr);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set output buffer size for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param bufSize [IN]      output buffer size
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescBufSize(aclvencChannelDesc *channelDesc, uint32_t bufSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set rc mode for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param rcMode [IN]       venc rc mode(VBR=1, CBR=2)
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescRcMode(aclvencChannelDesc *channelDesc, uint32_t rcMode);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set source rate for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param srcRate [IN] source rate
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescSrcRate(aclvencChannelDesc *channelDesc, uint32_t srcRate);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set max bit rate for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param maxBitRate [IN]   max bit rate
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc *channelDesc, uint32_t maxBitRate);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set venc parameter for venc channel desc.
+ *
+ * @param channelDesc [OUT] venc channel desc
+ * @param paramType [IN]    parameter type
+ * @param length [IN]       parameter length
+ * @param param [IN]        pointer to parameter value
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc,
+                                                        aclvencChannelDescParamType paramType, size_t length,
+                                                        const void *param);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get output buffer address for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval output buffer address
+ */
+ACL_FUNC_VISIBILITY void *aclvencGetChannelDescBufAddr(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get output buffer size for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval output buffer size
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescBufSize(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get encoding channel id for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval encoding channel id: 0~15, default 0
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescChannelId(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get encoding thread id for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval thread id, default 0
+ */
+ACL_FUNC_VISIBILITY uint64_t aclvencGetChannelDescThreadId(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get func callback for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval func callback, default null
+ */
+ACL_FUNC_VISIBILITY aclvencCallback aclvencGetChannelDescCallback(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get video encoding type for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval video encoding type, default H265_MAIN_LEVEL
+ */
+ACL_FUNC_VISIBILITY acldvppStreamFormat aclvencGetChannelDescEnType(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get pic format for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval pic format
+ */
+ACL_FUNC_VISIBILITY acldvppPixelFormat aclvencGetChannelDescPicFormat(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get pic width for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval pic width, default 0
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescPicWidth(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get pic height for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval pic height, default 0
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescPicHeight(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get interval of key frame for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval interval of key frame, default 0
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescKeyFrameInterval(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ *
+ * @brief Get rc mode for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval rc mode, default 0
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescRcMode(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ *
+ * @brief Get source rate for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval source rate, default 0
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescSrcRate(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ *
+ * @brief Get max bit rate for venc channel desc.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval max bit rate, default 0
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ *
+ * @brief Get venc parameter for venc channel desc.
+ *
+ * @param channelDesc [IN]   venc channel desc
+ * @param paramType [IN]     parameter type
+ * @param length [IN]        parameter length
+ * @param paramRetSize [OUT] pointer to parameter real length
+ * @param param [OUT]        pointer to parameter value
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc,
+                                                        aclvencChannelDescParamType paramType, size_t length,
+                                                        size_t *paramRetSize, void *param);
+
+/**
+ * @ingroup AscendCL
+ * @brief get forced restart of I-frame interval from config
+ *
+ * @param config [IN] venc frame config
+ *
+ * @retval 0: Not forced; 1: Forced restart of I-frame; -1: error (NOTE(review): as uint8_t this would be 255 - confirm)
+ */
+ACL_FUNC_VISIBILITY uint8_t aclvencGetFrameConfigForceIFrame(const aclvencFrameConfig *config);
+
+/**
+ * @ingroup AscendCL
+ * @brief get end frame (eos) flag from config
+ *
+ * @param config [IN] venc frame config
+ *
+ * @retval Whether it is the end frame: 0: no; 1: end frame
+ */
+ACL_FUNC_VISIBILITY uint8_t aclvencGetFrameConfigEos(const aclvencFrameConfig *config);
+
+/**
+ * @ingroup AscendCL
+ * @brief set single frame encoding configuration parameters
+ *
+ * @param config [OUT]     venc frame config
+ * @param forceIFrame [IN] forced restart of I-frame interval: 0: Not forced; 1: Forced restart of I-frame
+ *
+ * @retval ACL_SUCCESS for ok, others for fail
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetFrameConfigForceIFrame(aclvencFrameConfig *config, uint8_t forceIFrame);
+
+/**
+ * @ingroup AscendCL
+ * @brief set single frame encoding configuration parameters
+ *
+ * @param config [OUT] venc frame config
+ * @param eos [IN]     Whether it is the end frame: 0: no; 1: end frame
+ *
+ * @retval ACL_SUCCESS for ok, others for fail
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSetFrameConfigEos(aclvencFrameConfig *config, uint8_t eos);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp venc destroy frame config
+ *
+ * @param config [IN] venc frame config
+ *
+ * @retval ACL_SUCCESS for ok, others for fail
+ */
+ACL_FUNC_VISIBILITY aclError aclvencDestroyFrameConfig(aclvencFrameConfig *config);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp venc frame config.
+ *
+ * @retval null for failed, other aclvencFrameConfig ptr
+ */
+ACL_FUNC_VISIBILITY aclvencFrameConfig *aclvencCreateFrameConfig();
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp venc channel.
+ *
+ * @param channelDesc [IN|OUT] venc channel desc
+ *
+ * @retval ACL_SUCCESS for ok, others for fail
+ */
+ACL_FUNC_VISIBILITY aclError aclvencCreateChannel(aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp venc channel.
+ *
+ * @param channelDesc [IN] venc channel desc
+ *
+ * @retval ACL_SUCCESS for ok, others for fail
+ */
+ACL_FUNC_VISIBILITY aclError aclvencDestroyChannel(aclvencChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp venc launch send frame task.
+ *
+ * @param channelDesc [IN] venc channel desc
+ * @param input [IN]       input picture desc
+ * @param reserve [IN]     reserve parameter
+ * @param config [IN]      dvpp frame config
+ * @param userdata [IN]    user data pointer (presumably handed to the venc callback - TODO confirm)
+ *
+ * @retval ACL_SUCCESS for ok, others for fail
+ */
+ACL_FUNC_VISIBILITY aclError aclvencSendFrame(aclvencChannelDesc *channelDesc, acldvppPicDesc *input, void *reserve,
+                                              aclvencFrameConfig *config, void *userdata);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp stream description.
+ *
+ * @retval null for failed.
+ * @retval other success.
+ */
+ACL_FUNC_VISIBILITY acldvppStreamDesc *acldvppCreateStreamDesc();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp stream description.
+ *
+ * @par Function
+ * Can only destroy acldvppStreamDesc type created through
+ * acldvppCreateStreamDesc interface.
+ *
+ * @param streamDesc [IN]     dvpp stream description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateStreamDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyStreamDesc(acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set stream description's data addr.
+ *
+ * @param streamDesc [OUT]    dvpp stream description.
+ * @param dataDev [IN]        data addr.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescData(acldvppStreamDesc *streamDesc, void *dataDev);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set stream description's data size.
+ *
+ * @param streamDesc [OUT]     dvpp stream description.
+ * @param size [IN]            data size.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescSize(acldvppStreamDesc *streamDesc, uint32_t size);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set stream description's format.
+ *
+ * @param streamDesc [OUT]    dvpp stream description.
+ * @param format [IN]         stream format.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescFormat(acldvppStreamDesc *streamDesc, acldvppStreamFormat format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set stream description's timestamp.
+ *
+ * @param streamDesc [OUT]  dvpp stream description.
+ * @param timestamp [IN]    current timestamp.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescTimestamp(acldvppStreamDesc *streamDesc, uint64_t timestamp);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set stream description's ret code.
+ *
+ * @param streamDesc [OUT]    dvpp stream description.
+ * @param retCode [IN]        result code.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescRetCode(acldvppStreamDesc *streamDesc, uint32_t retCode);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set stream description's eos.
+ *
+ * @param streamDesc [OUT]    dvpp stream description.
+ * @param eos [IN]            end flag of sequence.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetStreamDescEos(acldvppStreamDesc *streamDesc, uint8_t eos);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream description's data addr.
+ *
+ * @param streamDesc [IN]     dvpp stream description.
+ *
+ * @retval data addr.
+ * @retval default nullptr.
+ */
+ACL_FUNC_VISIBILITY void *acldvppGetStreamDescData(const acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream description's data size.
+ *
+ * @param streamDesc [IN]    dvpp stream description.
+ *
+ * @retval data size.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetStreamDescSize(const acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream description's format.
+ *
+ * @param streamDesc [IN]    dvpp stream description.
+ *
+ * @retval stream format.
+ * @retval default ACL_DVPP_STREAM_H264.
+ */
+ACL_FUNC_VISIBILITY acldvppStreamFormat acldvppGetStreamDescFormat(const acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream description's timestamp.
+ *
+ * @param streamDesc [IN]    dvpp stream description.
+ *
+ * @retval current timestamp.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint64_t acldvppGetStreamDescTimestamp(const acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream description's retCode.
+ *
+ * @param streamDesc [IN]    dvpp stream description.
+ *
+ * @retval result code.
+ * @retval default 0.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetStreamDescRetCode(const acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get stream description's eos.
+ *
+ * @param streamDesc [IN]    dvpp stream description.
+ *
+ * @retval end flag of sequence.
+ * @retval default 0(false).
+ */
+ACL_FUNC_VISIBILITY uint8_t acldvppGetStreamDescEos(const acldvppStreamDesc *streamDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create vdec frame config.
+ *
+ * @retval null for failed.
+ * @retval other success.
+ */
+ACL_FUNC_VISIBILITY aclvdecFrameConfig *aclvdecCreateFrameConfig();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy vdec frame config.
+ *
+ * @par Function
+ * Can only destroy aclvdecFrameConfig type created through
+ *  aclvdecCreateFrameConfig interface
+ *
+ * @param vdecFrameConfig [IN]     vdec frame config.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecCreateFrameConfig
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecFrameConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get image width and height of jpeg.
+ *
+ * @param data [IN]          image data in host memory
+ * @param size [IN]          the size of image data
+ * @param width [OUT]        the width of image from image header
+ * @param height [OUT]       the height of image from image header
+ * @param components [OUT]   the components of image from image header
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height,
+                                                     int32_t *components);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get image width, height, components and format of jpeg.
+ *
+ * @param data [IN]          image data in host memory
+ * @param size [IN]          the size of image data
+ * @param width [OUT]        the width of image from image header
+ * @param height [OUT]       the height of image from image header
+ * @param components [OUT]   the components of image from image header
+ * @param format [OUT]       the format of image from image header
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width,
+                                                       uint32_t *height, int32_t *components,
+                                                       acldvppJpegFormat *format);
+
+/**
+ * @ingroup AscendCL
+ * @brief Predict encode size of jpeg image.
+ *
+ * @param inputDesc [IN]     dvpp image desc
+ * @param config [IN]        jpeg encode config
+ * @param size [OUT]         the size predicted of image
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc,
+                                                       const acldvppJpegeConfig *config, uint32_t *size);
+
+/**
+ * @ingroup AscendCL
+ * @brief Predict decode size of jpeg image.
+ *
+ * @param data [IN]                 origin image data in host memory
+ * @param dataSize [IN]             the size of origin image data
+ * @param outputPixelFormat [IN]    the pixel format jpeg decode
+ * @param decSize [OUT]             the size predicted for decode image
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize,
+                                                       acldvppPixelFormat outputPixelFormat, uint32_t *decSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get image width, height and components of png.
+ *
+ * @param data [IN]          image data in host memory
+ * @param dataSize [IN]      the size of image data
+ * @param width [OUT]        the width of image from image header
+ * @param height [OUT]       the height of image from image header
+ * @param components [OUT]   the components of image from image header
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width,
+                                                    uint32_t *height, int32_t *components);
+
+/**
+ * @ingroup AscendCL
+ * @brief Predict decode size of png image.
+ *
+ * @param data [IN]                 origin image data in host memory
+ * @param dataSize [IN]             the size of origin image data
+ * @param outputPixelFormat [IN]    the output pixel format of png decode
+ * @param decSize [OUT]             the size predicted for decode image
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize,
+                                                      acldvppPixelFormat outputPixelFormat, uint32_t *decSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp channel, the same channel can be reused
+ * and is no longer available after destruction.
+ *
+ * @param channelDesc [IN|OUT]    the channel description
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppCreateChannel(acldvppChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp channel.
+ *
+ * @par Restriction
+ * Can only destroy channel created through the acldvppCreateChannel interface
+ *
+ * @param channelDesc [IN]   the channel description
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc resize.
+ *
+ * @par Restriction
+ * Width alignment requirements:
+ * @li The minimum stride is 32 and the maximum is 4096 * 4
+ * (that is, an image in argb format with a width of 4096);
+ * @li For 8K scaling, widthStride is required to be aligned to 2;
+ * @li For non 8K scaling, the calculation formula for widthStride
+ * is different for different image formats:
+ *   @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16
+ *   @li yuv422packed: input image width * 2 and then align to 16
+ *   @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16
+ *   @li xrgb8888: input image width * 4, align to 16
+ *   @li HFBC:input image width
+ * Height alignment requirements:
+ * @li The height of the input image is aligned to 2.
+ * Height stride minimum 6 and maximum 4096.
+ *
+ * @param channelDesc [IN]  the channel description
+ * @param inputDesc [IN]    resize input picture description
+ * @param outputDesc [IN|OUT]  resize output picture description
+ * @param resizeConfig [IN] resize config
+ * @param stream [IN]       resize task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc
+ * | acldvppCreateResizeConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
+                                                   acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig,
+                                                   aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc crop.
+ *
+ * @par Function
+ * crop the input picture according to the specified area,
+ * and then store the picture in the output memory as the output picture
+ *
+ * @par Restriction
+ * Width alignment requirements:
+ * @li The minimum stride is 32 and the maximum is 4096 * 4
+ * (that is, an image in argb format with a width of 4096);
+ * @li For 8K scaling, widthStride is required to be aligned to 2;
+ * @li For non 8K scaling, the calculation formula for widthStride
+ * is different for different image formats:
+ *   @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16
+ *   @li yuv422packed: input image width * 2 and then align to 16
+ *   @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16
+ *   @li xrgb8888: input image width * 4, align to 16
+ *   @li HFBC:input image width
+ * Height alignment requirements:
+ * @li The height of the input image is aligned to 2.
+ * Height stride minimum 6 and maximum 4096.
+ *
+ * @param channelDesc [IN]      the channel description
+ * @param inputDesc [IN]        crop input picture description
+ * @param outputDesc [IN|OUT]   crop output picture description
+ * @param cropArea [IN]         crop area config
+ * @param stream [IN]           crop task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
+                                                 acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
+                                                 aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc crop and resize config.
+ *
+ * @par Function
+ * crop the input picture with resize config according to the specified area,
+ * and then store the picture in the output memory as the output picture
+ *
+ * @par Restriction
+ * Width alignment requirements:
+ * @li The minimum stride is 32 and the maximum is 4096 * 4
+ * (that is, an image in argb format with a width of 4096);
+ * @li For 8K scaling, widthStride is required to be aligned to 2;
+ * @li For non 8K scaling, the calculation formula for widthStride
+ * is different for different image formats:
+ *   @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16
+ *   @li yuv422packed: input image width * 2 and then align to 16
+ *   @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16
+ *   @li xrgb8888: input image width * 4, align to 16
+ *   @li HFBC:input image width
+ * Height alignment requirements:
+ * @li The height of the input image is aligned to 2.
+ * Height stride minimum 6 and maximum 4096.
+ *
+ * @param channelDesc [IN]     the channel description
+ * @param inputDesc [IN]       crop input picture description
+ * @param outputDesc [IN|OUT]  crop output picture description
+ * @param cropArea [IN]        crop area config
+ * @param resizeConfig [IN]    resize config
+ * @param stream [IN]          crop and resize config task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
+                                                       acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
+                                                       acldvppResizeConfig *resizeConfig, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc batch crop.
+ *
+ * @par Function
+ * crop the input batch picture according to the specified area
+ * as the output batch pictures
+ *
+ * @param channelDesc [IN]         the channel description
+ * @param srcBatchPicDescs [IN]    crop input batch picture description
+ * @param roiNums [IN]    roi config numbers
+ * @param size [IN]       roiNum size
+ * @param dstBatchPicDescs [IN|OUT]    crop output batch picture description
+ * @param cropAreas [IN]    crop area configs
+ * @param stream [IN]       crop batch task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc,
+                                                      acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
+                                                      uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
+                                                      acldvppRoiConfig *cropAreas[], aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc batch crop and resize config.
+ *
+ * @par Function
+ * crop the input batch picture with resize config according to the specified area
+ * as the output batch pictures
+ *
+ * @param channelDesc [IN]             the channel description
+ * @param srcBatchPicDescs [IN]        crop input batch picture description
+ * @param roiNums [IN]                 roi config numbers
+ * @param size [IN]                    roiNum size
+ * @param dstBatchPicDescs [IN|OUT]    crop output batch picture description
+ * @param cropAreas [IN]               crop area configs
+ * @param resizeConfig [IN]            resize config
+ * @param stream [IN]                  crop batch and resize config task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *channelDesc,
+                                                            acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
+                                                            uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
+                                                            acldvppRoiConfig *cropAreas[],
+                                                            acldvppResizeConfig *resizeConfig, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc crop and paste.
+ *
+ * @par Function
+ * crop the input picture according to the specified area,
+ * and paste the picture to the specified position of the target picture
+ * as the output picture
+ *
+ * @param channelDesc [IN]   the channel description
+ * @param inputDesc [IN]     crop and paste input picture description
+ * @param outputDesc [IN|OUT]   crop and paste output picture description
+ * @param cropArea [IN]      crop area config
+ * @param pasteArea [IN]     paste area config
+ * @param stream [IN]        crop and paste task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
+                                                         acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
+                                                         acldvppRoiConfig *pasteArea, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc crop, resize config and paste.
+ *
+ * @par Function
+ * crop the input picture with resize config according to the specified area,
+ * and paste the picture to the specified position of the target picture
+ * as the output picture
+ *
+ * @param channelDesc [IN]       the channel description
+ * @param inputDesc [IN]         crop and paste input picture description
+ * @param outputDesc [IN|OUT]    crop and paste output picture description
+ * @param cropArea [IN]          crop area config
+ * @param pasteArea [IN]         paste area config
+ * @param resizeConfig [IN]      resize config
+ * @param stream [IN]            crop, paste and resize task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
+                                                            acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea,
+                                                            acldvppRoiConfig *pasteArea,
+                                                            acldvppResizeConfig *resizeConfig, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc batch crop and paste.
+ *
+ * @par Function
+ * crop the input batch picture according to the specified area,
+ * and paste the pictures to the specified position of the target pictures
+ * as the output batch pictures
+ *
+ * @param channelDesc [IN]       the channel description
+ * @param srcBatchPicDescs [IN]  crop input batch picture description
+ * @param roiNums [IN]     roi config numbers
+ * @param size [IN]        roiNum size
+ * @param dstBatchPicDescs [IN|OUT]    crop output batch picture description
+ * @param cropAreas [IN]   crop area configs
+ * @param pasteAreas [IN]  paste area configs
+ * @param stream [IN]      crop batch task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc,
+                                                              acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums,
+                                                              uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs,
+                                                              acldvppRoiConfig *cropAreas[],
+                                                              acldvppRoiConfig *pasteAreas[], aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc batch crop, resize config and paste.
+ *
+ * @par Function
+ * crop the input batch picture with resize config according to the specified area,
+ * and paste the pictures to the specified position of the target pictures
+ * as the output batch pictures
+ *
+ * @param channelDesc [IN]             the channel description
+ * @param srcBatchPicDescs [IN]        crop input batch picture description
+ * @param roiNums [IN]                 roi config numbers
+ * @param size [IN]                    roiNum size
+ * @param dstBatchPicDescs [IN|OUT]    crop output batch picture description
+ * @param cropAreas [IN]               crop area configs
+ * @param pasteAreas [IN]              paste area configs
+ * @param resizeConfig [IN]            resize config
+ * @param stream [IN]                  crop batch and resize config task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(
+  acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
+  acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[],
+  acldvppResizeConfig *resizeConfig, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc jpeg decode.
+ *
+ * @par Function
+ * For different source picture formats, after decoding,
+ * output pictures in the following format:
+ * @li jpeg(444) -> YUV444SP:V is front U is back,
+ * YUV420 SP V is front U is back, YUV420SP U is front V is back;
+ * @li jpeg(422) -> YUV422SP:V is in front U is behind,
+ * YUV420SP V is in front U is behind, YUV420SP U is in front V is behind;
+ * @li jpeg(420) -> YUV420SP:
+ * V is front U is back, YUV420SP U is front V is back;
+ * @li jpeg(400) -> YUV420SP:UV data is filled with 0 x 80.
+ *
+ * @param channelDesc [IN]  the channel description
+ * @param data [IN]         decode input picture description's data
+ * @param size [IN]         decode input picture description's size
+ * @param outputDesc [IN|OUT]  decode output picture description
+ * @param stream [IN]       decode task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
+                                                    acldvppPicDesc *outputDesc, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc jpeg encode.
+ *
+ * @param channelDesc [IN]  the channel description
+ * @param inputDesc [IN]    encode input picture description
+ * @param data [OUT]        encode output picture description's data
+ * @param size [IN|OUT]     encode output picture description's size
+ * @param config [IN]       jpeg encode config
+ * @param stream [IN]       encode task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreateJpegeConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
+                                                    const void *data, uint32_t *size, acldvppJpegeConfig *config,
+                                                    aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc png decode.
+ *
+ * @param channelDesc [IN]    the channel description
+ * @param data [IN]           decode input picture description's data
+ * @param size [IN]           decode input picture description's size
+ * @param outputDesc [IN|OUT]    decode output picture description
+ * @param stream [IN]         decode task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size,
+                                                   acldvppPicDesc *outputDesc, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create vdec channel.
+ *
+ * @par Function
+ * Create a channel for video data processing,
+ * the same channel can be reused,
+ * and is no longer available after destruction
+ *
+ * @param channelDesc [IN|OUT]    the channel description
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecCreateChannelDesc
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecCreateChannel(aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy vdec channel.
+ *
+ * @par Function
+ * Can only destroy channels created by the aclvdecCreateChannel interface
+ *
+ * @param channelDesc [IN]    the channel description
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecCreateChannel
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vdec send frame.
+ *
+ * @par Function
+ * Pass the input memory to be decoded
+ * and the decoded output memory to the decoder for decoding
+ *
+ * @param channelDesc [IN] vdec channel description
+ * @param input [IN]       input stream description
+ * @param output [IN|OUT]  output picture description
+ * @param config [IN]      vdec frame config
+ * @param userData [IN]    user data for callback function
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input,
+                                              acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vdec send skipped frame.
+ *
+ * @par Function
+ * Pass video frame to decoder
+ *
+ * @param channelDesc [IN] vdec channel description
+ * @param input [IN]       input stream description
+ * @param config [IN]      vdec frame config
+ * @param userData [IN]    user data for callback function
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input,
+                                                     aclvdecFrameConfig *config, void *userData);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc convert color.
+ *
+ * @par Restriction
+ * @li outputDesc:Width height stride, No changes are allowed. Just configure 0
+ * @par Function
+ * Convert color gamut
+ *
+ * @param channelDesc [IN] the channel description
+ * @param inputDesc [IN]   convert color input picture description
+ * @param outputDesc [IN|OUT] convert color output picture description
+ * @param stream [IN]      convert color task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
+                                                         acldvppPicDesc *outputDesc, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc pyramid down.
+ *
+ * @par Restriction
+ * @li outputDesc:format only supported YUV400
+ * @par Function
+ * Image pyramid down
+ *
+ * @param channelDesc [IN] the channel description
+ * @param inputDesc [IN]   pyr down input picture description
+ * @param outputDesc [IN|OUT] pyr down output picture description
+ * @param reserve [IN]     reserved param, must be nullptr
+ * @param stream [IN]      pyr down task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc,
+                                                    acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set dvpp channel mode.
+ *
+ * @param channelDesc [OUT] the channel description
+ * @param mode [IN]         channel mode
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set resize config interpolation.
+ *
+ * @param resizeConfig [OUT] the resize config
+ * @param interpolation [IN] interpolation
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetResizeConfigInterpolation(acldvppResizeConfig *resizeConfig,
+                                                                 uint32_t interpolation);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get resize config interpolation.
+ *
+ * @param resizeConfig [IN] the resize config
+ *
+ * @retval Interpolation of resize config.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppResizeConfig *resizeConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vdec channel out mode.
+ *
+ * @param channelDesc [OUT] the channel description
+ * @param outMode [IN] channel out mode
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get vdec channel out mode.
+ *
+ * @param channelDesc [IN] the channel description
+ *
+ * @retval Out mode of channel description
+ * @retval default 0
+ */
+ACL_FUNC_VISIBILITY uint32_t aclvdecGetChannelDescOutMode(const aclvdecChannelDesc *channelDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp batch picture description.
+ *
+ * @param batchSize [IN]    batch size
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY acldvppBatchPicDesc *acldvppCreateBatchPicDesc(uint32_t batchSize);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp picture description.
+ *
+ * @param batchPicDesc [IN] dvpp batch picture description.
+ * @param index [IN]        index of batch
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ *
+ * @see acldvppCreateBatchPicDesc
+ */
+ACL_FUNC_VISIBILITY acldvppPicDesc *acldvppGetPicDesc(acldvppBatchPicDesc *batchPicDesc, uint32_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy dvpp batch picture description.
+ *
+ * @par Function
+ * Can only destroy batch picture description information created
+ * through acldvppCreateBatchPicDesc interface.
+ *
+ * @param batchPicDesc [IN]     dvpp batch picture description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateBatchPicDesc
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyBatchPicDesc(acldvppBatchPicDesc *batchPicDesc);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp lut map.
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY acldvppLutMap *acldvppCreateLutMap();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy lut map.
+ *
+ * @param lutMap [IN]    lut map
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyLutMap(acldvppLutMap *lutMap);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get lut map dims.
+ *
+ * @param lutMap [IN]    lut map
+ *
+ * @retval 0 for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get lut map data.
+ *
+ * @param lutMap [IN]   lut map
+ * @param dim [IN]      input dim of map
+ * @param data [OUT]    the dim of lut map's data
+ * @param len [OUT]     the dim of lut map's length
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data,
+                                                  uint32_t *len);
+/**
+ * @ingroup AscendCL
+ * @brief Vpc equalize hist.
+ *
+ * @param channelDesc [IN] channel desc
+ * @param inputDesc [IN]   input desc
+ * @param outputDesc [IN|OUT] output desc
+ * @param lutMap [IN]      lut map param
+ * @param stream [IN]      runtime stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc,
+                                                         const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
+                                                         const acldvppLutMap *lutMap, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create dvpp border config.
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig();
+
+/**
+ * @ingroup AscendCL
+ * @brief Set value of border config.
+ *
+ * @param borderConfig [OUT] border config
+ * @param index [IN]         index of value array
+ * @param value [IN]         value
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index,
+                                                         double value);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set border type of border config.
+ *
+ * @param borderConfig [OUT] border config
+ * @param borderType [IN]    border type
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigBorderType(acldvppBorderConfig *borderConfig,
+                                                              acldvppBorderType borderType);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set top of border config.
+ *
+ * @param borderConfig [OUT] border config
+ * @param top [IN]           top of border
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigTop(acldvppBorderConfig *borderConfig, uint32_t top);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set bottom of border config.
+ *
+ * @param borderConfig [OUT] border config
+ * @param bottom [IN]        bottom of border
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigBottom(acldvppBorderConfig *borderConfig, uint32_t bottom);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set left of border config.
+ *
+ * @param borderConfig [OUT] border config
+ * @param left [IN]          left of border
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigLeft(acldvppBorderConfig *borderConfig, uint32_t left);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set right of border config.
+ *
+ * @param borderConfig [OUT] border config
+ * @param right [IN]         right of border
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigRight(acldvppBorderConfig *borderConfig, uint32_t right);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get value of border config.
+ *
+ * @param borderConfig [IN] border config
+ * @param index [IN] index of value array
+ *
+ * @retval invalid value is < 0, normal Value is >= 0
+ */
+ACL_FUNC_VISIBILITY double acldvppGetBorderConfigValue(const acldvppBorderConfig *borderConfig, uint32_t index);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get border type of border config.
+ *
+ * @param borderConfig [IN] border config
+ * @retval border type of border config
+ */
+ACL_FUNC_VISIBILITY acldvppBorderType acldvppGetBorderConfigBorderType(const acldvppBorderConfig *borderConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get top of border config.
+ *
+ * @param borderConfig [IN] border config
+ *
+ * @retval default 0, top value of border config
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigTop(const acldvppBorderConfig *borderConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get Bottom of border config.
+ *
+ * @param borderConfig [IN] border config
+ *
+ * @retval default 0, bottom value of border config
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigBottom(const acldvppBorderConfig *borderConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get left of border config.
+ *
+ * @param borderConfig [IN] border config
+ *
+ * @retval default 0, left value of border config
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigLeft(const acldvppBorderConfig *borderConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get right of border config.
+ *
+ * @param borderConfig [IN] border config
+ *
+ * @retval default 0, right value of border config
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetBorderConfigRight(const acldvppBorderConfig *borderConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy border config.
+ *
+ * @param borderConfig [IN] border config
+ *
+ * @retval ACL_SUCCESS for success, other for failure
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *borderConfig);
+
+/**
+ * @ingroup AscendCL
+ * @brief Vpc make border.
+ *
+ * @param channelDesc [IN]  channel desc
+ * @param inputDesc [IN]    input desc
+ * @param outputDesc [IN|OUT]  output desc
+ * @param borderConfig [IN] border config param
+ * @param stream [IN]       runtime stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc,
+                                                       const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc,
+                                                       const acldvppBorderConfig *borderConfig, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Dvpp vpc calc hist.
+ *
+ * @param channelDesc [IN] the channel description
+ * @param srcPicDesc [IN]  calc hist input picture description
+ * @param hist [IN|OUT]    calc hist output hist description
+ * @param reserve [IN]     reserved param, must be nullptr
+ * @param stream [IN]      task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc,
+                                                     acldvppHist *hist, void *reserve, aclrtStream stream);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create vpc hist description.
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist();
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy vpc hist description.
+ *
+ * @par Function
+ * Can only destroy hist description information created
+ * through acldvppCreateHist interface.
+ *
+ * @param hist [IN] vpc hist description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateHist
+ */
+ACL_FUNC_VISIBILITY aclError acldvppDestroyHist(acldvppHist *hist);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dims of vpc hist description.
+ *
+ * @param hist [IN] vpc hist description.
+ *
+ * @retval dims of vpc hist description.
+ *
+ * @see acldvppCreateHist | acldvppVpcCalcHistAsync
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetHistDims(acldvppHist *hist);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get data from vpc hist description by dim.
+ *
+ * @param hist [IN]  vpc hist description.
+ * @param dim [IN]   which dim to get data.
+ * @param data [OUT] address of output hist data.
+ * @param len [OUT]  len of output hist data.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateHist | acldvppVpcCalcHistAsync
+ */
+ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim, uint32_t **data, uint16_t *len);
+
+/**
+ * @ingroup AscendCL
+ * @brief Get dvpp calc hist process return code.
+ *
+ * @param hist [IN] vpc hist description.
+ *
+ * @retval Dvpp calc hist process return code.
+ *
+ * @see acldvppCreateHist | acldvppVpcCalcHistAsync
+ */
+ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist);
+
+/**
+ * @ingroup AscendCL
+ * @brief Set vpc hist description to 0.
+ *
+ * @par Function
+ * Can only clear hist description information created
+ * through acldvppCreateHist interface.
+ *
+ * @param hist [IN] vpc hist description.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateHist
+ */
+ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist);
+
+/**
+ * @ingroup AscendCL
+ * @brief dvpp vpc batch crop, resize config and make border.
+ *
+ * @par Function
+ * crop the input batch picture with resize config and border configs according to the specified area
+ * as the output batch pictures
+ *
+ * @param channelDesc [IN]              the channel description
+ * @param srcBatchPicDescs [IN]         crop input batch picture description
+ * @param roiNums [IN]                  roi config numbers
+ * @param size [IN]                     roiNum size
+ * @param dstBatchPicDescs [IN|OUT]     crop output batch picture description
+ * @param cropAreas [IN]                crop area configs
+ * @param borderCfgs [IN]               border configs
+ * @param resizeConfig [IN]             resize config
+ * @param stream [IN]                   crop batch, resize config and make border task stream
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig
+ */
+ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(
+  acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size,
+  acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[],
+  acldvppResizeConfig *resizeConfig, aclrtStream stream);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_
diff --git a/inc/external/acl/ops/acl_fv.h b/inc/external/acl/ops/acl_fv.h
new file mode 100644
index 00000000..4bd392c9
--- /dev/null
+++ b/inc/external/acl/ops/acl_fv.h
@@ -0,0 +1,348 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_EXTERNAL_ACL_OPS_ACL_RETR_H_
+#define INC_EXTERNAL_ACL_OPS_ACL_RETR_H_
+
+#include "acl/acl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct aclfvInitPara aclfvInitPara;
+typedef struct aclfvFeatureInfo aclfvFeatureInfo;
+typedef struct aclfvRepoRange aclfvRepoRange;
+typedef struct aclfvQueryTable aclfvQueryTable;
+typedef struct aclfvSearchInput aclfvSearchInput;
+typedef struct aclfvSearchResult aclfvSearchResult;
+
+// search operation type (typedef'd so the bare name aclfvSearchType is usable from C as well as C++)
+typedef enum aclfvSearchType {
+  SEARCH_1_N,  // 1:N operation type
+  SEARCH_N_M   // N:M operation type
+} aclfvSearchType;
+
+/**
+ * @ingroup AscendCL
+ * @brief Create fv init param.
+ *
+ * @param fsNum [IN]  The feature num
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY aclfvInitPara *aclfvCreateInitPara(uint64_t fsNum);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy fv init param.
+ *
+ * @par Function
+ * Can only destroy fv init param information created
+ * through aclfvCreateInitPara interface.
+ *
+ * @param initPara [IN]   fv init param.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclfvCreateInitPara
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDestroyInitPara(aclfvInitPara *initPara);
+
+/**
+ * @ingroup AscendCL
+ * @brief set value for maxTopNumFor1N which in fv init param.
+ *
+ * @param initPara [IN|OUT]     fv init param.
+ * @param maxTopNumFor1N [IN]   maxTopNumFor1N value for init param.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclfvSet1NTopNum(aclfvInitPara *initPara, uint32_t maxTopNumFor1N);
+
+/**
+ * @ingroup AscendCL
+ * @brief set value for maxTopNumForNM which in fv init param.
+ *
+ * @param initPara [IN|OUT]        fv init param.
+ * @param maxTopNumForNM [IN]   maxTopNumForNM value for init param.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ */
+ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t maxTopNumForNM);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create fv feature info.
+ *
+ * @param id0 [IN]     The first level library id0
+ * @param id1 [IN]     Secondary library id1
+ * @param offset [IN]  The offset of the first feature in the library
+ * @param featureLen [IN]       Single feature length
+ * @param featureCount [IN]     Single feature count
+ * @param featureData [IN]      Feature value list
+ * @param featureDataLen [IN]   Feature value list length
+ *
+ * @retval null for failed.
+ * @retval OtherValues success.
+ */
+ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset,
+                                                             uint32_t featureLen, uint32_t featureCount,
+                                                             uint8_t *featureData, uint32_t featureDataLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy fv feature info.
+ *
+ * @par Function
+ * Can only destroy fv feature info information created
+ * through aclfvCreateFeatureInfo interface.
+ *
+ * @param featureInfo [IN]     fv feature info.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclfvCreateFeatureInfo
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDestroyFeatureInfo(aclfvFeatureInfo *featureInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create fv repo range.
+ *
+ * @param id0Min [IN]  id0 start value
+ * @param id0Max [IN]  id0 max
+ * @param id1Min [IN]  id1 start value
+ * @param id1Max [IN]  id1 max
+ *
+ * @retval null for failed. OtherValues success
+ */
+ACL_FUNC_VISIBILITY aclfvRepoRange *aclfvCreateRepoRange(uint32_t id0Min, uint32_t id0Max, uint32_t id1Min,
+                                                         uint32_t id1Max);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy fv repo range.
+ *
+ * @par Function
+ * Can only destroy fv repo range information created
+ * through aclfvCreateRepoRange interface.
+ *
+ * @param repoRange [IN]     fv repo range.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclfvCreateRepoRange
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDestroyRepoRange(aclfvRepoRange *repoRange);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create query table.
+ *
+ * @param queryCnt [IN]   Number of tables, the maximum number is 6
+ * @param tableLen [IN]   Single table length, table length is 32KB
+ * @param tableData [IN]  Feature value list
+ * @param tableDataLen [IN]   The length of memory requested by the tableData pointer
+ *
+ * @retval null for failed. OtherValues success
+ */
+ACL_FUNC_VISIBILITY aclfvQueryTable *aclfvCreateQueryTable(uint32_t queryCnt, uint32_t tableLen, uint8_t *tableData,
+                                                           uint32_t tableDataLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy query table.
+ *
+ * @par Function
+ * Can only destroy query table information created
+ * through aclfvCreateQueryTable interface.
+ *
+ * @param queryTable [IN]     query table.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclfvCreateQueryTable
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDestroyQueryTable(aclfvQueryTable *queryTable);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create search input.
+ *
+ * @param queryTable [IN]  query table
+ * @param repoRange [IN]   query repo range
+ * @param topk [IN]  query topk
+ *
+ * @retval null for failed. OtherValues success
+ */
+ACL_FUNC_VISIBILITY aclfvSearchInput *aclfvCreateSearchInput(aclfvQueryTable *queryTable, aclfvRepoRange *repoRange,
+                                                             uint32_t topk);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy search input.
+ *
+ * @par Function
+ * Can only destroy search input information created
+ * through aclfvCreateSearchInput interface.
+ *
+ * @param searchInput [IN]     search input.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclfvCreateSearchInput
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInput);
+
+/**
+ * @ingroup AscendCL
+ * @brief Create search result.
+ *
+ * @param queryCnt [IN]   Retrieve the number of features
+ * @param resultNum [IN]  The number of search results for each feature, the number is queryCnt
+ * @param resultNumDataLen [IN]  resultNum memory length
+ * @param id0 [IN]  Level 1 library id0
+ * @param id1 [IN]  Secondary library id1
+ * @param resultOffset [IN]   The offset of the bottom library corresponding
+ * to each feature retrieval result, total length topK * queryCnt
+ * @param resultDistance [IN]  Distance, total length topK * queryCnt
+ * @param dataLen [IN]  The memory size requested by
+ * id0, id1, resultOffset and resultDistance
+ *
+ * @retval null for failed. OtherValues success
+ */
+ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum,
+                                                               uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1,
+                                                               uint32_t *resultOffset, float *resultDistance,
+                                                               uint32_t dataLen);
+
+/**
+ * @ingroup AscendCL
+ * @brief Destroy search result.
+ *
+ * @par Function
+ * Can only destroy search result information created
+ * through aclfvCreateSearchResult interface.
+ *
+ * @param searchResult [IN]     search result.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure
+ *
+ * @see aclfvCreateSearchResult
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDestroySearchResult(aclfvSearchResult *searchResult);
+
+/**
+ * @ingroup AscendCL
+ * @brief fv IP initialize.
+ *
+ * @param initPara [IN]     fv init param.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure.
+ */
+ACL_FUNC_VISIBILITY aclError aclfvInit(aclfvInitPara *initPara);
+
+/**
+ * @ingroup AscendCL
+ * @brief release fv resources.
+ *
+ * @par Function
+ * Can only release fv resources created
+ * through aclfvInit interface.
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure.
+ *
+ * @see aclfvInit
+ */
+ACL_FUNC_VISIBILITY aclError aclfvRelease();
+
+/**
+ * @ingroup AscendCL
+ * @brief fv repo add.
+ *
+ * @param type [IN]          repo add type
+ * @param featureInfo [IN]   add feature information
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure.
+ */
+ACL_FUNC_VISIBILITY aclError aclfvRepoAdd(aclfvSearchType type, aclfvFeatureInfo *featureInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief fv repo del.
+ *
+ * @param type [IN]       repo delete type
+ * @param repoRange [IN]  repo range information
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure.
+ */
+ACL_FUNC_VISIBILITY aclError aclfvRepoDel(aclfvSearchType type, aclfvRepoRange *repoRange);
+
+/**
+ * @ingroup AscendCL
+ * @brief fv accurate del.
+ *
+ * @param featureInfo [IN]   accurate delete feature information
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure.
+ */
+ACL_FUNC_VISIBILITY aclError aclfvDel(aclfvFeatureInfo *featureInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief fv accurate modify.
+ *
+ * @param featureInfo [IN]  accurate modify feature information
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure.
+ */
+ACL_FUNC_VISIBILITY aclError aclfvModify(aclfvFeatureInfo *featureInfo);
+
+/**
+ * @ingroup AscendCL
+ * @brief fv search.
+ *
+ * @param type [IN]  search type
+ * @param searchInput [IN]    search input
+ * @param searchRst [OUT]     search result
+ *
+ * @retval ACL_SUCCESS The function is successfully executed.
+ * @retval OtherValues Failure.
+ */
+ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput *searchInput,
+                                         aclfvSearchResult *searchRst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_
diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h
new file mode 100644
index 00000000..8261adc4
--- /dev/null
+++ b/inc/external/hccl/hccl.h
@@ -0,0 +1,159 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file hccl.h
+ * @brief HCCL API
+ */
+
+#ifndef HCCL_H_
+#define HCCL_H_
+
+#include <hccl/hccl_types.h>
+#include <acl/acl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+/**
+ * @brief Initialize HCCL.
+ *
+ * @param clusterInfo A string identifying the cluster info file path, including the file name.
+ * @param rank An integer identifying the ID of the rank.
+ * @param comm A pointer identifying the initialized communication resource.
+ * @return HcclResult
+ * @see HcclCommDestroy()
+ */
+extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm);
+
+/**
+ * @brief Get hccl root info.
+ *
+ * @param rootInfo A pointer identifying the hccl root info.
+ * @return HcclResult
+ */
+extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo);
+
+/**
+ * @brief Initialize HCCL with root info.
+ *
+ * @param nRanks An integer identifying the rank size of the cluster.
+ * @param rootInfo A struct identifying the hccl root info.
+ * @param rank An integer identifying the ID of the rank.
+ * @param comm A pointer identifying the initialized communication resource.
+ * @return HcclResult
+ * @see HcclCommDestroy()
+ */
+extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm);
+
+/**
+ * @brief AllReduce operator.
+ *
+ * @param sendBuf A pointer identifying the input data address of the operator.
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param count An integer(u64) identifying the number of the output data.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16,
+ * float32.
+ * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
+ * @param comm A pointer identifying the communication resource based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
+                                HcclComm comm, aclrtStream stream);
+
+/**
+ * @brief Broadcast operator.
+ *
+ * @param buf A pointer identifying the data address of the operator.
+ * @param count An integer(u64) identifying the number of the data.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
+ * @param root An integer(u32) identifying the root rank in the operator.
+ * @param comm A pointer identifying the communication resource based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm,
+                                aclrtStream stream);
+
+/**
+ * @brief ReduceScatter operator.
+ *
+ * @param sendBuf A pointer identifying the input data address of the operator.
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param recvCount An integer(u64) identifying the number of the output data.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
+ * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
+ * @param comm A pointer identifying the communication resource based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType,
+                                    HcclReduceOp op, HcclComm comm, aclrtStream stream);
+
+/**
+ * @brief AllGather operator.
+ *
+ * @param sendBuf A pointer identifying the input data address of the operator.
+ * @param recvBuf A pointer identifying the output data address of the operator.
+ * @param sendCount An integer(u64) identifying the number of the input data.
+ * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
+ * @param comm A pointer identifying the communication resource based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm,
+                                aclrtStream stream);
+/**
+ * @brief Get the rank size of this comm.
+ *
+ * @param comm A pointer identifying the communication resource based on.
+ * @param rankSize  A pointer identifying the rank size.
+ * @return HcclResult
+ */
+extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize);
+
+/**
+ * @brief Get the rank id of this comm.
+ *
+ * @param comm A pointer identifying the communication resource based on.
+ * @param rank A pointer identifying the rank id.
+ * @return HcclResult
+ */
+extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank);
+/**
+ * @brief Barrier operator.
+ *
+ * @param comm A pointer identifying the communication resource based on.
+ * @param stream A pointer identifying the stream information.
+ * @return HcclResult
+ */
+extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream);
+
+/**
+ * @brief Destroy HCCL comm
+ *
+ * @param comm A pointer identifying the communication resource to be destroyed.
+ * @return HcclResult
+ * @see HcclCommInitClusterInfo()
+ */
+extern HcclResult HcclCommDestroy(HcclComm comm);
+
+#ifdef __cplusplus
+}
+#endif  // __cplusplus
+#endif  // HCCL_H_
diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h
new file mode 100644
index 00000000..0e832396
--- /dev/null
+++ b/inc/external/hccl/hccl_types.h
@@ -0,0 +1,101 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file hccl_types.h
+ * @brief HCCL data type definition
+ *
+ */
+
+#ifndef HCCL_TYPES_H_
+#define HCCL_TYPES_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+/**
+ * @brief HCCL functions return value definition
+ */
+typedef enum {
+  HCCL_SUCCESS = 0,              /**< success */
+  HCCL_E_PARA = 1,               /**< parameter error */
+  HCCL_E_PTR = 2,                /**< empty pointer */
+  HCCL_E_MEMORY = 3,             /**< memory error */
+  HCCL_E_INTERNAL = 4,           /**< internal error */
+  HCCL_E_NOT_SUPPORT = 5,        /**< not support feature */
+  HCCL_E_NOT_FOUND = 6,          /**< not found specific resource */
+  HCCL_E_UNAVAIL = 7,            /**< resource unavailable */
+  HCCL_E_SYSCALL = 8,            /**< call system interface error */
+  HCCL_E_TIMEOUT = 9,            /**< timeout */
+  HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */
+  HCCL_E_TCP_CONNECT = 11,       /**< tcp connect fail */
+  HCCL_E_ROCE_CONNECT = 12,      /**< roce connect fail */
+  HCCL_E_TCP_TRANSFER = 13,      /**< tcp transfer fail */
+  HCCL_E_ROCE_TRANSFER = 14,     /**< roce transfer fail */
+  HCCL_E_RUNTIME = 15,           /**< call runtime api fail */
+  HCCL_E_DRV = 16,               /**< call driver api fail */
+  HCCL_E_PROFILING = 17,         /**< call profiling api fail */
+  HCCL_E_CCE = 18,               /**< call cce api fail */
+  HCCL_E_NETWORK = 19,           /**< call network api fail */
+  HCCL_E_RESERVED                /**< reserved */
+} HcclResult;
+
+/**
+ * @brief handle to HCCL communicator
+ */
+typedef void *HcclComm;
+
+/**
+ * @brief HCCL reduction operation
+ */
+typedef enum {
+  HCCL_REDUCE_SUM = 0,  /**< sum */
+  HCCL_REDUCE_PROD = 1, /**< prod */
+  HCCL_REDUCE_MAX = 2,  /**< max */
+  HCCL_REDUCE_MIN = 3,  /**< min */
+  HCCL_REDUCE_RESERVED  /**< reserved */
+} HcclReduceOp;
+
+/**
+ * @brief HCCL data type
+ */
+typedef enum {
+  HCCL_DATA_TYPE_INT8 = 0,   /**< int8 */
+  HCCL_DATA_TYPE_INT16 = 1,  /**< int16 */
+  HCCL_DATA_TYPE_INT32 = 2,  /**< int32 */
+  HCCL_DATA_TYPE_FP16 = 3,   /**< fp16 */
+  HCCL_DATA_TYPE_FP32 = 4,   /**< fp32 */
+  HCCL_DATA_TYPE_INT64 = 5,  /**< int64 */
+  HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */
+  HCCL_DATA_TYPE_RESERVED    /**< reserved */
+} HcclDataType;
+
+const uint32_t HCCL_ROOT_INFO_BYTES = 4108;  // 4108: root info length
+
+/**
+ * @brief HCCL root info
+ */
+typedef struct HcclRootInfoDef {
+  char internal[HCCL_ROOT_INFO_BYTES];
+} HcclRootInfo;
+
+#ifdef __cplusplus
+}
+#endif  // __cplusplus
+#endif  // HCCL_TYPES_H_
diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h
new file mode 100644
index 00000000..a1392cc6
--- /dev/null
+++ b/inc/external/runtime/rt_error_codes.h
@@ -0,0 +1,109 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__
+#define __INC_EXTERNEL_RT_ERROR_CODES_H__
+
+#include <stddef.h>
+#include <stdint.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static const int32_t ACL_RT_SUCCESS = 0;  // success
+
+static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000;             // param invalid
+static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001;          // invalid device id
+static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002;              // current context null
+static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003;            // stream not in current context
+static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004;             // model not in current context
+static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005;              // stream not in model
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006;   // event timestamp invalid
+static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007;  // event timestamp reversal
+static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008;            // memory address unaligned
+static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009;                 // open file failed
+static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010;                // write file failed
+static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011;          // error subscribe stream
+static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012;          // error subscribe thread
+static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013;             // group not set
+static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014;          // group not create
+static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015;          // callback not register to stream
+static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016;       // invalid memory type
+static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017;            // invalid handle
+static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018;       // invalid malloc type
+static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019;              // wait timeout
+
+static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000;  // feature not support
+static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001;    // memory allocation error
+static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002;          // memory free error
+static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003;     // aicore over flow
+static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004;            // no device
+static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005;  // resource alloc fail
+static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006;        // no permission
+static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007;    // no event resource
+static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008;   // no stream resource
+static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009;   // no notify resource
+static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010;    // no model resource
+static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011;      // no cdq resource
+
+static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000;              // runtime internal error
+static const int32_t ACL_ERROR_RT_TS_ERROR = 507001;                    // ts internal error
+static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002;            // task full in stream
+static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003;           // task empty in stream
+static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004;         // stream not complete
+static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005;             // end of sequence
+static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006;          // event not complete
+static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007;       // context release error
+static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008;                 // soc version error
+static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009;       // task type not support
+static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010;              // ts lost heartbeat
+static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011;               // model execute failed
+static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012;              // report timeout
+static const int32_t ACL_ERROR_RT_SYS_DMA = 507013;                     // sys dma error
+static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014;              // aicore timeout
+static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015;            // aicore exception
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016;       // aicore trap exception
+static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017;               // aicpu timeout
+static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018;             // aicpu exception
+static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019;      // aicpu datadump response error
+static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020;         // aicpu model operate response error
+static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021;             // profiling error
+static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022;                   // ipc error
+static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023;          // model abort normal
+static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024;        // kernel unregistering
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025;         // ringbuffer not init
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026;          // ringbuffer no data
+static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027;               // kernel lookup error
+static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028;            // kernel register duplicate
+static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029;         // debug register failed
+static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030;       // debug unregister failed
+static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031;               // label not in current context
+static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032;             // program register num use out
+static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033;             // device setup error
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034;         // vector core timeout
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035;       // vector core exception
+static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036;  // vector core trap exception
+static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037;          // cdq alloc batch abnormal
+
+static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899;    // drv internal error
+static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900;  // aicpu internal error
+static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901;          // hdc disconnect
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // __INC_EXTERNEL_RT_ERROR_CODES_H__
diff --git a/inc/framework/ge_runtime/task_info.h b/inc/framework/ge_runtime/task_info.h
index f59c6454..4530bff7 100644
--- a/inc/framework/ge_runtime/task_info.h
+++ b/inc/framework/ge_runtime/task_info.h
@@ -271,13 +271,14 @@ class FusionEndTaskInfo : public TaskInfo {
 class HcclTaskInfo : public TaskInfo {
  public:
   HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr,
-               void *output_data_addr, int64_t workspace_size, int64_t hccl_stream_num,
+               void *output_data_addr, void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num,
                const std::vector<uint8_t> &private_def, void *ops_kernel_store, int32_t count, int64_t root_id,
                int64_t op_type, int64_t data_type, const std::string &group, bool dump_flag)
       : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag),
         hccl_type_(hccl_type),
         input_data_addr_(input_data_addr),
         output_data_addr_(output_data_addr),
+        workspace_addr_(workspace_addr),
         workspace_size_(workspace_size),
         hccl_stream_num_(hccl_stream_num),
         private_def_(private_def),
@@ -292,6 +293,7 @@ class HcclTaskInfo : public TaskInfo {
   const std::string &hccl_type() const { return hccl_type_; }
   void *input_data_addr() const { return input_data_addr_; }
   void *output_data_addr() const { return output_data_addr_; }
+  void *workspace_addr() const { return workspace_addr_; }
   int64_t workspace_size() const { return workspace_size_; }
   int64_t hccl_stream_num() const { return hccl_stream_num_; }
   const std::vector<uint8_t> &private_def() const { return private_def_; }
@@ -306,6 +308,7 @@ class HcclTaskInfo : public TaskInfo {
   std::string hccl_type_;
   void *input_data_addr_;
   void *output_data_addr_;
+  void *workspace_addr_;
   int64_t workspace_size_;
   int64_t hccl_stream_num_;
   std::vector<uint8_t> private_def_;
diff --git a/metadef b/metadef
index a725349b..21178899 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit a725349b65aef2940555af2ddb7b9461fbe0d5fd
+Subproject commit 211788997dcc9aa63527541a44d511388c06bce5
diff --git a/scripts/format_source_code.sh b/scripts/format_source_code.sh
new file mode 100755
index 00000000..1fd0b4f6
--- /dev/null
+++ b/scripts/format_source_code.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+# Copyright 2019-2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+set -e
+
+CLANG_FORMAT=$(which clang-format) || (echo "Please install 'clang-format' tool first"; exit 1)
+
+version=$("${CLANG_FORMAT}" --version | sed -n "s/.*\ \([0-9]*\)\.[0-9]*\.[0-9]*.*/\1/p")
+if [[ "${version}" -lt "8" ]]; then
+  echo "clang-format's version must be at least 8.0.0"
+  exit 1
+fi
+
+CURRENT_PATH=$(pwd)
+SCRIPTS_PATH=$(dirname "$0")
+
+echo "CURRENT_PATH=${CURRENT_PATH}"
+echo "SCRIPTS_PATH=${SCRIPTS_PATH}"
+
+# print usage message
+function usage()
+{
+  echo "Format the specified source files to conform the code style."
+  echo "Usage:"
+  echo "bash $0 [-a] [-c] [-l] [-h]"
+  echo "e.g. $0 -c"
+  echo ""
+  echo "Options:"
+  echo "    -a format of all files"
+  echo "    -c format of the files changed compared to last commit, default case"
+  echo "    -l format of the files changed in last commit"
+  echo "    -h Print usage"
+}
+
+# Parse the command-line flags and set the global 'mode' to one of: all | changed | lastcommit.
+function checkopts()
+{
+  # 'mode' is read later by the top-level script to decide which files get formatted
+  mode="changed"    # default when no flag is given: same as -c
+
+  # -a/-c/-l select the mode; -h prints usage and exits 0; any other flag prints usage and exits 1
+  while getopts 'aclh' opt
+  do
+    case "${opt}" in
+      a)
+        mode="all"
+        ;;
+      c)
+        mode="changed"
+        ;;
+      l)
+        mode="lastcommit"
+        ;;
+      h)
+        usage
+        exit 0
+        ;;
+      *)
+        echo "Unknown option ${opt}!"
+        usage
+        exit 1
+    esac
+  done
+}
+
+# init variable
+# check options
+checkopts "$@"
+
+# switch to project root path, which contains clang-format config file '.clang-format'
+cd "${SCRIPTS_PATH}/.." || exit 1
+
+FMT_FILE_LIST='__format_files_list__'
+# Build the list of files to format; every mode starts with '>' so a stale list from an aborted run is never reused.
+if [[ "X${mode}" == "Xall" ]]; then
+  find src -type f -name "*" | grep "\.h$\|\.cc$" > "${FMT_FILE_LIST}" || true
+  find inc -type f -name "*" | grep "\.h$\|\.cc$" >> "${FMT_FILE_LIST}" || true
+elif [[ "X${mode}" == "Xchanged" ]]; then
+  # --diff-filter=ACMRTUXB will ignore deleted files in commit
+  git diff --diff-filter=ACMRTUXB --name-only | grep "^inc\|^src" | grep "\.h$\|\.cc$" > "${FMT_FILE_LIST}" || true
+else  # "X${mode}" == "Xlastcommit"
+  git diff --diff-filter=ACMRTUXB --name-only HEAD~ HEAD | grep "^inc\|^src" | grep "\.h$\|\.cc$" > "${FMT_FILE_LIST}" || true
+fi
+# Format each listed file in place; 'IFS= read -r' keeps whitespace and backslashes in paths intact.
+while IFS= read -r line; do
+  if [ -f "${line}" ]; then
+    "${CLANG_FORMAT}" -i "${line}"
+  fi
+done < "${FMT_FILE_LIST}"
+
+rm "${FMT_FILE_LIST}"
+cd "${CURRENT_PATH}" || exit 1
+
+echo "Specified cpp source files have been format successfully."
diff --git a/third_party/fwkacllib/inc/cce/taskdown_common.hpp b/third_party/fwkacllib/inc/cce/taskdown_common.hpp
index 3ecea523..7954162e 100644
--- a/third_party/fwkacllib/inc/cce/taskdown_common.hpp
+++ b/third_party/fwkacllib/inc/cce/taskdown_common.hpp
@@ -27,15 +27,16 @@ namespace cce {
 #define CC_FUSION_OP_MAX 32
 
 typedef enum tagccKernelType {
-  CCE_AI_CORE = 0, /* cce aicore */
-  CCE_AI_CPU = 1,  /* cce aicpu */
-  TE = 2,          /* te operator*/
-  CUSTOMIZED = 3,  /* customized operator */
-  TE_AI_CORE = 4,  /* te aicore operator*/
-  TE_AI_CPU = 5,   /* te aicpu operator */
-  AI_CPU = 6,      /* aicpu */
-  CUST_AI_CPU = 7, /* custom aicpu*/
-  INVALID = 8,     /* unknown kernel type */
+  CCE_AI_CORE = 0,   /* cce aicore */
+  CCE_AI_CPU = 1,    /* cce aicpu */
+  TE = 2,            /* te operator*/
+  CUSTOMIZED = 3,    /* customized operator */
+  TE_AI_CORE = 4,    /* te aicore operator*/
+  TE_AI_CPU = 5,     /* te aicpu operator */
+  AI_CPU = 6,        /* aicpu */
+  CUST_AI_CPU = 7,   /* custom aicpu*/
+  HOST_CPU = 8,      /* host cpu */
+  INVALID = 10000    /* unknown kernel type */
 } ccKernelType;
 
 typedef struct tagOpContext {
diff --git a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h
old mode 100755
new mode 100644
diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h
index e57563b3..ffbf552b 100644
--- a/third_party/fwkacllib/inc/hccl/base.h
+++ b/third_party/fwkacllib/inc/hccl/base.h
@@ -124,27 +124,27 @@ struct HcomRemoteAccessAddrInfo {
 };
 
 struct HcomAllToAllVParams {
-  void *sendbuf;
-  void *sendcounts;
-  void *sdispls;
-  HcclDataType sendtype;
-  void *recvbuf;
-  void *recvcounts;
-  void *rdispls;
-  HcclDataType recvtype;
-  const char *group;
+    void *sendbuf;  // device mem
+    void *sendcounts;  // device mem;  Type: uint_64
+    void *sdispls;  // device mem;  Type: uint_64
+    HcclDataType sendtype;
+    void *recvbuf;  // device mem
+    void *recvcounts;  // device mem;  Type: uint_64 
+    void *rdispls;  // device mem;  Type: uint_64
+    HcclDataType recvtype;
+    const char *group;  // not used now
 };
 
 struct HcomGatherAllToAllVParams {
- void *addrInfo;
- void *addrInfoCountPerRank;
- void *recvbuf;
- void *recvcounts;
- void *rdispls;
- void *gatheredbuf;
- s32 addrLength;
- HcclDataType recvtype;
- const char *group;
+    void *addrInfo;  // device mem;  contains host VA[uint_64]:  [addr, length, addr, length, addr, length, ...]
+    void *addrInfoCountPerRank;  // device mem;  length: ranksize;  contains addrInfoCounts for every rank
+    void *recvbuf;  // device mem
+    void *recvcounts;  // device mem;  Type: uint_64
+    void *rdispls;  // device mem;  Type: uint_64
+    void *gatheredbuf;  // device mem
+    s32 addrLength;
+    HcclDataType recvtype;
+    const char *group;  // not used now
 };
 
 #ifdef __cplusplus
diff --git a/third_party/fwkacllib/inc/hccl/hccl_types.h b/third_party/fwkacllib/inc/hccl/hccl_types.h
deleted file mode 100644
index 50a64795..00000000
--- a/third_party/fwkacllib/inc/hccl/hccl_types.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file hccl_types.h
- * @brief HCCL data type definition 
- * 
- */
- 
-#ifndef HCCL_TYPES_H_
-#define HCCL_TYPES_H_
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-/**
- * @brief HCCL functions return value definition
- */
-typedef enum {
-    HCCL_SUCCESS = 0,               /**< success */
-    HCCL_E_PARA = 1,                /**< parameter error */
-    HCCL_E_PTR = 2,                 /**< empty pointer */
-    HCCL_E_MEMORY = 3,              /**< memory error */
-    HCCL_E_INTERNAL = 4,            /**< internal error */
-    HCCL_E_NOT_SUPPORT = 5,         /**< not support feature */
-    HCCL_E_NOT_FOUND = 6,           /**< not found specific resource */
-    HCCL_E_UNAVAIL = 7,             /**< resource unavailable */
-    HCCL_E_SYSCALL = 8,             /**< call system interface error */
-    HCCL_E_TIMEOUT = 9,             /**< timeout */
-    HCCL_E_OPEN_FILE_FAILURE = 10,  /**< open file fail */
-    HCCL_E_TCP_CONNECT = 11,        /**< tcp connect fail */
-    HCCL_E_ROCE_CONNECT = 12,       /**< roce connect fail */
-    HCCL_E_TCP_TRANSFER = 13,       /**< tcp transfer fail */
-    HCCL_E_ROCE_TRANSFER = 14,      /**< roce transfer fail */
-    HCCL_E_RUNTIME = 15,            /**< call runtime api fail */
-    HCCL_E_DRV = 16,                /**< call driver api fail */
-    HCCL_E_PROFILING = 17,          /**< call profiling api fail */
-    HCCL_E_CCE = 18,                /**< call cce api fail */
-    HCCL_E_NETWORK = 19,            /**< call network api fail */
-    HCCL_E_RESERVED                 /**< reserved */
-} HcclResult;
-
-/**
- * @brief handle to HCCL communicator
- */
-typedef void *HcclComm;
-
-/**
- * @brief HCCL Reduction opperation
- */
-typedef enum {
-    HCCL_REDUCE_SUM = 0,    /**< sum */
-    HCCL_REDUCE_PROD = 1,   /**< prod */
-    HCCL_REDUCE_MAX = 2,    /**< max */
-    HCCL_REDUCE_MIN = 3,    /**< min */
-    HCCL_REDUCE_RESERVED    /**< reserved */
-} HcclReduceOp;
-
-/**
- * @brief HCCL data type
- */
-typedef enum {
-    HCCL_DATA_TYPE_INT8 = 0,    /**< int8 */
-    HCCL_DATA_TYPE_INT16 = 1,   /**< int16 */
-    HCCL_DATA_TYPE_INT32 = 2,   /**< int32 */
-    HCCL_DATA_TYPE_FP16 = 3,    /**< fp16 */
-    HCCL_DATA_TYPE_FP32 = 4,    /**< fp32 */
-    HCCL_DATA_TYPE_INT64 = 5,    /**< int64 */
-    HCCL_DATA_TYPE_UINT64 = 6,    /**< uint64 */
-    HCCL_DATA_TYPE_RESERVED     /**< reserved */
-} HcclDataType;
-
-const uint32_t HCCL_ROOT_INFO_BYTES =  4108; // 4108: root info length
-
-/**
- * @brief HCCL root info
- */
-typedef struct HcclRootInfoDef {
-    char internal[HCCL_ROOT_INFO_BYTES];
-} HcclRootInfo;
-
-#ifdef __cplusplus
-}
-#endif // __cplusplus
-#endif // HCCL_TYPES_H_
diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h
index 955764d6..bf1f395b 100644
--- a/third_party/fwkacllib/inc/hccl/hcom.h
+++ b/third_party/fwkacllib/inc/hccl/hcom.h
@@ -164,8 +164,22 @@ HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType,
                                        const std::vector<HcomRemoteAccessAddrInfo>& addrInfos,
                                        std::function<void(HcclResult status)> callback);
 
+/**
+ * @brief Put alltoallv communication operation into hcom executor.
+ *
+ * @param params information about alltoallv communication operation.
+ * @param callback callback after collective communication operation.
+ * @return HcclResult
+ */
 HcclResult HcomExecEnqueueAllToAllV(HcomAllToAllVParams params, std::function<void(HcclResult status)> callback);
 
+/**
+ * @brief Put gather alltoallv communication operation into hcom executor.
+ *
+ * @param params information about gather alltoallv communication operation.
+ * @param callback callback after collective communication operation.
+ * @return HcclResult
+ */
 HcclResult HcomExecEnqueueGatherAllToAllV(HcomGatherAllToAllVParams params,
                                           std::function<void(HcclResult status)> callback);
 
diff --git a/third_party/fwkacllib/inc/mmpa/mmpa_api.h b/third_party/fwkacllib/inc/mmpa/mmpa_api.h
index 38a689ee..f8d5ccf3 100644
--- a/third_party/fwkacllib/inc/mmpa/mmpa_api.h
+++ b/third_party/fwkacllib/inc/mmpa/mmpa_api.h
@@ -56,6 +56,7 @@
 #include <dirent.h>
 #include <getopt.h>
 #include <libgen.h>
+#include <malloc.h>
 
 #include <linux/types.h>
 #include <linux/hdreg.h>
diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
index 993f36ba..3d196e41 100644
--- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
+++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
@@ -550,6 +550,10 @@ MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMod
 MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name);
 MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags);
 MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra);
+
+MMPA_FUNC_VISIBILITY mmSize mmGetPageSize();
+MMPA_FUNC_VISIBILITY VOID *mmAlignMalloc(mmSize mallocSize, mmSize alignSize);
+MMPA_FUNC_VISIBILITY VOID mmAlignFree(VOID *addr);
 #define MMPA_DLL_API
 
 #ifdef __cplusplus
diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
index 49e97a5d..e6b6f71e 100644
--- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
+++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
@@ -557,6 +557,10 @@ MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMod
 MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name);
 MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags);
 MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra);
+
+MMPA_FUNC_VISIBILITY mmSize mmGetPageSize();
+MMPA_FUNC_VISIBILITY VOID *mmAlignMalloc(mmSize mallocSize, mmSize alignSize);
+MMPA_FUNC_VISIBILITY VOID mmAlignFree(VOID *addr);
 #ifdef __cplusplus
 #if __cplusplus
 }
diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h
index bed984bd..86805f72 100644
--- a/third_party/fwkacllib/inc/ops/aipp.h
+++ b/third_party/fwkacllib/inc/ops/aipp.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -65,6 +65,8 @@ in aipp config file, framework will auto add one input node to graph at last. \n
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator AippData.
+*@par Restrictions:
+*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly.
 */
 REG_OP(AippData)
     .INPUT(data, TensorType::ALL())
diff --git a/third_party/fwkacllib/inc/ops/all_ops.h b/third_party/fwkacllib/inc/ops/all_ops.h
index 1ac83783..cc11f5f9 100644
--- a/third_party/fwkacllib/inc/ops/all_ops.h
+++ b/third_party/fwkacllib/inc/ops/all_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,6 +39,7 @@
 #include "image_ops.h"
 #include "internal_ops.h"
 #include "linalg_ops.h"
+#include "list_ops.h"
 #include "logging_ops.h"
 #include "lookup_ops.h"
 #include "math_ops.h"
diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h
index e1f64421..fd35b546 100644
--- a/third_party/fwkacllib/inc/ops/array_ops.h
+++ b/third_party/fwkacllib/inc/ops/array_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -626,7 +626,7 @@ REG_OP(StopGradient)
 *x: A tensor. \n
 
 *@par Outputs:
-*y: A tensor. \n
+*y: A tensor with the same shape and contents as input. \n
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator Identity.
@@ -666,7 +666,7 @@ REG_OP(IdentityN)
 *@li axis: The dimension index at which to expand. \n
 
 *@par Outputs:
-*y: A tensor. \n
+*y: A tensor with the same data as input, with an additional dimension inserted at the index specified by axis. \n
 
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator ExpandDims.
@@ -713,7 +713,7 @@ REG_OP(Unsqueeze)
 *@par Outputs:
 *y: A tensor. \n
 
-*@par Attention:
+*@attention Constraints:
 *This operator cannot be directly called by the acllopExecute API. \n
 
 *@par Third-party framework compatibility
@@ -1153,6 +1153,102 @@ REG_OP(EditDistance)
     .OUTPUT(output, TensorType({DT_FLOAT}))
     .OP_END_FACTORY_REG(EditDistance)
 
+/**
+* @brief Sorts the elements of "x" along the dimension given by "axis".
+
+* @par Inputs:
+* @li x: An ND tensor of type float16, float32 or double.
+
+* @par Attributes:
+
+* @li axis: An optional int. The dimension to sort along. This value defaults to -1.
+* @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False.
+
+* @par Outputs:
+* @li y: An ND tensor of type float16, float32 or double.
+
+* @attention Constraints:
+* @li Axis should select the last dim.
+* @li When the sorting data is less than 150K, it is recommended to use this tbe ops,
+ and the descending performance is better than the ascending.
+* @li The upper limit of data on Ascend910 is 2000K.
+*/
+REG_OP(SortV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(axis, Int, -1)
+    .ATTR(descending, Bool, false)
+    .OP_END_FACTORY_REG(SortV2)
+
+/**
+* @brief Expand the input tensor to a compatible shape. \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A Tensor. Must be one of the following types:
+*     float16, float32, int32, int8 ,uint8. \n
+* @li shape: A Tensor to specify the shape that the input tensor expanded to. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x", with the shape obtained by expanding "x" to input "shape". \n
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator Expand.
+*/
+
+REG_OP(Expand)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
+    .INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
+    .OP_END_FACTORY_REG(Expand)
+
+/**
+*@brief Returns a tensor containing the indices of all non-zero elements of input. \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types: double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool.
+
+*@par Attributes:
+* transpose: the output tensor will be transposed if true. \n
+
+*@par Outputs:
+* y: A Tensor of type int64. \n
+
+*@par Third-party framework compatibility
+*Compatible with the PyTorch operator NonZero.
+*/
+
+REG_OP(NonZero)
+    .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \
+              DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL}))
+    .OUTPUT(y, TensorType({DT_INT64}))
+    .ATTR(transpose, Bool, false)
+    .OP_END_FACTORY_REG(NonZero)
+
+/**
+* @brief Expand the input tensor to a compatible shape. \n
+
+* @par Inputs:
+* One input, including:
+* @li x: A Tensor. Must be one of the following types:
+*     float16, float32, int32, int8 ,uint8. \n
+
+* @par Attributes:
+* @li shape: A required listInt to specify the shape that the input tensor expanded to. \n
+
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x", with the shape obtained by expanding "x" to attr "shape". \n
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator Expand.
+*/
+
+REG_OP(ExpandD)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
+    .REQUIRED_ATTR(shape, ListInt)
+    .OP_END_FACTORY_REG(ExpandD)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/audio_ops.h b/third_party/fwkacllib/inc/ops/audio_ops.h
index d9883253..f05135d1 100644
--- a/third_party/fwkacllib/inc/ops/audio_ops.h
+++ b/third_party/fwkacllib/inc/ops/audio_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h b/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h
new file mode 100644
index 00000000..d0800a08
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file avg_pool_1d_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_
+#include "graph/operator_reg.h"
+
+namespace ge {
+/**
+*@brief Generate an auxiliary matrix .  \n
+
+*@par Inputs:
+* @li x: A tensor. Must be one of the following types: uint8, int8, int16, int32,
+ int64, float16, float, double. The format must be NHWC, NCHW or NC1HWC0.
+
+*@par Attributes:
+*@li ksize: Kernel size. Input type is int.
+*@li strides: Input type is int.
+*@li pads: Input type is listInt .
+*@li ceil_mode: Bool, default value is false.
+*@li count_include_pad: Bool, default value is false.  \n
+
+*@par Outputs:
+*y: A tensor with the same type as "x".  \n
+*@par Third-party framework compatibility
+
+*Compatible with the TensorFlow operator Unbatch (TODO confirm: this line looks copied from batch_ops.h).
+*/
+REG_OP(AvgPool1DAvgMatrix)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8,
+                          DT_INT32, DT_INT64, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8,
+                           DT_INT32, DT_INT64, DT_DOUBLE}))
+    .REQUIRED_ATTR(ksize, Int)
+    .REQUIRED_ATTR(strides, Int)
+    .REQUIRED_ATTR(pads, ListInt)
+    .ATTR(ceil_mode, Bool, false)
+    .ATTR(count_include_pad, Bool, false)
+    .OP_END_FACTORY_REG(AvgPool1DAvgMatrix)
+}
+#endif
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h
index 8a1c5a7b..ca4fe1db 100644
--- a/third_party/fwkacllib/inc/ops/batch_ops.h
+++ b/third_party/fwkacllib/inc/ops/batch_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -64,10 +64,10 @@ the same types as "x_tensors" .  It's a dynamic output.  \n
 REG_OP(Batch)
   .DYNAMIC_INPUT(x_tensors, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, \
       DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_BOOL, DT_DOUBLE}))
-  .OUTPUT(y_index, TensorType({ DT_INT64 }))
-  .OUTPUT(y_id, TensorType({ DT_INT64 }))
   .DYNAMIC_OUTPUT(y_tensors, TensorType({DT_INT8, DT_UINT8, DT_INT16, \
       DT_UINT16, DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_BOOL}))
+  .OUTPUT(y_index, TensorType({ DT_INT64 }))
+  .OUTPUT(y_id, TensorType({ DT_INT64 }))
   .REQUIRED_ATTR(num_batch_threads, Int)
   .REQUIRED_ATTR(max_batch_size, Int)
   .ATTR(max_enqueued_batches, Int, 10)
@@ -107,11 +107,13 @@ across multiple sessions .   \n
 
 REG_OP(Unbatch)
   .INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+      DT_COMPLEX64, DT_COMPLEX128}))
   .INPUT(index, TensorType({DT_INT64}))
   .INPUT(id, TensorType({DT_INT64}))
   .OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+      DT_COMPLEX64, DT_COMPLEX128}))
   .REQUIRED_ATTR(timeout_micros, Int)
   .ATTR(container, String, "")
   .ATTR(shared_name, String, "")
@@ -146,13 +148,16 @@ across multiple sessions .   \n
 
 REG_OP(UnbatchGrad)
   .INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+      DT_COMPLEX64, DT_COMPLEX128}))
   .INPUT(index, TensorType({DT_INT64}))
   .INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+      DT_COMPLEX64, DT_COMPLEX128}))
   .INPUT(id, TensorType({DT_INT64}))
   .OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+      DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+      DT_COMPLEX64, DT_COMPLEX128}))
   .ATTR(container, String, "")
   .ATTR(shared_name, String, "")
   .OP_END_FACTORY_REG(UnbatchGrad)
diff --git a/third_party/fwkacllib/inc/ops/bitwise_ops.h b/third_party/fwkacllib/inc/ops/bitwise_ops.h
index 5c83e161..dac78118 100644
--- a/third_party/fwkacllib/inc/ops/bitwise_ops.h
+++ b/third_party/fwkacllib/inc/ops/bitwise_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,6 +25,35 @@
 
 namespace ge {
 
+/**
+*@brief Element-wise computes the bitwise left-shift of x and y . \n
+
+*@par Inputs:
+*Two inputs. Each element of "x" is shifted left by the corresponding
+element of "y".
+* @li x: A Tensor. Must be one of the following types: int8, int16, int32,
+int64, uint8, uint16, uint32, uint64.
+* @li y: A Tensor. Has the same type as "x".  \n
+
+*@par Outputs:
+* z: A Tensor. Has the same type as "x".  \n
+
+*@attention Constraints:
+*LeftShift runs on the Ascend AI CPU, which delivers poor performance.  \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator LeftShift.
+*/
+
+REG_OP(LeftShift)
+    .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \
+           DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}))
+    .INPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \
+           DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}))
+    .OUTPUT(z, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, \
+            DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64}))
+    .OP_END_FACTORY_REG(LeftShift)
+
 /**
 *@brief Element-wise computes the bitwise right-shift of x and y . \n
 
diff --git a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h
index 550e8b7d..08e54824 100644
--- a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h
+++ b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h
index e20607bf..890c52ae 100644
--- a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h
+++ b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/condtake_ops.h b/third_party/fwkacllib/inc/ops/condtake_ops.h
index 5e91eb07..029cffbf 100644
--- a/third_party/fwkacllib/inc/ops/condtake_ops.h
+++ b/third_party/fwkacllib/inc/ops/condtake_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h
index 7196b14f..e5bd3534 100644
--- a/third_party/fwkacllib/inc/ops/control_flow_ops.h
+++ b/third_party/fwkacllib/inc/ops/control_flow_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -96,7 +96,7 @@ REG_OP(RefMerge)
  *       Otherwise, the data is forwarded to "output_false" . \n
 
  *@par Inputs:
- *@li data: The tensor to be forwarded. \ n
+ *@li data: The tensor to be forwarded. \n
  *          Must be one of the following types: float16, float32, float64,
  *          int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool.
  *@li pred: A boolean scalar. The output port that will receive data . \n
@@ -387,12 +387,12 @@ REG_OP(ControlTrigger)
 
 *@par Inputs:
 * Three inputs, including:
-*@li x: One dimensional tensore of type int32, specifying queried shape, max size is 8.
-*@li data_seq: One dimensional tensore of type int32, specifying the mapped table is queried.
-*@li level_index: One dimensional tensore of type int32, specifying secondary index. \n
+*@li x: One dimensional tensor of type int32, specifying queried shape, max size is 128.
+*@li data_seq: One dimensional tensor of type int32, specifying the mapped table is queried.
+*@li level_index: One dimensional tensor of type int32, specifying secondary index. \n
 
 *@par Outputs:
-*@li y: A Tensor with shape [batch, 8], of type int32, specifying index of shape in the map.
+*@li y: A Tensor with shape [8], of type int32, specifying index of shape in the map.
 *@par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
diff --git a/third_party/fwkacllib/inc/ops/correlation.h b/third_party/fwkacllib/inc/ops/correlation.h
new file mode 100644
index 00000000..caebba50
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/correlation.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file correlation.h
+ * \brief
+ */
+#ifndef GE_OP_CORRELATION_OPS_H
+#define GE_OP_CORRELATION_OPS_H
+
+#include "graph/operator_reg.h"
+
+namespace ge {
+/**
+*@brief Computes a 2D Correlation given 4D "x" and "filter" tensors.
+*
+*@par Inputs:
+* @li filter: A 4D tensor of filters.
+* @li x: A 4D tensor of input images, batch number must equal to batch
+* number of "filter", and channel must equal to channel of "filter".
+*
+*@par Attributes:
+* @li groups: set correlation mode, must be 1 or channel.
+*
+*@par Outputs:
+*y: A Tensor of type float16 or int32.
+
+*@par Third-party framework compatibility
+* Compatible with caffe correlation custom operator.
+*/
+REG_OP(Correlation)
+    .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_INT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32}))
+    .ATTR(groups, Int, 1)
+    .OP_END_FACTORY_REG(Correlation)
+}  // namespace ge
+
+#endif  // GE_OP_CORRELATION_OPS_H
diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h
index 2c75fd09..e907b828 100644
--- a/third_party/fwkacllib/inc/ops/ctc_ops.h
+++ b/third_party/fwkacllib/inc/ops/ctc_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -137,6 +137,87 @@ REG_OP(CTCBeamSearchDecoder)
     .OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE}))
     .OP_END_FACTORY_REG(CTCBeamSearchDecoder)
 
+/**
+*@brief The Connectionist Temporal Classification loss.
+
+*@par Inputs:
+*@li log_probs: Tensor of size (T, N, C), where T = input length, N = batch size,
+                and C = number of classes (including blank).
+                It represents the logarithmized probabilities of the outputs.
+*@li targets: Tensor of size (N, S), where S = max target length.
+             It represents the target sequences.
+*@li input_lengths: Tuple or tensor of size (N). It represents the lengths of the inputs.
+*@li target_lengths: Tuple or tensor of size (N). It represents the lengths of the targets.
+
+*@par Outputs:
+*@li neg_log_likelihood: A loss value which is differentiable with respect to each input node.
+*@li log_alpha: The probability of possible trace of input to target.
+
+*@par Attributes:
+*@li blank : Blank label. Default 0.
+*@li reduction: Specifies the reduction to apply to the output. Default: 'mean'.
+*@li zero_infinity : Whether to zero infinite losses and the associated gradients.
+
+*@par Third-party framework compatibility
+* Compatible with Pytorch CTCLoss operator.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(CTCLossV2)
+    .INPUT(log_probs, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(targets, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(input_lengths, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(target_lengths, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(neg_log_likelihood, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(log_alpha, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(blank, Int, 0)
+    .ATTR(reduction, String, "mean")
+    .ATTR(zero_infinity, Bool, false)
+    .OP_END_FACTORY_REG(CTCLossV2)
+
+/**
+*@brief The Connectionist Temporal Classification loss grad.
+
+*@par Inputs:
+*@li grad_out: Gradient renewal coefficient. Tensor of size (N), where N = batch size.
+*@li log_probs: Tensor of size (T, N, C), where T = input length, N = batch size,
+                and C = number of classes (including blank).
+                It represents the logarithmized probabilities of the outputs.
+*@li targets: Tensor of size (N, S), where S = max target length.
+             It represents the target sequences.
+*@li input_lengths: Tuple or tensor of size (N). It represents the lengths of the inputs.
+*@li target_lengths: Tuple or tensor of size (N). It represents the lengths of the targets.
+*@li neg_log_likelihood: A loss value which is differentiable with respect to each input node.
+*@li log_alpha: The probability of possible trace of input to target.
+
+*@par Outputs:
+*@li grad: Tensor of size (T, N, C), The grad of Connectionist Temporal Classification loss.
+
+*@par Attributes:
+*@li blank : Blank label. Default 0.
+*@li reduction: Specifies the reduction to apply to the output. Default: 'mean'.
+*@li zero_infinity : Whether to zero infinite losses and the associated gradients.
+
+*@par Third-party framework compatibility
+* Compatible with Pytorch CTCLoss operator.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(CTCLossV2Grad)
+    .INPUT(grad_out, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(log_probs, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(targets, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(input_lengths, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(target_lengths, TensorType({DT_INT32, DT_INT64}))
+    .INPUT(neg_log_likelihood, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(log_alpha, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(grad, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(blank, Int, 0)
+    .ATTR(reduction, String, "mean")
+    .ATTR(zero_infinity, Bool, false)
+    .OP_END_FACTORY_REG(CTCLossV2Grad)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h
index bb937a75..6021f4e3 100644
--- a/third_party/fwkacllib/inc/ops/data_flow_ops.h
+++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -908,7 +908,7 @@ REG_OP(TensorArray)
     .OUTPUT(handle, TensorType({DT_RESOURCE}))
     .OUTPUT(flow, TensorType({DT_FLOAT}))
     .REQUIRED_ATTR(dtype, Type)
-    .ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE)
+    .ATTR(element_shape, ListInt, ge::UNKNOWN_RANK)
     .ATTR(dynamic_size, Bool, false)
     .ATTR(clear_after_read, Bool, true)
     .ATTR(identical_element_shapes, Bool, false)
@@ -963,7 +963,7 @@ REG_OP(TensorArrayConcat)
         DT_QUINT8, DT_QINT32}))
     .OUTPUT(lengths, TensorType({DT_INT64}))
     .REQUIRED_ATTR(dtype, Type)
-    .ATTR(element_shape_except0, ListInt, ge::UNKNOWN_SHAPE)
+    .ATTR(element_shape_except0, ListInt, ge::UNKNOWN_RANK)
     .OP_END_FACTORY_REG(TensorArrayConcat)
 
 /**
@@ -999,7 +999,7 @@ REG_OP(TensorArrayGather)
         DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8,
         DT_QUINT8, DT_QINT32}))
     .REQUIRED_ATTR(dtype, Type)
-    .ATTR(element_shape, ListInt, ge::UNKNOWN_SHAPE)
+    .ATTR(element_shape, ListInt, ge::UNKNOWN_RANK)
     .OP_END_FACTORY_REG(TensorArrayGather)
 
 /**
@@ -1430,6 +1430,24 @@ REG_OP(OrderedMapClear)
     .ATTR(shared_name, String, "")
     .OP_END_FACTORY_REG(OrderedMapClear)
 
+/**
+*@brief FakeQueue, support tf api FixedLengthRecordReader. \n
+
+*@par Inputs:
+*Including:
+* @li resource: A Tensor of type DT_RESOURCE.
+
+*@par Outputs:
+*handle: A Tensor of type DT_STRING ref. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator FakeQueue.
+*/
+REG_OP(FakeQueue)
+    .INPUT(resource, TensorType({DT_RESOURCE}))
+    .OUTPUT(handle, TensorType({DT_STRING}))
+    .OP_END_FACTORY_REG(FakeQueue)
+
 /**
 *@brief Returns the number of incomplete elements in the underlying container. \n
 
@@ -2258,6 +2276,7 @@ REG_OP(LruCache)
   .ATTR(shared_name, String, "LruCache")
   .ATTR(cache_size, Int, 100000)
   .ATTR(load_factor, Float, 1)
+  .REQUIRED_ATTR(dtype, Type)
   .OP_END_FACTORY_REG(LruCache)
 
 /**
@@ -2277,9 +2296,9 @@ REG_OP(CacheAdd)
   .INPUT(cache, TensorType({DT_RESOURCE}))
   .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
   .OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
-  .OUTPUT(swap_in_idx, TensorType({DT_INT64}))
+  .OUTPUT(swap_in_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
   .OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
-  .OUTPUT(swap_out_idx, TensorType({DT_INT64}))
+  .OUTPUT(swap_out_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
   .OP_END_FACTORY_REG(CacheAdd)
 
 /**
@@ -2295,9 +2314,65 @@ REG_OP(CacheAdd)
 REG_OP(CacheRemoteIndexToLocal)
   .INPUT(cache, TensorType({DT_RESOURCE}))
   .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
-  .OUTPUT(local_idx, TensorType({DT_INT64}))
+  .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
   .OP_END_FACTORY_REG(CacheRemoteIndexToLocal)
 
+/**
+*@brief CacheAllIndexToLocal, get id in cache
+*@par Inputs:
+*cache: resource data
+*local_idx (output): id in cache.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(CacheAllIndexToLocal)
+  .INPUT(cache, TensorType({DT_RESOURCE}))
+  .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
+  .REQUIRED_ATTR(dtype, Type)
+  .OP_END_FACTORY_REG(CacheAllIndexToLocal)
+
+/**
+*@brief DynamicGetNext, dynamic get next data
+*@par Inputs:
+*x: the iterator, all types are available
+*@par Outputs:
+*y: the data in iterator, all types are available
+*@par Attributes:
+*output_types: types of all outputs
+*output_shapes: shapes of all outputs
+*_dynamic_graph_execute_mode: dynamic graph execution mode,
+value is one of lazy_recompile and dynamic_execute
+*_getnext_inputs_shape_range: shape ranges of outputs,
+it works where _dynamic_graph_execute_mode is dynamic_execute
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynamicGetNext)
+  .INPUT(x, TensorType::ALL())
+  .DYNAMIC_OUTPUT(y, TensorType::ALL())
+  .ATTR(output_types, ListType, {})
+  .ATTR(output_shapes, ListListInt, {{}, {}})
+  .ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile")
+  .ATTR(_getnext_inputs_shape_range, String, "")
+  .OP_END_FACTORY_REG(DynamicGetNext)
+
+/**
+*@brief AdpGetNext
+*@par Outputs:
+*y: the data in iterator, all types are available
+*@par Attributes:
+*output_types: types of all outputs
+*output_shapes: shapes of all outputs
+*queue_name: cdqm queue name
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(AdpGetNext)
+  .DYNAMIC_OUTPUT(y, TensorType::ALL())
+  .ATTR(output_types, ListType, {})
+  .ATTR(output_shapes, ListListInt, {{}, {}})
+  .ATTR(queue_name, String, "")
+  .OP_END_FACTORY_REG(AdpGetNext)
 }   // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
index c64bc138..f61e2939 100644
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,10 +28,13 @@ namespace ge {
 
 *@par Inputs:
 *Dynamic inputs, including:
-* @li x: A list of Tensor objects, each with same shape and type. The supported types are:
+*x: A list of Tensor objects, each with same shape and type. The supported types are:
 *   float16, float32, double, int32, uint8, int16, int8, complex64, int64,
 *   qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n
 
+*@par Attributes:
+*N: A required attribute of type int32, specifying the number of inputs. \n
+
 *@par Outputs:
 *y: A Tensor. Has the same shape and type as the elements of "x". \n
 
@@ -122,7 +125,8 @@ REG_OP(MinimumGrad)
 *@par Inputs:
 *One input:
 *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8,
-   int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n
+   int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
+   For float32 type, the actual calculation on the chip is based on float16.  \n
 
 *@par Attributes:
 *dst_type: An required attribute of type int32, specifying the dst data type. \n
@@ -142,6 +146,8 @@ REG_OP(Cast)
 
 /**
 *@brief Returns the truth value of (x1 >= x2) element-wise. \n
+*when input is int32 and (x2 - x1) > 2**31 or < -2**31
+*aicore accuracy is not guaranteed \n
 
 *@par Inputs:
 *Two inputs, including:
@@ -163,6 +169,8 @@ REG_OP(GreaterEqual)
 
 /**
 *@brief Returns the truth value of (x1 < x2) element-wise. \n
+*when input is int32 and (x2 - x1) > 2**31 or < -2**31
+*aicore accuracy is not guaranteed \n
 
 *@par Inputs:
 *Two inputs, including:
@@ -322,8 +330,8 @@ REG_OP(Sub)
 *@brief computes the absolute value of a tensor. \n
 
 *@par Inputs:
-*One inputs, including:
-* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n
+*One input, including: \n
+*x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x". \n
@@ -563,6 +571,8 @@ REG_OP(InvGrad)
 
 /**
 *@brief: Returns the truth value of (x <= y) element-wise. \n
+*when input is int32 and (x2 - x1) > 2**31 or < -2**31
+*aicore accuracy is not guaranteed \n
 
 *@par Inputs:
 * Two inputs, including:
@@ -611,6 +621,15 @@ REG_OP(Log1p)
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x1".
+
+*@attention Constraints:
+*@li x2: The input data does not support 0
+*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the 
+*requirement of double thousandths in the mini form
+*@li Due to different architectures, the calculation results of this operator 
+*on NPU and CPU may be inconsistent
+*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8
+
 *@par Third-party framework compatibility
 *Compatible with the TensorFlow operator Mod.
 */
@@ -1020,7 +1039,7 @@ REG_OP(BesselI1e)
 * y = log_base(shift + scale * x), with "base" > 0. \n
 
 * @par Inputs:
-* @li x: A Tensor of type complex64, complex128, float16, float32 or double. \n
+* x: A Tensor of type complex64, complex128, float16, float32 or double. \n
 
 * @par Attributes:
 * @li base: An optional float32, specifying the base "e". Defaults to "-1.0"
@@ -1065,7 +1084,7 @@ REG_OP(Log)
 * uint8, int8, uint16, int16, int32, int64, complex64, complex128. \n
 
 * @attention Constraints:
-* @li "x1" and "x2" have incompatible shapes or types. \n
+* "x1" and "x2" have incompatible shapes or types. \n
 
 * @par Third-party framework compatibility
 * Compatible with the TensorFlow operator Multiply.
@@ -1451,6 +1470,8 @@ REG_OP(ReciprocalGrad)
 
 /**
 *@brief Returns the truth value of (x1 > x2) element-wise. \n
+*when input is int32 and (x2 - x1) > 2**31 or < -2**31
+*aicore accuracy is not guaranteed \n
 
 *@par Inputs:
 *@li x1: A Tensor of type float16, float32, double, int64, int32, int16, int8,
@@ -2042,6 +2063,15 @@ REG_OP(FloorDiv)
 *
 *@par Outputs:
 *y: Result remainder.
+
+*@attention Constraints:
+*@li x2: The input data does not support 0
+*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the 
+*requirement of double thousandths in the mini form
+*@li Due to different architectures, the calculation results of this operator 
+*on NPU and CPU may be inconsistent
+*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8
+
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator FloorMod.
 */
@@ -2168,6 +2198,14 @@ REG_OP(Tan)
 *@par Outputs:
 *y: A Tensor. Has the same type as "x1". \n
 
+*@attention Constraints:
+*@li x2: The input data does not support 0
+*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the 
+*requirement of double thousandths in the mini form
+*@li Due to different architectures, the calculation results of this operator 
+*on NPU and CPU may be inconsistent
+*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8
+
 *@par Third-party framework compatibility
 *@li Compatible with the TensorFlow operator TruncateMod.
 */
@@ -2424,6 +2462,25 @@ REG_OP(Eltwise)
     .ATTR(coeff, ListFloat, {})
     .OP_END_FACTORY_REG(Eltwise)
 
+/**
+ *@brief Computes the inverse error function of each element of input. \n
+
+ *@par Inputs:
+ *One input, including:
+ * @li input_x: A tensor. Must be one of the following types:
+ *     float16, float32. \n
+
+ *@par Outputs:
+ *output_y: A Tensor with the same type and shape as "input_x". \n
+
+ *@par Third-party framework compatibility
+ *Compatible with the Pytorch operator Erfinv. \n
+ */
+REG_OP(Erfinv)
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(Erfinv)
+
 /**
 *@brief Computes element-wise population count. \n
 
@@ -2829,9 +2886,9 @@ REG_OP(AdamApplyOneAssign)
 *Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
 */
 REG_OP(LambApplyOptimizerAssign)
-    .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT}))
     .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT}))
     .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT}))
     .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -2842,6 +2899,8 @@ REG_OP(LambApplyOptimizerAssign)
     .INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT}))
     .INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT}))
     .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT}))
     .OP_END_FACTORY_REG(LambApplyOptimizerAssign)
 
 /**
@@ -2873,7 +2932,8 @@ REG_OP(LambApplyWeightAssign)
     .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT}))
     .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT}))
     .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT}))
     .OP_END_FACTORY_REG(LambApplyWeightAssign)
 
 /**
@@ -3183,12 +3243,14 @@ REG_OP(Fills)
 *@brief Add tensor with scale. \n
 
 *@par Inputs:
-*Five inputs, including:
-* @li x1: A Tensor. Must be one of the following types:int32,int16, float16, float32.
-* @li x2: A scale. Must be float. \n
+*One input, including: \n
+*x: A Tensor. Must be one of the following types:int32,int16, float16, float32. \n
+
+*@par Attributes:
+*value: A scale. Must be float. \n
 
 *@par Outputs:
-*@li y: A Tensor. Has the same type and shape as "x1". \n
+*y: A Tensor. Has the same type and shape as "x". \n
 
 *@par Third-party framework compatibility:
 * Compatible with the Pytorch operator adds.
@@ -3329,8 +3391,441 @@ REG_OP(TensorRedirect)
     .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8,
                            DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32}))
     .OP_END_FACTORY_REG(TensorRedirect)
-}  // namespace ge
 
+/**
+* @brief Performs the element-wise division of tensor x1 by tensor x2,
+* multiplies the result by the scalar value and adds it to tensor input_data
+
+* @par Inputs:
+* Four inputs, including:
+* @li input_data: A mutable input Tensor. Must be one of the following types:
+*     float16, float32.
+* @li x1: A mutable input Tensor of the same type as "input_data".
+* @li x2: A mutable input Tensor of the same type as "input_data".
+* @li value: A mutable input Tensor. Must be one of the following types:
+*     float16, float32, int32. \n
+
+* @par Outputs:
+* @li y: A mutable Tensor. Has the same type as "x1". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Addcdiv.
+*/
+REG_OP(Addcdiv)
+    .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(Addcdiv)
+
+/**
+* @brief Performs the element-wise multiplication of tensor x1 by tensor x2,
+* multiplies the result by the scalar value and adds it to tensor input_data
+
+
+* @par Inputs:
+* Four inputs, including:
+* @li input_data: A mutable input Tensor. Must be one of the following types:
+*     float16, float32, int8, int32, uint8.
+* @li x1: A mutable input Tensor of the same type as "input_data".
+* @li x2: A mutable input Tensor of the same type as "input_data".
+* @li value: A tensor which includes only one element of the same type as "input_data". \n
+
+* @par Outputs:
+* @li y: A mutable output Tensor. Has the same type as "x1". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Addcmul.
+*/
+REG_OP(Addcmul)
+    .INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
+    .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
+    .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
+    .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
+    .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
+    .OP_END_FACTORY_REG(Addcmul)
 
+/**
+* @brief Computes the result of x2 * alpha + x1.
+
+* @par Inputs:
+* @li x1: An ND tensor of type float16, float32, int32.
+* @li x2: An ND tensor of type float16, float32, int32.
+* @li alpha: A scalar tensor of type float16, float32. \n
+
+* @par Outputs:
+* @li y: An ND tensor with the same shape and type as "x1". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Axpy.
+*/
+REG_OP(AxpyV2)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OP_END_FACTORY_REG(AxpyV2)
+
+/**
+* @brief Computes the result of x1 - x2.
+
+* @par Inputs:
+* @li x1: An ND tensor of type float16, float, int32.
+* @li x2: An ND tensor of type float16, float, int32. \n
+
+* @par Outputs:
+* @li y: An ND tensor with the same type as "x1". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Sub.
+*/
+REG_OP(PtSub)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OP_END_FACTORY_REG(PtSub)
+
+/**
+* @brief Add the partial values of two tensors in format NC1HWC0.
+
+* @par Inputs:
+* @li x1: A Tensor in 5HD, and must be one of the following types: float16,
+* float32. \n
+* @li x2: A Tensor of the same type as "x1", and the same shape as "x1",
+* except for the C1 value. \n
+
+* @par Attributes:
+* @li x1_c1_offset: A required int. Offset value of C1 in "x1". \n
+* @li x2_c1_offset: A required int. Offset value of C1 in "x2". \n
+* @li c1_len: A required int. C1 len of "y". The value must be less than
+* the difference between C1 and offset in "x1" and "x2". \n
+
+* @par Outputs:
+* @li y:  A Tensor of the same type as "x1", and the same shape as "x1",
+* except for the C1 value. Record the result after adding. \n
+*/
+REG_OP(StrideAdd)
+    .INPUT(x1, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(x2, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .REQUIRED_ATTR(x1_c1_offset, Int)
+    .REQUIRED_ATTR(x2_c1_offset, Int)
+    .REQUIRED_ATTR(c1_len, Int)
+    .OP_END_FACTORY_REG(StrideAdd)
+
+/**
+* @brief Compares whether two tensors are totally equal; outputs only a single bool value.
+
+* @par Inputs:
+* Two inputs, including:
+* @li input_x: A Tensor. the first tensor. \n
+* @li input_y: A Tensor. the second tensor. \n
+
+* @par Outputs:
+* @li output_z: A Tensor. Bool type, compare result of the two inputs. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch equal operator. \n
+*/
+REG_OP(TensorEqual)
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
+    .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
+    .OUTPUT(output_z, TensorType({DT_BOOL}))
+    .OP_END_FACTORY_REG(TensorEqual)
+
+/**
+ * @brief Element-wise max of each of the input tensors (with Numpy-style broadcasting support).
+ * All inputs and outputs must have the same data type. This operator supports multidirectional 
+ * (i.e., Numpy-style) broadcasting
+ * 
+ * @par inputs
+ * one input including:
+ * @li x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64
+ * 
+ * @par output
+ * one output including:
+ * @li y:A Tensor of the same type as x
+ * 
+ */
+REG_OP(MaxN)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) 
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64}))
+    .OP_END_FACTORY_REG(MaxN)
+
+
+/**
+ * @brief Calculates x * mask * value.
+ *
+ * @par Inputs:
+ * @li x: An tensor of type float16 or float32, specifying the input to the data layer.
+ * @li mask: An tensor of type int8 or float16 or float32, be same shape with x. \n
+ *
+ * @par Attributes:
+ * value: A required float. \n
+ *
+ * @par Outputs:
+ * y: The output tensor of type float16 or float32.
+ * It has the same type and shape as x.
+ *
+ */
+REG_OP(MaskedScale)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32}))
+    .INPUT(mask, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32}))
+    .REQUIRED_ATTR(value, Float)
+    .OP_END_FACTORY_REG(MaskedScale)
+
+/**
+ * @brief Calculate the lerp function. \n
+
+ * @par Inputs:
+ * Three inputs, including:
+ * @li start: A tensor. Must be one of the following types:
+ *     float16, float32. \n
+ * @li end: A tensor. Must be one of the following types:
+ *     float16, float32. \n
+ * @li weight: A tensor. Must be one of the following types:
+ *     float16, float32. \n
+
+ * @par Outputs:
+ * y: A Tensor with the same type and shape as "start". \n
+
+ * @par Third-party framework compatibility
+ * Compatible with the Pytorch operator Lerp. \n
+ */
+REG_OP(Lerp)
+    .INPUT(start, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(end, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(Lerp)
+
+/**
+*@brief Returns the num value of abs(x1-x2) > atol+rtol*abs(x2) element-wise. \n
+
+*
+*@par Inputs:
+*@li x1: A tensor. Must be one of the following types: float32, int32, uint8, int8, float16
+*@li x2: A tensor of the same type as "x1".
+*
+*@par Attributes:
+* atol: Defaults to "1e-05".
+* rtol: Defaults to "1e-03".
+*
+*@par Outputs:
+* num: A tensor of type float32.
+*
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
+*
+*/
+REG_OP(DataCompare)
+  .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 }))
+  .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 }))
+  .OUTPUT(num, TensorType({DT_FLOAT}))
+  .ATTR(atol, Float, 1e-5)
+  .ATTR(rtol, Float, 1e-3)
+  .OP_END_FACTORY_REG(DataCompare)
+
+/**
+*@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0
+*otherwise The input does not need to explicitly be a 2D vector.The "axis" attribute indicates the dimension along
+*which Hardmax will be performed.The output tensor has the same shape and contains the Hardmax values of the
+*corresponding input.
+*
+*@par inputs
+*one input including:
+*@li x: input A Tensor.Must be one of the following types:float32,float16
+*
+*@par Attributes:
+*@li axis: An optional int attribute that decides which dimension will be used to calculate the hard_max. Defaults to -1.
+*
+*@par output:
+*one output including:
+*@li y:A Tensor of the same type as x
+*
+*/
+REG_OP(HardMax)
+    .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT }))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(axis, Int, -1)
+    .OP_END_FACTORY_REG(HardMax)
+
+/**
+* @brief Computes the dot product (inner product) of two tensors. This function does not broadcast.
+
+* @par Inputs:
+* Two inputs, including:
+* @li input_x: A Tensor. the first tensor must be 1d. \n
+* @li input_y: A Tensor. the second tensor must be 1d. \n
+
+* @par Outputs:
+* @li output: A Tensor. Result of the two inputs, must be 1d. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch dot operator. \n
+*/
+REG_OP(Dot)
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
+    .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
+    .OP_END_FACTORY_REG(Dot)
+	
+/**
+*@brief Returns a new tensor with boolean elements representing
+*if each element of input is "close" to the corresponding element of other \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li x1: A tensor. Must be one of the following types:
+*     float16, float32, int32. \n
+* @li x2: A tensor with the same type and shape of x1's. \n
+
+*@par Attributes:
+*@li rtol: An optional float.Defaults to 1e-05. \n
+*@li atol: An optional float.Defaults to 1e-08. \n
+*@li equal_nan: An optional bool.Defaults to false. \n
+
+*@par Outputs:
+*y: A Tensor bool with the same shape of x1's. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator isclose. \n
+*/
+REG_OP(IsClose)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_BOOL}))
+    .ATTR(rtol, Float, 1e-05)
+    .ATTR(atol, Float, 1e-08)
+    .ATTR(equal_nan, Bool, false)
+    .OP_END_FACTORY_REG(IsClose)
+
+/**
+* @brief Returns the reverse tensor of the ArgMax operator of a tensor. \n
+
+* @par Inputs:
+* Three inputs, including:
+* var: A Tensor of type float16, float32, int32 or int8. \n
+* indices: A Tensor of type int32. \n
+* updates: A Tensor of type float16, float32, int32 or int8. \n
+
+* @par Attributes:
+* @li dimension: An integer of type int, specifying the axis information of the index with the maximum value.\n
+
+* @par Outputs:
+* y: A Tensor of type float16, float32, int32 or int8. \n
+*
+*@attention Constraints:
+*@li indices: only support int32,and shape same to "updates"
+*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "var".
+*@li y:A Tensor, the type and shape is same to "var" \n
+
+*@par Third-party framework compatibility
+* not support all scene like pytorch operator scatter
+* exp:
+* var.shape=[2,3,4,5], dim=2, the shape of indices and updates should be [2,3,5]
+* not support the shape of indices and updates is [2,3,2,5] like pytorch operator scatter. \n
+*/
+REG_OP(ArgMaxGrad)
+    .INPUT(var, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .REQUIRED_ATTR(dimension, Int)
+    .OP_END_FACTORY_REG(ArgMaxGrad)
+
+/**
+* @brief Returns the reverse tensor of the ArgMax operator of a tensor. \n
+
+* @par Inputs:
+* Four inputs, including:
+* var: A Tensor of type float16, float32, int32 or int8. \n
+* indices: A Tensor of type int32. \n
+* updates: A Tensor of type float16, float32, int32 or int8. \n
+* assist: A Tensor of type int32, an assist matrix whose shape must match the shape of var \n
+
+* @par Attributes:
+* @li dimension: An integer of type int, specifying the axis information of the index with the maximum value.\n
+
+* @par Outputs:
+* y: A Tensor of type float16, float32, int32 or int8. \n
+
+*@attention Constraints:
+*@li indices: only support int32,and shape same to "updates"
+*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "var".
+*@li y:A Tensor, the type and shape is same to "var" \n
+
+*@par Third-party framework compatibility
+* not support all scene like pytorch operator scatter
+* exp:
+* var.shape=[2,3,4,5], dim=2, the shape of indices and updates should be [2,3,5]
+* not support the shape of indices and updates is [2,3,2,5] like pytorch operator scatter. \n
+*/
+REG_OP(ArgMaxGradD)
+    .INPUT(var, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(updates, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .INPUT(assist, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .REQUIRED_ATTR(dimension, Int)
+    .OP_END_FACTORY_REG(ArgMaxGradD)
+
+/**
+*@brief Calculates the reversed outputs of the function "AddMatMatElements"
+*  c = c * beta + alpha * a * b
+
+*@par Inputs:
+*Five inputs, including:
+* @li c: A mutable Tensor. Must be one of the following types:
+*     float16, float32.
+* @li a: A mutable Tensor of the same type as "c".
+* @li b: A mutable Tensor of the same type as "c".
+* @li beta: A mutable scalar of the same type as "c".
+* @li alpha: A mutable scalar of the same type as "c". \n
+
+*@par Outputs:
+* @li c: A mutable Tensor. Has the same type as "c". \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator AddMatMatElements.
+*/
+REG_OP(AddMatMatElements)
+    .INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(a, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(b, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(c, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(AddMatMatElements)
+
+/**
+*@brief Returns cosine similarity between x1 and x2,computed along dim. \n
+
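+*@par Inputs:
+*Two inputs, including:
+* @li input_x1: A tensor of type float32.
+* @li input_x2: A tensor of type float32. \n
+
+*@par Attributes:
+*@li dim: An optional int, the dimension along which the similarity is computed. Defaults to 1.
+*@li eps: An optional float. Defaults to 1e-8. \n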
+
+*@par Outputs:
+*@li output_y: A Tensor with the same type as "input_x1". \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator CosineSimilarity. \n
+*/
+REG_OP(CosineSimilarity)
+    .INPUT(input_x1, TensorType({DT_FLOAT}))  /* "First operand." */
+    .INPUT(input_x2, TensorType({DT_FLOAT}))  /* "Second operand." */
+    .OUTPUT(output_y, TensorType({DT_FLOAT})) /* "Result, has same element type as two inputs" */
+    .ATTR(dim, Int, 1)
+    .ATTR(eps, Float, 1e-8)
+    .OP_END_FACTORY_REG(CosineSimilarity)
+
+}  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h
index 598d3ad3..b09ac058 100644
--- a/third_party/fwkacllib/inc/ops/functional_ops.h
+++ b/third_party/fwkacllib/inc/ops/functional_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/get_data_ops.h b/third_party/fwkacllib/inc/ops/get_data_ops.h
index 33dc4f14..e5518ef8 100644
--- a/third_party/fwkacllib/inc/ops/get_data_ops.h
+++ b/third_party/fwkacllib/inc/ops/get_data_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/globalavgpool.h b/third_party/fwkacllib/inc/ops/globalavgpool.h
new file mode 100644
index 00000000..06f03d30
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/globalavgpool.h
@@ -0,0 +1,49 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file globalavgpool.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_GLOBALAVERAGEPOOL_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_GLOBALAVERAGEPOOL_H_
+
+#include "graph/operator_reg.h"
+
+namespace ge {
+/**
+*@brief GlobalAveragePool consumes an input tensor X and applies average pooling across the values in the same channel.
+This is equivalent to AveragePool with kernel size equal to the spatial dimension of input tensor \n
+
+*@par Inputs:
+*@li x: Input data tensor from the previous operator; dimensions for image case are (N x C x H x W),
+where N is the batch size, C is the number of channels, and H and W are the height and the width of the data.
+For non image case, the dimensions are in the form of (N x C x D1 x D2 ... Dn), where N is the batch size.
+
+*@par Outputs:
+*y: Output data tensor from pooling across the input tensor. The output tensor has the same rank as the input.
+The first two dimensions of output shape are the same as the input (N x C), while the other dimensions are all 1
+
+*@par Restrictions:
+*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly.
+*/
+REG_OP(GlobalAveragePool)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(GlobalAveragePool)
+} // namespace ge
+
+#endif  // OPS_BUILT_IN_OP_PROTO_INC_GLOBALAVERAGEPOOL_H_
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h
index b90b225e..497f6a68 100644
--- a/third_party/fwkacllib/inc/ops/hcom_ops.h
+++ b/third_party/fwkacllib/inc/ops/hcom_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -45,8 +45,6 @@ REG_OP(HcomAllGather)
     .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
     .REQUIRED_ATTR(rank_size, Int)
     .REQUIRED_ATTR(group, String)
-    .ATTR(alpha, Float, 1.0)
-    .ATTR(beta, Float, 0.0)
     .OP_END_FACTORY_REG(HcomAllGather)
 
 /**
@@ -77,8 +75,6 @@ REG_OP(HcomAllReduce)
     .REQUIRED_ATTR(group, String)
     .ATTR(fusion, Int, 1)
     .ATTR(fusion_id, Int, -1)
-    .ATTR(alpha, Float, 1.0)
-    .ATTR(beta, Float, 0.0)
     .OP_END_FACTORY_REG(HcomAllReduce)
 
 /**
@@ -91,7 +87,7 @@ REG_OP(HcomAllReduce)
   input of this rank will be broadcast to other ranks.
  * @li fusion: A required integer identifying if the op need to fusion,the 
   default value is none fusion
-  * @li fusion: A required integer identifying the fusion id if para fusion
+  * @li fusion_id: A required integer identifying the fusion id if para fusion
   is set.
  * @li group: A required string identifying the group name of ranks
   participating in the op.
@@ -109,10 +105,39 @@ REG_OP(HcomBroadcast)
     .REQUIRED_ATTR(group, String)
     .ATTR(fusion, Int, 0)
     .ATTR(fusion_id, Int, -1)
-    .ATTR(alpha, Float, 1.0)
-    .ATTR(beta, Float, 0.0)
     .OP_END_FACTORY_REG(HcomBroadcast)
 
+/**
+ * @brief Performs reduction from the other ranks to the root rank.
+ * @par Inputs:
+ * x: A tensor. Must be one of the following types: int8, int16, int32, float16,
+  float32.
+ * @par Attributes:
+ * @li root_rank: A required integer identifying the root rank in the op;
+  the reduction result will be on this root rank.
+ * @li reduction: A required string identifying the reduction operation to
+  perform.The supported operation are: "sum", "max", "min", "prod".
+ * @li group: A required string identifying the group name of ranks
+  participating in the op.
+ * @li fusion: An optional integer identifying the fusion flag of the op.
+  0 (default): no fusion; 1: fusion; 2: fusion the ops by fusion id.
+ * @li fusion_id: An optional integer identifying the fusion id of the op.
+ * The HcomReduce ops with the same fusion id will be fused.
+ * @par Outputs:
+ * y: A Tensor. Has the same type as "x".
+ * @attention Constraints:
+ *"group" is limited to 128 characters. Use "hccl_world_group"
+  as the name of a world group.
+ */
+REG_OP(HcomReduce)
+    .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16}))
+    .REQUIRED_ATTR(root_rank, Int)
+    .REQUIRED_ATTR(reduction, String)
+    .REQUIRED_ATTR(group, String)
+    .ATTR(fusion, Int, 0)
+    .ATTR(fusion_id, Int, -1)
+    .OP_END_FACTORY_REG(HcomReduce)
 /**
  * @brief Performs reduction across all input tensors, scattering in equal
   blocks among ranks, each rank getting a chunk of data based on its rank
@@ -139,8 +164,6 @@ REG_OP(HcomReduceScatter)
     .REQUIRED_ATTR(reduction, String)
     .REQUIRED_ATTR(group, String)
     .REQUIRED_ATTR(rank_size, Int)
-    .ATTR(alpha, Float, 1.0)
-    .ATTR(beta, Float, 0.0)
     .OP_END_FACTORY_REG(HcomReduceScatter)
 
 /**
@@ -167,8 +190,6 @@ REG_OP(HcomSend)
     .REQUIRED_ATTR(group, String)
     .REQUIRED_ATTR(sr_tag, Int)
     .REQUIRED_ATTR(dest_rank, Int)
-    .ATTR(alpha, Float, 1.0)
-    .ATTR(beta, Float, 0.0)
     .OP_END_FACTORY_REG(HcomSend)
 
 /**
@@ -202,8 +223,6 @@ REG_OP(HcomReceive)
     .REQUIRED_ATTR(src_rank, Int)
     .REQUIRED_ATTR(shape, ListInt)
     .REQUIRED_ATTR(dtype, Type)
-    .ATTR(alpha, Float, 1.0)
-    .ATTR(beta, Float, 0.0)
     .OP_END_FACTORY_REG(HcomReceive)
 
 /**
@@ -219,6 +238,15 @@ REG_OP(HcomRemoteRead)
     .REQUIRED_ATTR(dtype, Type)
     .OP_END_FACTORY_REG(HcomRemoteRead)
 
+/**
+ * @brief Performs Remote Ref Read of input tensors
+ * @par Inputs:
+ * remote: A tensor. describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length
+ * cache_var: The local base address
+ * local_offset: Skip step length
+ * @par Outputs:
+ * cache_var: The local base address
+ */
 REG_OP(HcomRemoteRefRead)
     .INPUT(remote, TensorType({DT_UINT64}))
     .INPUT(cache_var, TensorType({DT_UINT64}))
@@ -239,11 +267,90 @@ REG_OP(HcomRemoteWrite)
     .INPUT(local, TensorType::ALL())
     .OP_END_FACTORY_REG(HcomRemoteWrite)
 
+/**
+ * @brief Performs Remote Write of input tensors
+ * @par Inputs:
+ * remote: A tensor describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length
+ * local: A Tensor whose value is length / size_of(Type)
+ * local_offset: An optional tensor of type uint64.
+ */
 REG_OP(HcomRemoteScatterWrite)
     .INPUT(remote, TensorType({DT_INT64, DT_UINT64}))
     .INPUT(local, TensorType::ALL())
     .OPTIONAL_INPUT(local_offset, TensorType({DT_UINT64}))
     .OP_END_FACTORY_REG(HcomRemoteScatterWrite)
 
+/**
+ * @brief All ranks send different amount of data to, and receive different
+  amount of data from, all ranks.
+ * @par Inputs:
+ * Five inputs, including:
+ * @li send_data: A tensor. the memory to send.
+ * @li send_counts: A list, where entry i specifies the number of elements in
+  send_data to send to rank i.
+ * @li send_displacements: A list, where entry i specifies the displacement
+  (offset from sendbuf) from which to send data to rank i.
+ * @li recv_counts: A list, where entry i specifies the number of
+  elements to receive from rank i.
+ * @li recv_displacements: A list, where entry i specifies the displacement
+  (offset from recv_data) to which data from rank i should be written.
+ * @par Outputs:
+ * recv_data: A Tensor. Has the same element type as send_data.
+ * @par Attributes:
+ * @li group: A string identifying the group name of ranks participating in
+  the op.
+ * @attention All ranks participating in the op should be full-mesh networked
+  using RDMA.
+ */
+REG_OP(HcomAllToAllV)
+    .INPUT(send_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
+    .INPUT(send_counts, TensorType({DT_INT64}))
+    .INPUT(send_displacements, TensorType({DT_INT64}))
+    .INPUT(recv_counts, TensorType({DT_INT64}))
+    .INPUT(recv_displacements, TensorType({DT_INT64}))
+    .OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
+    .REQUIRED_ATTR(group, String)
+    .OP_END_FACTORY_REG(HcomAllToAllV)
+
+/**
+ * @brief All ranks send different amounts of data to, and receive different
+  amounts of data from, all ranks. Then concatenates all data described by
+  addrinfo together into the output "gathered".
+ * @par Inputs:
+ * Four inputs, including:
+ * @li addrinfo: A tensor describing the memory info (address, length) to send.
+ * @li addrinfo_count_per_rank: A list, where entry i specifies the number of
+  elements in send_data to send to rank i.
+ * @li recv_counts: A list, where entry i specifies the number of
+  elements to receive from rank i.
+ * @li recv_displacements: A list, where entry i specifies the displacement
+  (offset from recv_data) to which data from rank i should be written.
+ * @par Outputs:
+ * Two outputs, including:
+ * @li recv_data: A Tensor. Has the same element type as dtype.
+ * @li gathered: A Tensor. Has the same element type as dtype.
+ * @par Attributes:
+ * @li group: A string identifying the group name of ranks participating in
+  the op.
+ * @li dtype: Datatype of send buffer elements.
+ * @li addr_length: Describes the element memory length in the addrinfo.
+  -2: all element memory length in the addrinfo is the same, but it is unknown.
+  -1: all element memory length is unknown.
+  >0: all element memory length in the addrinfo is the same. the attr value is the memory length.
+ * @attention all ranks participating in the op should be full-mesh networking
+  using the RDMA.
+ */
+REG_OP(HcomGatherAllToAllV)
+    .INPUT(addrinfo, TensorType({DT_UINT64}))
+    .INPUT(addrinfo_count_per_rank, TensorType({DT_INT64}))
+    .INPUT(recv_counts, TensorType({DT_INT64}))
+    .INPUT(recv_displacements, TensorType({DT_INT64}))
+    .OUTPUT(recv_data, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
+    .OUTPUT(gathered, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64}))
+    .REQUIRED_ATTR(group, String)
+    .REQUIRED_ATTR(dtype, Type)
+    .REQUIRED_ATTR(addr_length, Int)
+    .OP_END_FACTORY_REG(HcomGatherAllToAllV)
+
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/hvd_ops.h b/third_party/fwkacllib/inc/ops/hvd_ops.h
index a49ec5ed..00299ef7 100644
--- a/third_party/fwkacllib/inc/ops/hvd_ops.h
+++ b/third_party/fwkacllib/inc/ops/hvd_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h
index ce3262f9..6909345a 100644
--- a/third_party/fwkacllib/inc/ops/image_ops.h
+++ b/third_party/fwkacllib/inc/ops/image_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,6 +24,22 @@
 #include "graph/operator_reg.h"
 
 namespace ge {
+/**
+*@brief Decode the frame(s) of a GIF-encoded image to a uint8 tensor . \n
+
+*@par Inputs:
+*@li contents:A Tensor of type string. 0-D. The GIF-encoded image. \n
+
+*@par Outputs:
+*image:A Tensor of type uint8. \n
+
+*@par Third-party framework compatibility
+*Compatible with tensorflow DecodeGif operator.
+*/
+REG_OP(DecodeGif)
+    .INPUT(contents, TensorType({DT_STRING}))
+    .OUTPUT(image, TensorType({DT_UINT8}))
+    .OP_END_FACTORY_REG(DecodeGif)
 
 /**
 *@brief Adjust the hue of one or more images . \n
@@ -31,11 +47,12 @@ namespace ge {
 *@par Inputs:
 *Input images is a tensor of at least 3 dimensions. The last dimension is
 interpretted as channels, and must be three. Inputs include:
-*@li images:A Tensor of type float. Images to adjust. At least 3-D.
+*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format
+must be NHWC.
 *@li delta:A Tensor of type float. A float delta to add to the hue . \n
 
 *@par Outputs:
-*y:A Tensor of type float . \n
+*y:A Tensor of type float. The format must be NHWC. \n
 
 *@attention Constraints:
 *Input images is a tensor of at least 3 dimensions. The last dimension is
@@ -57,11 +74,12 @@ REG_OP(AdjustHue)
 *@par Inputs:
 *Input images is a tensor of at least 3 dimensions. The last dimension is
 interpretted as channels, and must be three. Inputs include:
-*@li images:A Tensor of type float. Images to adjust. At least 3-D.
+*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format
+must be NHWC.
 *@li scale:A Tensor of type float. A float scale to add to the saturation . \n
 
 *@par Outputs:
-*y:A Tensor of type float . \n
+*y:A Tensor of type float. The format must be NHWC. \n
 
 *@attention Constraints:
 *Input images is a tensor of at least 3 dimensions. The last dimension is
@@ -83,11 +101,12 @@ REG_OP(AdjustSaturation)
 *@par Inputs:
 *Input images is a tensor of at least 3 dimensions. The last 3 dimensions are
 interpreted as '[height, width, channels]'. Inputs include:
-*@li images:A Tensor of type float. Images to adjust. At least 3-D.
+*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format
+must be NHWC.
 *@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n
 
 *@par Outputs:
-*y:A Tensor of type float . \n
+*y:A Tensor of type float. The format must be NHWC. \n
 
 *@attention Constraints:
 *Input images is a tensor of at least 3 dimensions. The last dimension is
@@ -112,7 +131,7 @@ nearest neighbor sampling to a common output size specified by crop_size . \n
 *Input images must be a 4-D tensor. Inputs include:
 *@li images:A Tensor. Must be one of the following types:uint8, uint16, int8,
 int16, int32, int64, float16, float, double. A 4-D tensor of shape
-[batch, image_height, image_width, depth].
+[batch, image_height, image_width, depth]. The format must be NHWC.
 *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4].
 *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with
 int32 values in [0, batch).
@@ -127,7 +146,7 @@ extrapolation, when applicable.
 NearestNeighbor . \n
 
 *@par Outputs:
-*y:A Tensor of type float . \n
+*y:A Tensor of type float. The format must be NHWC. \n
 
 *@attention Constraints:
 *Input images must be a 4-D tensor . \n
@@ -193,7 +212,9 @@ boxes tensor . \n
 *@par Inputs:
 *Input images and grads must be a 4-D tensor. Inputs include:
 *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth].
+The format must be NHWC.
 *@li images: A 4-D tensor of shape [batch, image_height, image_width, depth].
+The format must be NHWC.
 Both image_height and image_width need to be positive.
 *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor
 specifies the coordinates of a box in the box_ind[i] image and is specified in
@@ -233,6 +254,7 @@ images tensor . \n
 *@par Inputs:
 *Input grads must be a 4-D tensor. Inputs include:
 *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth].
+The format must be NHWC.
 *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor
 specifies the coordinates of a box in the box_ind[i] image and is specified
 in normalized coordinates [y1, x1, y2, x2].
@@ -248,7 +270,8 @@ method: A string specifying the interpolation method. Only 'bilinear' is
 supported for now . \n
 
 *@par Outputs:
-*y:A 4-D tensor of shape [batch, image_height, image_width, depth] . \n
+*y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format
+must be NHWC. \n
 
 *@attention Constraints:
 *Input grads must be a 4-D tensor . \n
@@ -273,6 +296,7 @@ REG_OP(CropAndResizeGradImage)
 *@par Inputs:
 *Input x must be a 4-D tensor. Inputs include:
 *@li x: A 4-D float tensor of shape [batch_size, height, width, channels].
+The format must be NHWC.
 *@li size: A 1-D tensor of 2 elements containing the size of the glimpses to
 extract. The glimpse height must be specified first, following by the glimpse
 width.
@@ -293,7 +317,7 @@ uniform_noise . \n
 
 *@par Outputs:
 *y:A tensor representing the glimpses [batch_size, glimpse_height,
-glimpse_width, channels] . \n
+glimpse_width, channels]. The format must be NHWC. \n
 
 *@attention Constraints:
 *Input x must be a 4-D tensor . \n
@@ -340,7 +364,8 @@ REG_OP(HSVToRGB)
 
 *@par Inputs:
 *Input images must be a 4-D tensor. Inputs include:
-*@li images: 4-D with shape [batch, height, width, channels].
+*@li images: 4-D with shape [batch, height, width, channels]. The format must
+be NHWC.
 *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new
 size for the images.
 *@li min: A Tensor of type float.
@@ -354,6 +379,7 @@ the values at the corner pixels. Defaults to false.
 
 *@par Outputs:
 *@li resized_images: 4-D with shape [batch, new_height, new_width, channels].
+The format must be NHWC.
 *@li y_min: A Tensor of type float.
 *@li y_max: A Tensor of type float . \n
 
@@ -381,7 +407,8 @@ REG_OP(QuantizedResizeBilinear)
 
 *@par Inputs:
 *Input images must be a 4-D tensor. Inputs include:
-*@li images: 4-D with shape [batch, height, width, channels].
+*@li images: 4-D with shape [batch, height, width, channels]. The format must
+be NHWC.
 *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width.
 The new size for the images . \n
 
@@ -391,7 +418,8 @@ output tensors are aligned, preserving the values at the corner pixels.
 Defaults to false . \n
 
 *@par Outputs:
-*y: 4-D with shape [batch, new_height, new_width, channels] . \n
+*y: 4-D with shape [batch, new_height, new_width, channels]. The format must
+be NHWC. \n
 
 *@attention Constraints:
 *Input images can be of different types but output images are always float . \n
@@ -414,10 +442,10 @@ REG_OP(ResizeArea)
 *@par Inputs:
 *Input grads must be a 4-D tensor. Inputs include:
 *@li grads: A Tensor of type float. 4-D with shape [batch, height, width,
-channels].
+channels]. The format must be NHWC.
 *@li original_image: A Tensor. Must be one of the following types: float,
 double. 4-D with shape [batch, orig_height, orig_width, channels], The image
-tensor that was resized . \n
+tensor that was resized. The format must be NHWC. \n
 
 *@par Attributes:
 *@li align_corners: An optional bool. Defaults to False. If true, the centers
@@ -426,10 +454,10 @@ false.
 *@li half_pixel_centers: An optional bool. Defaults to False . \n
 
 *@par Outputs:
-*y: A Tensor. Has the same type as original_image . \n
+*y: A Tensor. Has the same type as original_image. The format must be NHWC. \n
 
 *@attention Constraints:
-*Input images can be of different types but output images are always float . \n
+*Input images can be of different types but output images are always float .
 
 *@par Third-party framework compatibility
 *Compatible with tensorflow ResizeBicubicGrad operator.
@@ -448,7 +476,8 @@ REG_OP(ResizeBicubicGrad)
 
 *@par Inputs:
 *Input images must be a 4-D tensor. Inputs include:
-*@li images: 4-D with shape [batch, height, width, channels].
+*@li images: 4-D with shape [batch, height, width, channels]. The format
+must be NHWC.
 *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new
 size for the images . \n
 
@@ -459,10 +488,11 @@ Defaults to false.
 *@li half_pixel_centers: An optional bool. Defaults to False . \n
 
 *@par Outputs:
-*y: 4-D with shape [batch, new_height, new_width, channels] . \n
+*y: 4-D with shape [batch, new_height, new_width, channels]. The format
+must be NHWC. \n
 
 *@attention Constraints:
-*Input images can be of different types but output images are always float . \n
+*Input images can be of different types but output images are always float .
 
 *@par Third-party framework compatibility
 *Compatible with tensorflow ResizeBicubic operator.
@@ -483,7 +513,7 @@ REG_OP(ResizeBicubic)
 *@par Inputs:
 *Input grads must be a 4-D tensor. Inputs include:
 *@li grads: A Tensor. Must be one of the following types: uint8, int8, int32,
-float16, float, double. 4-D with shape [batch, height, width, channels].
+float16, float, double. Must set the format, supported format list ["NCHW, NHWC"]
 *@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width.
 The original input size . \n
 
@@ -550,9 +580,8 @@ REG_OP(ResizeNearestNeighborV2GradD)
 
 *@par Inputs:
 *Input grads must be a 4-D tensor. Inputs include:
-*@li grads: A Tensor of type float32. 4-D with shape [batch, height, width,
-channels].
-*@li original_image: A Tensor. 4-D with shape [batch, orig_height, orig_width,
+*@li grads: A Tensor of type float32. Must set the format, supported format list ["NCHW, NHWC"]
+*@li original_image: A Tensor. Must set the format, supported format list ["NCHW, NHWC"]. 4-D, e.g. for NHWC [batch, orig_height, orig_width,
 channels], The image tensor that was resized . \n
 
 *@par Attributes:
@@ -583,7 +612,7 @@ REG_OP(ResizeBilinearV2Grad)
 
 *@par Inputs:
 *Input images must be a 4-D tensor. Inputs include:
-*@li x: 4-D with shape [batch, height, width, channels].
+*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"]
 *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new
 size for the images . \n
 
@@ -639,6 +668,62 @@ REG_OP(RGBToHSV)
 /**
 *@brief Generate a single randomly distorted bounding box for an image . \n
 
+*@par Inputs:
+*Input images must be a 4-D tensor. Inputs include:
+*@li image_size: 1-D, containing [height, width, channels].
+*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding
+boxes associated with the image. \n
+
+*@par Attributes:
+*@li seed: If either seed or seed2 are set to non-zero, the random number
+generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+*@li seed2: A second seed to avoid seed collision.
+*@li min_object_covered: The cropped area of the image must contain at least
+this fraction of any bounding box supplied. The value of this parameter should
+be non-negative. In the case of 0, the cropped area does not need to overlap
+any of the bounding boxes supplied .
+*@li aspect_ratio_range: The cropped area of the image must have an aspect
+ratio = width / height within this range.
+*@li area_range: The cropped area of the image must contain a fraction of the
+supplied image within this range.
+*@li max_attempts: Number of attempts at generating a cropped region of the image of the specified constraints. After max_attempts failures, return the entire image.
+*@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes
+supplied. If true, assume an implicit bounding box covering the whole input.
+If false, raise an error . \n
+
+*@par Outputs:
+*@li begin: 1-D, containing [offset_height, offset_width, 0].
+*@li size: 1-D, containing [target_height, target_width, -1].
+*@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n
+
+*@attention Constraints:
+*Input images can be of different types but output images are always float . \n
+
+*@par Third-party framework compatibility
+*Compatible with tensorflow SampleDistortedBoundingBox operator.
+*/
+
+REG_OP(SampleDistortedBoundingBox)
+    .INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
+        DT_INT32, DT_INT64 }))
+    .INPUT(bounding_boxes, TensorType({ DT_FLOAT }))
+    .OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
+        DT_INT32, DT_INT64 }))
+    .OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
+        DT_INT32, DT_INT64 }))
+    .OUTPUT(bboxes, TensorType({ DT_FLOAT }))
+    .ATTR(seed, Int, 0)
+    .ATTR(seed2, Int, 0)
+    .ATTR(min_object_covered, Float, 0.1f)
+    .ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f })
+    .ATTR(area_range, ListFloat, { 0.05f, 1.0f })
+    .ATTR(max_attempts, Int, 100)
+    .ATTR(use_image_if_no_bounding_boxes, Bool, false)
+    .OP_END_FACTORY_REG(SampleDistortedBoundingBox)
+
+/**
+*@brief Generate a single randomly distorted bounding box for an image . \n
+
 *@par Inputs:
 *Input images must be a 4-D tensor. Inputs include:
 *@li image_size: 1-D, containing [height, width, channels].
@@ -697,7 +782,7 @@ REG_OP(SampleDistortedBoundingBoxExt2)
 
 *@par Inputs:
 *Input x must be a 4-D tensor. Inputs include:
-*@li x: 4-D with shape [batch, height, width, channels].
+*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"].
 *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width.
 The new size for the images . \n
 
@@ -729,12 +814,12 @@ REG_OP(ResizeNearestNeighborV2)
 *@par Inputs:
 *Input images must be a 4-D tensor. Inputs include:
 *@li images: A Tensor. Must be one of the following types: float. 4-D with
-shape [batch, height, width, depth]. A batch of images.
+shape [batch, height, width, depth]. A batch of images. The format must be NHWC.
 *@li boxes: A Tensor of type float32. 3-D with shape [batch,
 num_bounding_boxes, 4] containing bounding boxes . \n
 
 *@par Outputs:
-*A Tensor. Has the same type as images . \n
+*A Tensor. Has the same type as images. The format must be NHWC. \n
 
 *@attention Constraints:
 *Input images must be a 4-D tensor . \n
@@ -1002,6 +1087,88 @@ REG_OP(EncodePng)
     .ATTR(compression, Int, -1)
     .OP_END_FACTORY_REG(EncodePng)
 
+
+/**
+*@brief PNG-decode an image.
+*@par Inputs:
+*contents: A Tensor of type string. 0-D. The PNG-encoded image.
+
+*@par Attributes:
+*channels: Number of color channels for the decoded image. Defaults to 0. \n
+*dtype: Data type of the decoded image. Defaults to uint8.
+
+*@par Outputs:
+*image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels]
+where channels is: 1: for grayscale; 2: for grayscale + alpha; 3: for RGB;
+4: for RGBA . \n
+
+*@par Third-party framework compatibility
+*Compatible with tensorflow DecodePng operator.
+*/
+REG_OP(DecodePng)
+    .INPUT(contents, TensorType({DT_STRING}))
+    .OUTPUT(image, TensorType({DT_UINT8, DT_UINT16}))
+    .ATTR(dtype, Type, DT_UINT8)
+    .ATTR(channels, Int, 0)
+    .OP_END_FACTORY_REG(DecodePng)
+
+/**
+*@brief Bmp-decode an image. \n
+
+*@par Inputs:
+*@li contents: A Tensor of type string. 0-D. The BMP-encoded image. \n
+
+*@par Attributes:
+*@li channels: Decode the desired number of color channels of the image. \n
+
+*@par Outputs:
+*image: A Tensor dtype of uint8.
+
+* @par Third-party framework compatibility
+* Compatible with tensorflow DecodeBmp operator.
+*/
+
+REG_OP(DecodeBmp)
+    .INPUT(contents, TensorType({DT_STRING}))
+    .OUTPUT(image, TensorType({DT_UINT8}))
+    .ATTR(channels, Int, 0)
+    .OP_END_FACTORY_REG(DecodeBmp)
+
+/**
+*@brief Decode and crop a JPEG-encoded image to a uint8 tensor. \n
+
+*@par Inputs:
+*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n
+*@li crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. \n
+
+*@par Attributes:
+*@li channels: An optional int. Defaults to 0. Number of color channels for the
+*decoded image.
+*@li ratio: An optional int. Defaults to 1. Downscaling ratio.
+*@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower
+*but nicer upscaling of the chroma planes
+*@li try_recover_truncated: An optional bool. Defaults to False. If true try to
+*recover an image from truncated input.
+*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required
+fraction of lines before a truncated input is accepted.
+*@li dct_method: An optional string. Defaults to "". string specifying a hint
+*about the algorithm used for decompression. \n
+
+*@par Outputs:
+*image: A Tensor dtype of uint8.
+*/
+REG_OP(DecodeAndCropJpeg)
+    .INPUT(contents, TensorType({DT_STRING}))
+    .INPUT(crop_window, TensorType({DT_INT32}))
+    .OUTPUT(image, TensorType({DT_UINT8}))
+    .ATTR(channels, Int, 0)
+    .ATTR(ratio, Int, 1)
+    .ATTR(fancy_upscaling, Bool, true)
+    .ATTR(try_recover_truncated, Bool, false)
+    .ATTR(acceptable_fraction, Float, 1.0)
+    .ATTR(dct_method, String, "")
+    .OP_END_FACTORY_REG(DecodeAndCropJpeg)
+
 /**
 *@brief Resizes "images" to "size" using bilinear interpolation . \n
 
@@ -1316,6 +1483,55 @@ REG_OP(CombinedNonMaxSuppression)
     .ATTR(clip_boxes, Bool, true)
     .OP_END_FACTORY_REG(CombinedNonMaxSuppression)
 
+/**
+*@brief Resizes "images" with "offset" using bilinear interpolation. \n
+
+*@par Inputs:
+*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`.
+*@li warp_offset: the resize offset A 4-D float tensor of shape `[n, h, w, 2]`, 2 means (x, y) for offset point.
+
+*@par Outputs:
+*warp_img: A Tensor after resize. \n
+*/
+REG_OP(IMGWarp)
+    .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
+    .INPUT(warp_offset, TensorType({DT_FLOAT32}))
+    .OUTPUT(warp_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
+    .OP_END_FACTORY_REG(IMGWarp)
+
+/**
+*@brief Resizes "images" with "offset" using bilinear interpolation. \n
+
+*@par Inputs:
+*@li img: input image, A 4-D tensor of shape `[n, h, w, c]`.
+*@li map_offset: the resize offset A 4-D float tensor of shape `[n, h, w, 2]`, 2 means (x, y) for resize point.
+
+*@par Outputs:
+*map_img: A Tensor after resize. \n
+*/
+REG_OP(Remap)
+    .INPUT(img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
+    .INPUT(map_offset, TensorType({DT_FLOAT32}))
+    .OUTPUT(map_img, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT32}))
+    .OP_END_FACTORY_REG(Remap)
+
+/**
+*@brief Resizes "images" with "offset" using bilinear interpolation. \n
+
+*@par Inputs:
+*@li img: input image, A 5-D tensor of shape `[n, 4, c, h, w]`,
+and 4 mean input[(h_top, w_left), (h_top, w_right), (h_bottom, w_left),  (h_bottom, w_right)].
+*@li warp_index: the resize offset A 4-D float tensor of shape `[n, 2, h, w]`, 2 means (x, y) for resize point.
+
+*@par Outputs:
+*warp_img: A Tensor after ResizeBilinear, A 4-D tensor of shape `[n, c, h, w]`. \n
+*/
+REG_OP(IMGWarpResize)
+    .INPUT(img, TensorType({DT_FLOAT32}))
+    .INPUT(warp_index, TensorType({DT_FLOAT32}))
+    .OUTPUT(warp_img, TensorType({DT_FLOAT32}))
+    .OP_END_FACTORY_REG(IMGWarpResize)
+
 /**
 *@brief Function spatial transformer . \n
 
@@ -1342,6 +1558,383 @@ REG_OP(SpatialTransformerD)
     .ATTR(use_default_theta, ListBool, {})
     .OP_END_FACTORY_REG(SpatialTransformerD)
 
-}  // namespace ge
+/**
+* @brief Resize the input tensor. \n
+currently, only support resize image tensor using nearest neighbor and linear interpolation.
+
+* @par Inputs:
+* Input x must be a 4-D tensor. Inputs include: \n
+* @li x: A Tensor. Must be one of the following types: uint8, int8, int16, \n
+int32, int64, float16, float, double. 4-D with shape [batch, height, width, channels] \n
+or shape [batch, channels, height, width].
+* @li roi: A 1-D float Tensor. only takes effect when attr coordinate_transformation_mode \n
+is "tf_crop_and_resize"
+* @li scales: A 1-D float Tensor, the scale array along each dimension, Only one of \n
+'scales' and 'sizes' can be specified.
+* @li sizes: A 1-D int64 Tensor, The size of the output tensor. Only one of \n
+'scales' and 'sizes' can be specified.  If 'sizes' is specified, then set scales \n
+to empty data (zero shape) in this operator's input list.
+
+* @par Attributes:
+* @li coordinate_transformation_mode: String. Defaults to half_pixel. how to transform \n
+the coordinate in the resized tensor to the coordinate in the original tensor. \n
+other optional: pytorch_half_pixel, align_corners, asymmetric, tf_half_pixel_for_nn, \n
+tf_crop_and_resize.
+* @li cubic_coeff_a: Float. Defaults to -0.75, only used in cubic interpolation. \n
+other optional: -0.5
+* @li exclude_outside: Int. Defaults to 0, If set to 1, the weight of sampling \n
+locations outside the tensor will be set to 0 and the weight will be renormalized \n
+so that their sum is 1.0.
+* @li extrapolation_value: Float. Defaults to 0.0f. When coordinate_transformation_mode \n
+is "tf_crop_and_resize" and x_original is outside the range [0, length_original - 1], \n
+this value is used as the corresponding output value.
+* @li mode: String. Defaults to nearest. Three interpolation modes: nearest (default), \n
+linear and cubic.
+* @li nearest_mode: String. Defaults to round_prefer_floor. Four modes: round_prefer_floor, \n
+round_prefer_ceil, floor, ceil. Only used by nearest interpolation.
+
+* @par Outputs:
+* y: A Tensor. Has the same type as x.
+
+* @attention Constraints: \n
+* Input x must be a 4-D tensor.
+
+* @par Third-party framework compatibility
+* Compatible with tensorflow ResizeNearestNeighborV2 operator.
+*/
+
+REG_OP(Resize)
+    .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
+                                DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(scales, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(sizes, TensorType({DT_INT64}))
+    .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
+                                DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(coordinate_transformation_mode, String, "half_pixel")
+    .ATTR(cubic_coeff_a, Float, -0.75)
+    .ATTR(exclude_outside, Int, 0)
+    .ATTR(extrapolation_value, Float, 0)
+    .ATTR(mode, String, "nearest")
+    .ATTR(nearest_mode, String, "round_prefer_floor")
+    .OP_END_FACTORY_REG(Resize)
+
+/**
+*@brief Decode a JPEG-encoded image to a uint8 tensor. \n
+
+*@par Inputs:
+*@li contents: A Tensor of type string. 0-D. The JPEG-encoded image. \n
+
+*@par Attributes:
+*@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image.
+*@li ratio: An optional int. Defaults to 1. Downscaling ratio.
+*@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower but nicer upscaling of the chroma planes
+*@li try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input.
+*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted.
+*@li dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression. \n
+
+*@par Outputs:
+*image: A Tensor dtype of uint8.
+*/
+REG_OP(DecodeJpeg)
+    .INPUT(contents, TensorType({DT_STRING}))
+    .OUTPUT(image, TensorType({DT_UINT8}))
+    .ATTR(channels, Int, 0)
+    .ATTR(ratio, Int, 1)
+    .ATTR(fancy_upscaling, Bool, true)
+    .ATTR(try_recover_truncated, Bool, false)
+    .ATTR(acceptable_fraction, Float, 1.0)
+    .ATTR(dct_method, String, "")
+    .OP_END_FACTORY_REG(DecodeJpeg)
+
+/**
+*@brief Image warping using per-pixel flow vectors. \n
+
+*@par Inputs:
+*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
+*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n
+
+*@par Outputs:
+*y: Returns 4-D with the same shape and dtype as `image`. \n
+*/
+REG_OP(DenseImageWarp)
+    .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(DenseImageWarp)
+
+/**
+*@brief Calculate the resize_d function. \n
+
+*@par Inputs:
+*One input, including:
+* @li x: A tensor. Must be one of the following types:
+*     float16, float32. \n
+
+*@par Attributes:
+*@li sizes: A required listInt. \n
+*@li scales: An optional listFloat.
+    Defaults to none. \n
+*@li roi: An optional listInt.
+    Defaults to none. \n
+*@li coordinate_transformation_mode: An optional String.
+    Defaults to "half_pixel". \n
+*@li cubic_coeff_a: An optional float.
+    Defaults to -0.75. \n
+*@li exclude_outside: An optional int.
+    Defaults to 0. \n
+*@li extrapolation_value: An optional float.
+    Defaults to 0.0. \n
+*@li mode: An optional String.
+    Defaults to "nearest". \n
+*@li nearest_mode: An optional String.
+    Defaults to "round_prefer_floor". \n
+
+*@par Outputs:
+*y: A Tensor with the same type of x's,
+    shape depends on x and sizes. \n
+*/
+REG_OP(ResizeD)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(sizes, ListInt)
+    .ATTR(scales, ListFloat, {})
+    .ATTR(roi, ListInt, {})
+    .ATTR(coordinate_transformation_mode, String, "half_pixel")
+    .ATTR(cubic_coeff_a, Float, -0.75)
+    .ATTR(exclude_outside, Int, 0)
+    .ATTR(extrapolation_value, Float, 0.0)
+    .ATTR(mode, String, "nearest")
+    .ATTR(nearest_mode, String, "round_prefer_floor")
+    .OP_END_FACTORY_REG(ResizeD)
+
+/**
+*@brief Calculate the resize_grad_d function. \n
+
+*@par Inputs:
+*One input, including:
+* @li grads: A tensor. Must be one of the following types:
+*     float16, float32. \n
+
+*@par Attributes:
+*@li original_size: A required listInt. \n
+*@li roi: An optional listInt.
+    Defaults to none. \n
+*@li scales: An optional listFloat.
+    Defaults to none. \n
+*@li coordinate_transformation_mode: An optional String.
+    Defaults to "half_pixel". \n
+*@li cubic_coeff_a: An optional float.
+    Defaults to -0.75. \n
+*@li exclude_outside: An optional int.
+    Defaults to 0. \n
+*@li extrapolation_value: An optional float.
+    Defaults to 0.0. \n
+*@li mode: An optional String.
+    Defaults to "nearest". \n
+*@li nearest_mode: An optional String.
+    Defaults to "round_prefer_floor". \n
+
+*@par Outputs:
+*y: A Tensor with the same type as grads,
+    shape depends on grads and original_size. \n
+*/
+REG_OP(ResizeGradD)
+    .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(original_size, ListInt)
+    .ATTR(roi, ListInt, {})
+    .ATTR(scales, ListFloat, {})
+    .ATTR(coordinate_transformation_mode, String, "half_pixel")
+    .ATTR(cubic_coeff_a, Float, -0.75)
+    .ATTR(exclude_outside, Int, 0)
+    .ATTR(extrapolation_value, Float, 0.0)
+    .ATTR(mode, String, "nearest")
+    .ATTR(nearest_mode, String, "round_prefer_floor")
+    .OP_END_FACTORY_REG(ResizeGradD)
+
+/**
+*@brief Computes the gradients of DenseImageWarp with respect to image and flow. \n
+
+*@par Inputs:
+*@li grad: gradients with respect to DenseImageWarp output.
+*@li image: 4-D Tensor with shape `[batch, height, width, channels]`.
+*@li flow: 4-D Tensor with shape `[batch, height, width, 2]`. \n
+
+*@par Outputs:
+*grad_image: Returns 4-D with the same shape and dtype as `image`.
+*grad_flow: Returns 4-D with the same shape and dtype as `flow`. \n
+*/
+REG_OP(DenseImageWarpGrad)
+    .INPUT(grad, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(image, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(flow, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(grad_image, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(grad_flow, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(DenseImageWarpGrad)
+
+/**
+*@brief This operation samples input X by using interpolation based on flow field grid,
+ which is usually generated by affine_grid. The grid of shape [N, H, W, 2] is the concatenation of
+ (x, y) coordinates with shape [N, H, W] each, where x is indexing the 4th dimension (in width dimension) of
+ input data x and y is indexing the 3rd dimension (in height dimension), and the final result is
+ the interpolation value of 4 nearest corner points. The output tensor shape will be [N, C, H, W].
+
+*@par Inputs:
+*@li x: 4-D Tensor with shape `[batch, channels, height, width]`.
+*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`.
+
+*@par Attributes:
+*@li interpolation_mode: An optional string specifying the interpolation method. Only 'bilinear' is
+ supported for now .
+*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now .
+*@li align_corners: An optional bool. If "true", the centers of the corner
+ pixels of the input and output tensors are aligned. Defaults to "false" .
+
+*@par Outputs:
+*y: Returns 4-D Tensor with the same dtype as `x`.
+
+*@par Third-party framework compatibility
+*Compatible with pytorch GridSampler2D operator.
+
+*@par Restrictions:
+*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(GridSampler2D)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(interpolation_mode, String, "bilinear")
+    .ATTR(padding_mode, String, "zeros")
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(GridSampler2D)
+
+/**
+*@brief This operation unnormalizes input grid, which is usually generated by affine_grid.
+
+*@par Inputs:
+*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`.
+*@li assist: Assist matrix, a 4-D tensor of type float16 or float32.
+
+*@par Attributes:
+*@li align_corners: An optional bool. If "true", the centers of the corner
+ pixels of the input and output tensors are aligned. Defaults to "false" .
+
+*@par Outputs:
+*diff: Returns 4-D Tensor with the same shape and dtype as `grid`.
+*position: Returns 4-D int32 Tensor with the same shape as `grid`.
+*/
+REG_OP(GridUnnormal)
+    .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(assist, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(diff, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(position, TensorType({DT_INT32}))
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(GridUnnormal)
+
+/**
+*@brief This operation unfolds input x based on unnormalized grid, which is generated by GridUnnormal.
+
+*@par Inputs:
+*@li x: 4-D Tensor with shape `[batch, channels, height, width]`.
+*@li position: 4-D Tensor with shape `[batch, output_height, output_width, 2]`.
+
+*@par Attributes:
+*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now .
+
+*@par Outputs:
+*y: Returns 4-D Tensor with the same dtype as `x`.
+*/
+REG_OP(ImageUnfold)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(position, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(padding_mode, String, "zeros")
+    .OP_END_FACTORY_REG(ImageUnfold)
+
+/**
+*@brief This operation select images to warp_images according to offsets.
+
+*@par Inputs:
+*@li images: 4-D Tensor with shape `[batch, height, width, 3]`.
+*@li offsets: 4-D Tensor with shape `[batch, 4, new_height, new_width]`.
+
+*@par Outputs:
+*warp_images: Returns 5-D Tensor with shape
+`[batch, 4, new_height, new_width, 3]` and the same dtype as `images`.
+*/
+REG_OP(IMGWarpOffsets)
+    .INPUT(images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT}))
+    .INPUT(offsets, TensorType({DT_FLOAT, DT_INT32}))
+    .OUTPUT(warp_images, TensorType({DT_UINT8, DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(IMGWarpOffsets)
+
+/**
+*@brief This operation samples 3d input x by using interpolation based on flow field grid,
+ which is usually generated by affine_grid.
+
+*@par Inputs:
+*@li x: 5-D Tensor with shape `[batch, channels, depth, height, width]`.
+*@li grid: flow field grid, 5-D Tensor with shape `[batch, depth, height, width, 3]`.
+
+*@par Attributes:
+*@li interpolation_mode: An optional string specifying the interpolation method.
+*@li padding_mode: An optional string specifying the pad method.
+*@li align_corners: An optional bool. If "true", the centers of the corner
+ pixels of the input and output tensors are aligned. Defaults to "false" .
+
+*@par Outputs:
+*y: Returns 5-D Tensor with the same dtype as `x`.
+
+*@par Third-party framework compatibility
+*Compatible with pytorch GridSampler3D operator.
+
+*@par Restrictions:
+*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(GridSampler3D)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(interpolation_mode, String, "bilinear")
+    .ATTR(padding_mode, String, "zeros")
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(GridSampler3D)
 
+/**
+*@brief Computes the gradients of GridSampler3D.
+
+*@par Inputs:
+*@li grad: 5-D Tensor with shape `[batch, channels, depth, height, width]`.
+*@li x: 5-D Tensor with shape `[batch, channels, depth, height, width]`.
+*@li grid: flow field grid, 5-D Tensor with shape `[batch, depth, height, width, 3]`.
+
+*@par Attributes:
+*@li interpolation_mode: An optional string specifying the interpolation method.
+*@li padding_mode: An optional string specifying the pad method.
+*@li align_corners: An optional bool. If "true", the centers of the corner
+ pixels of the input and output tensors are aligned. Defaults to "false" .
+
+*@par Outputs:
+*dx: Returns 5-D Tensor with the same dtype and shape as `x`.
+*dgrid: Returns 5-D Tensor with the same dtype and shape as `grid`.
+
+*@par Third-party framework compatibility
+*Compatible with pytorch GridSampler3DGrad operator.
+
+*@par Restrictions:
+*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(GridSampler3DGrad)
+    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(dx, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(dgrid, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .ATTR(interpolation_mode, String, "bilinear")
+    .ATTR(padding_mode, String, "zeros")
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(GridSampler3DGrad)
+
+}  // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/internal_ops.h b/third_party/fwkacllib/inc/ops/internal_ops.h
index 9dde14a5..bcc3f1c3 100644
--- a/third_party/fwkacllib/inc/ops/internal_ops.h
+++ b/third_party/fwkacllib/inc/ops/internal_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h
index 7a6fbc59..69c77bf6 100644
--- a/third_party/fwkacllib/inc/ops/linalg_ops.h
+++ b/third_party/fwkacllib/inc/ops/linalg_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -61,8 +61,8 @@ REG_OP(CholeskyGrad)
 
 *@par Inputs:
 *The input x has to be symmetric and positive definite.Inputs include:
-*x:A Tensor. Must be one of the following types: double, float32. Shape
-is [..., M, M] . \n
+*x:A Tensor. Must be one of the following types: double, float32, float16,
+complex64, complex128. Shape is [..., M, M] . \n
 
 *@par Outputs:
 *y:A Tensor. Has the same type as x . \n
@@ -76,10 +76,31 @@ form square matrices.
 */
 
 REG_OP(Cholesky)
-    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, \
+        DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, \
+        DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
     .OP_END_FACTORY_REG(Cholesky)
 
+/**
+*@brief Computes the outer product of two 1D vectors . \n
+
+*@par Inputs:
+*The inputs x1 and x2 have to be 1D vectors. Inputs include:
+*@li x1:A Tensor. Must be one of the following types: float16, float32.
+Shape is [N] . \n
+*@li x2:A Tensor. Must have the same type as x1. Shape is [M] . \n
+
+*@par Outputs:
+*y:A Tensor. Has the same type as x1 . \n
+*/
+
+REG_OP(Ger)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(Ger)
+
 /**
 *@brief Computes the sign and the log of the absolute value of the determinant
 of one or more square matrices . \n
@@ -87,8 +108,8 @@ of one or more square matrices . \n
 *@par Inputs:
 *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions
 form square matrices. Inputs include:
-*x:A Tensor. Must be one of the following types: double, float32. Shape is
-[..., M, M] . \n
+*x:A Tensor. Must be one of the following types: double, float32,
+complex64, complex128. Shape is [..., M, M] . \n
 
 *@par Outputs:
 *@li y:A Tensor. Has the same type as x.
@@ -103,9 +124,9 @@ form square matrices. \n
 */
 
 REG_OP(LogMatrixDeterminant)
-    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
     .OP_END_FACTORY_REG(LogMatrixDeterminant)
 
 /**
@@ -114,8 +135,8 @@ REG_OP(LogMatrixDeterminant)
 *@par Inputs:
 *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions
 form square matrices. Inputs include:
-*x:A Tensor. Must be one of the following types: double, float32. Shape is
-[..., M, M] . \n
+*x:A Tensor. Must be one of the following types: double, float32, complex64,
+complex128. Shape is [..., M, M] . \n
 
 *@par Outputs:
 *y:A Tensor. Has the same type as x . \n
@@ -129,8 +150,8 @@ form square matrices.
 */
 
 REG_OP(MatrixDeterminant)
-    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
     .OP_END_FACTORY_REG(MatrixDeterminant)
 
 /**
@@ -140,8 +161,7 @@ their adjoints (conjugate transposes) . \n
 *@par Inputs:
 *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions
 form square matrices. Inputs include:
-*x:A Tensor. Must be one of the following types: double, float. Shape is
-[..., M, M] . \n
+*x:A Tensor of input. Shape is [..., M, M] . \n
 
 *@par Attributes:
 *adjoint:An optional bool. Defaults to False.Boolean indicating whether to
@@ -159,8 +179,8 @@ form square matrices.  \n
 */
 
 REG_OP(MatrixInverse)
-    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
     .ATTR(adjoint, Bool, false)
     .OP_END_FACTORY_REG(MatrixInverse)
 
@@ -169,8 +189,7 @@ REG_OP(MatrixInverse)
 
 *@par Inputs:
 *The input rhs must have the same type as matrix. Inputs include:
-*@li matrix:A Tensor. Must be one of the following types: double, float.
-Shape is [..., M, M].
+*@li matrix:A Tensor of input. Shape is [..., M, M].
 *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n
 
 *@par Attributes:
@@ -189,9 +208,9 @@ dimensions form square matrices.  \n
 */
 
 REG_OP(MatrixSolve)
-    .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
     .ATTR(adjoint, Bool, false)
     .OP_END_FACTORY_REG(MatrixSolve)
 
@@ -221,8 +240,8 @@ dimensions form square matrices.  \n
 */
 
 REG_OP(MatrixSolveLs)
-    .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
     .INPUT(l2, TensorType({DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
     .ATTR(fast, Bool, true)
@@ -234,8 +253,7 @@ matrices by backsubstitution . \n
 
 *@par Inputs:
 *The input rhs must have the same type as matrix. Inputs include:
-*@li matrix: A Tensor. Must be one of the following types: double, float.
-Shape is [..., M, M].
+*@li matrix: A Tensor. Shape is [..., M, M].
 *@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n
 
 *@par Attributes:
@@ -256,9 +274,9 @@ dimensions form square matrices.  \n
 */
 
 REG_OP(MatrixTriangularSolve)
-    .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
     .ATTR(lower, Bool, true)
     .ATTR(adjoint, Bool, false)
     .OP_END_FACTORY_REG(MatrixTriangularSolve)
@@ -268,8 +286,7 @@ REG_OP(MatrixTriangularSolve)
 
 *@par Inputs:
 *The input shape of x must be [..., M, N]. Inputs include:
-*x:A Tensor whose shape is [..., M, N]. Must be one of the following types:
-double, float . \n
+*x:A Tensor whose shape is [..., M, N]. \n
 
 *@par Attributes:
 *full_matrices: An optional bool. Defaults to False. If true, compute
@@ -289,9 +306,12 @@ dimensions form matrices of size [M, N].  \n
 */
 
 REG_OP(Qr)
-    .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
-    .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
-    .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
+    .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \
+        DT_COMPLEX64, DT_COMPLEX128 }))
+    .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \
+        DT_COMPLEX64, DT_COMPLEX128 }))
+    .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \
+        DT_COMPLEX64, DT_COMPLEX128 }))
     .ATTR(full_matrices, Bool, false)
     .OP_END_FACTORY_REG(Qr)
 
@@ -320,12 +340,40 @@ form square matrices.   \n
 */
 
 REG_OP(SelfAdjointEig)
-    .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT }))
-    .OUTPUT(eigen_value, TensorType({ DT_DOUBLE, DT_FLOAT }))
-    .OUTPUT(eigen_vector, TensorType({ DT_DOUBLE, DT_FLOAT }))
+    .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 }))
+    .OUTPUT(eigen_value, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 }))
+    .OUTPUT(eigen_vector, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 }))
     .ATTR(compute_v, Bool, true)
     .OP_END_FACTORY_REG(SelfAdjointEig)
 
+/**
+*@brief Computes the sign and the log of the absolute value of the determinant
+of one or more square matrices . \n
+
+*@par Inputs:
+*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions
+form square matrices. Inputs include:
+*x:A Tensor. Must be one of the following types: double, float32, float16.
+Shape is [..., M, M] . \n
+
+*@par Outputs:
+*@li y:A Tensor. Has the same type as x.
+*@li sign:A Tensor. Has the same type as x . \n
+
+*@attention Constraints:
+*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions
+form square matrices. \n
+
+*@par Third-party framework compatibility
+*Compatible with tensorflow LogMatrixDeterminant operator.
+*/
+
+REG_OP(Slogdet)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(sign, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OP_END_FACTORY_REG(Slogdet)
+
 /**
 *@brief Computes the singular value decompositions of one or more matrices . \n
 
@@ -384,8 +432,8 @@ of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n
 */
 
 REG_OP(Lu)
-    .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
     .OUTPUT(p, TensorType({DT_INT32, DT_INT64}))
     .REQUIRED_ATTR(output_idx_type, Type)
     .OP_END_FACTORY_REG(Lu)
@@ -404,8 +452,8 @@ y: Shape is `[..., M, M]` . \n
 */
 
 REG_OP(MatrixSquareRoot)
-    .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
     .OP_END_FACTORY_REG(MatrixSquareRoot)
 
 /**
@@ -424,9 +472,9 @@ y: Tensor of shape `[..., M, K]` containing the solutions \n
 */
 
 REG_OP(TridiagonalSolve)
-    .INPUT(diagonals, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(diagonals, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
     .ATTR(partial_pivoting, Bool, true)
     .OP_END_FACTORY_REG(TridiagonalSolve)
 
diff --git a/third_party/fwkacllib/inc/ops/list_ops.h b/third_party/fwkacllib/inc/ops/list_ops.h
new file mode 100644
index 00000000..a1b622e9
--- /dev/null
+++ b/third_party/fwkacllib/inc/ops/list_ops.h
@@ -0,0 +1,504 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*!
+ * \file list_ops.h
+ * \brief
+ */
+#ifndef OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_
+#define OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_
+
+#include <algorithm>
+#include "graph/operator_reg.h"
+#include "graph/operator.h"
+
+namespace ge {
+
+/**
+*@brief Creates and returns an empty tensor list. \n
+
+*@par Inputs:
+*@li element_shape: A shape compatible with that of elements in the list.
+*@li max_num_elements: The maximum number of elements. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. \n
+
+*@par Outputs:
+*@li handle: An empty tensor list . \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow EmptyTensorList operator.
+*/
+REG_OP(EmptyTensorList)
+    .INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(max_num_elements, TensorType({DT_INT32}))
+    .OUTPUT(handle, TensorType({DT_VARIANT}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(EmptyTensorList)
+
+/**
+*@brief Returns a list which has the passed-in `Tensor` as last element
+and the other elements of the given list in `input_handle`. \n
+
+*@par Inputs:
+*@li input_handle: The old list.
+*@li tensor: The tensor to put on the list. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. \n
+
+*@par Outputs:
+*@li output_handle:A list with the elements of old list followed by tensor. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListPushBack operator.
+*/
+REG_OP(TensorListPushBack)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
+        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
+        DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE,
+        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
+    .OUTPUT(output_handle, TensorType({DT_VARIANT}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListPushBack)
+
+/**
+*@brief Returns the last element of the input list as well as a
+list with all but that element. \n
+
+*@par Inputs:
+*@li input_handle: The input list.
+*@li element_shape: A shape compatible with that of elements in the list. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. \n
+
+*@par Outputs:
+*@li output_handle:A list with all but the last element of the input list.
+*@li tensor:The withdrawn last element of the list. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListPopBack operator.
+*/
+REG_OP(TensorListPopBack)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(element_shape, TensorType({DT_INT32}))
+    .OUTPUT(output_handle, TensorType({DT_VARIANT}))
+    .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
+        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
+        DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE,
+        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListPopBack)
+
+/**
+*@brief The number of tensors in the input tensor list. \n
+
+*@par Inputs:
+*@li input_handle: The input list. \n
+
+*@par Outputs:
+*@li length:The number of tensors in the list. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListLength operator.
+*/
+REG_OP(TensorListLength)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .OUTPUT(length, TensorType({DT_INT32}))
+    .OP_END_FACTORY_REG(TensorListLength)
+
+/**
+*@brief The shape of elements in the input tensor list. \n
+
+*@par Inputs:
+*@li input_handle: The input list. \n
+
+*@par Attributes:
+*@li shape_type: The type of shape in the list. \n
+
+*@par Outputs:
+*@li element_shape:A shape compatible with that of elements in the list. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListElementShape operator.
+*/
+REG_OP(TensorListElementShape)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .OUTPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
+    .ATTR(shape_type, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListElementShape)
+
+/**
+*@brief List of the given size with empty elements. \n
+
+*@par Inputs:
+*@li element_shape: A shape compatible with that of elements in the list.
+*@li num_elements: The number of elements to reserve. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list.
+*@li shape_type: The type of shape in the list. \n
+
+*@par Outputs:
+*@li handle: An output tensor list . \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListReserve operator.
+*/
+REG_OP(TensorListReserve)
+    .INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(num_elements, TensorType({DT_INT32}))
+    .OUTPUT(handle, TensorType({DT_VARIANT}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .ATTR(shape_type, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListReserve)
+
+/**
+*@brief Get input tensor list elements of index position. \n
+
+*@par Inputs:
+*@li input_handle: The input list.
+*@li index: A tensor of position.
+*@li element_shape: A shape compatible with that of elements in the list. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. \n
+
+*@par Outputs:
+*@li item: An output tensor value of index position . \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListGetItem operator.
+*/
+REG_OP(TensorListGetItem)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(index, TensorType({DT_INT32}))
+    .INPUT(element_shape, TensorType({DT_INT32}))
+    .OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
+        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
+        DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
+        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListGetItem)
+
+/**
+*@brief Sets the index-th position of the list to contain the given tensor. \n
+
+*@par Inputs:
+*@li input_handle: The input list.
+*@li index: The position in the list to which the tensor will be assigned.
+*@li item: The element to be assigned to that position. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. \n
+
+*@par Outputs:
+*@li output_handle: An output tensor list . \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListSetItem operator.
+*/
+REG_OP(TensorListSetItem)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(index, TensorType({DT_INT32}))
+    .INPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
+        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
+        DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE,
+        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
+    .OUTPUT(output_handle, TensorType({DT_VARIANT}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListSetItem)
+
+/**
+*@brief Push tensor to list. \n
+
+*@par Inputs:
+*@li input_handles: The input tensor lists.
+*@li tensor: The tensor push into tensor list. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. \n
+
+*@par Outputs:
+*@li output_handles: The output tensor lists. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListPushBackBatch operator.
+*/
+REG_OP(TensorListPushBackBatch)
+    .INPUT(input_handles, TensorType({DT_VARIANT}))
+    .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
+        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
+        DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
+        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
+    .OUTPUT(output_handles, TensorType({DT_VARIANT}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListPushBackBatch)
+
+/**
+*@brief Stacks all tensors in the list. \n
+
+*@par Inputs:
+*@li input_handle: The input tensor list.
+*@li element_shape: A shape compatible with that of elements in the tensor. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list.
+*@li num_elements: The number of elements in the list. \n
+
+*@par Outputs:
+*@li tensor: The tensor of list. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListStack operator.
+*/
+REG_OP(TensorListStack)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(element_shape, TensorType({DT_INT32}))
+    .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
+        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
+        DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
+        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .ATTR(num_elements, Int, -1)
+    .OP_END_FACTORY_REG(TensorListStack)
+
+/**
+*@brief Concats all tensors in the list along the 0th dimension.
+Requires that all tensors have the same shape except the first dimension. \n
+
+*@par Inputs:
+*@li input_handle: The input list.
+*@li element_shape: The shape of the uninitialized elements in the list.
+If the first dimension is not -1, it is assumed that all list elements have
+the same leading dim.
+*@li leading_dims: The list of leading dims of uninitialized list elements. Used if
+the leading dim of input_handle.element_shape or the element_shape input arg
+is not already set. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. \n
+
+*@par Outputs:
+*@li tensor: The concatenated result.
+*@li lengths: Output tensor containing sizes of the 0th dimension of tensors
+in the list, used for computing the gradient. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListConcatV2 operator.
+*/
+REG_OP(TensorListConcatV2)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(leading_dims, TensorType({DT_INT64}))
+    .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
+        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
+        DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
+        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
+    .OUTPUT(lengths, TensorType({DT_INT64}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListConcatV2)
+
+/**
+*@brief Splits a tensor into a list. \n
+
+*@par Inputs:
+*@li tensor: The input tensor.
+*@li element_shape: A shape compatible with that of elements in the tensor.
+*@li lengths: Vector of sizes of the 0th dimension of tensors in the list. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. \n
+
+*@par Outputs:
+*@li output_handle: The list. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListSplit operator.
+*/
+REG_OP(TensorListSplit)
+    .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
+        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
+        DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
+        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
+    .INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(lengths, TensorType({DT_INT64}))
+    .OUTPUT(output_handle, TensorType({DT_VARIANT}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListSplit)
+
+/**
+*@brief Creates a TensorList which, when stacked, has the value of `tensor`. \n
+
+*@par Inputs:
+*@li tensor: The input tensor.
+*@li element_shape: The shape of elements in the list. Must be of type int32 or int64. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. Defaults to DT_INT32. \n
+
+*@par Outputs:
+*@li output_handle: The output tensor list, of type variant. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListFromTensor operator.
+*/
+REG_OP(TensorListFromTensor)
+    .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
+        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
+        DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
+        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
+    .INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
+    .OUTPUT(output_handle, TensorType({DT_VARIANT}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListFromTensor)
+
+/**
+*@brief Resizes the list. \n
+
+*@par Inputs:
+*@li input_handle: The input tensor list. Must be of type variant.
+*@li size: Size of the output list. Must be of type int32. \n
+
+*@par Outputs:
+*@li output_handle: The output tensor list, of type variant. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListResize operator.
+*/
+REG_OP(TensorListResize)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(size, TensorType({DT_INT32}))
+    .OUTPUT(output_handle, TensorType({DT_VARIANT}))
+    .OP_END_FACTORY_REG(TensorListResize)
+
+/**
+*@brief Creates a Tensor by indexing into the TensorList. \n
+
+*@par Inputs:
+*@li input_handle: The input tensor list. Must be of type variant.
+*@li indices: The indices used to index into the list. Must be of type int32.
+*@li element_shape: The shape of elements in the list. Must be of type int32. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. Defaults to DT_INT32. \n
+
+*@par Outputs:
+*@li values: The output tensor. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListGather operator.
+*/
+REG_OP(TensorListGather)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(element_shape, TensorType({DT_INT32}))
+    .OUTPUT(values, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
+        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
+        DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
+        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListGather)
+
+/**
+*@brief Creates a TensorList by indexing into a Tensor. \n
+
+*@par Inputs:
+*@li tensor: The input tensor.
+*@li indices: The indices used to index into the list. Must be of type int32.
+*@li element_shape: The shape of the elements in the list (can be less specified than
+* the shape of the tensor). Must be of type int32 or int64.
+*@li num_elements: The size of the output list. Must be large enough to accommodate
+* the largest index in indices. If -1, the list is just large enough to include
+* the largest index in indices. Must be of type int32. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. Defaults to DT_INT32. \n
+
+*@par Outputs:
+*@li output_handle: The output TensorList, of type variant. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListScatterV2 operator.
+*/
+REG_OP(TensorListScatterV2)
+    .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
+        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
+        DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
+        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(element_shape, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(num_elements, TensorType({DT_INT32}))
+    .OUTPUT(output_handle, TensorType({DT_VARIANT}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListScatterV2)
+
+/**
+*@brief Scatters tensor at indices in an input list. \n
+
+*@par Inputs:
+*@li input_handle: The input tensor list. Must be of type variant.
+*@li tensor: The input tensor.
+*@li indices: The indices used to index into the list. Must be of type int32. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. Defaults to DT_INT32. \n
+
+*@par Outputs:
+*@li output_handle: The output TensorList, of type variant. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListScatterIntoExistingList operator.
+*/
+REG_OP(TensorListScatterIntoExistingList)
+    .INPUT(input_handle, TensorType({DT_VARIANT}))
+    .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
+        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
+        DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
+        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .OUTPUT(output_handle, TensorType({DT_VARIANT}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListScatterIntoExistingList)
+
+/**
+*@brief Concatenates two tensor lists into a new tensor list. \n
+
+*@par Inputs:
+*@li input_a: The input tensor list A. Must be of type variant.
+*@li input_b: The input tensor list B. Must be of type variant. \n
+
+*@par Attributes:
+*@li element_dtype: The type of elements in the list. Defaults to DT_INT32. \n
+
+*@par Outputs:
+*@li output: The output tensor list, of type variant. \n
+
+*@par Third-party framework compatibility.
+*Compatible with tensorflow TensorListConcatLists operator.
+*/
+REG_OP(TensorListConcatLists)
+    .INPUT(input_a, TensorType({DT_VARIANT}))
+    .INPUT(input_b, TensorType({DT_VARIANT}))
+    .OUTPUT(output, TensorType({DT_VARIANT}))
+    .ATTR(element_dtype, Type, DT_INT32)
+    .OP_END_FACTORY_REG(TensorListConcatLists)
+}   // namespace ge
+
+#endif  // OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/logging_ops.h b/third_party/fwkacllib/inc/ops/logging_ops.h
index bc8ae2b8..03be7757 100644
--- a/third_party/fwkacllib/inc/ops/logging_ops.h
+++ b/third_party/fwkacllib/inc/ops/logging_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h
index b37ab048..5d928e5a 100644
--- a/third_party/fwkacllib/inc/ops/lookup_ops.h
+++ b/third_party/fwkacllib/inc/ops/lookup_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h
index 149e0e37..319bcf70 100644
--- a/third_party/fwkacllib/inc/ops/math_ops.h
+++ b/third_party/fwkacllib/inc/ops/math_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -222,6 +222,24 @@ REG_OP(Bucketize)
     .REQUIRED_ATTR(boundaries, ListFloat)
     .OP_END_FACTORY_REG(Bucketize)
 
+/**
+*@brief Returns a new tensor with the truncated integer values of the elements of input. \n
+
+*@par Inputs:
+*One input, including:
+*   @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n
+
+*@par Outputs:
+*output_y: A tensor with the same type and shape as input_x. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Trunc. \n
+*/
+REG_OP(Trunc)
+    .INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8}))
+    .OP_END_FACTORY_REG(Trunc)
+	
 /**
 *@brief Computes the sum along sparse segments of a tensor . \n
 
@@ -365,6 +383,27 @@ REG_OP(GetNext)
     .ATTR(channel_name, String, "")
     .OP_END_FACTORY_REG(GetNext)
 
+/**
+*@brief Get dynamic dims after GetNext. \n
+
+*@par Inputs:
+*input: A nested structure of Tensor objects, from GetNext's output. Dynamic input of type int32 or int64. \n
+
+*@par Attributes:
+*@li shape_info: A required list of ints. GE shape_info for each input; -1 means unknown dim.
+*@li N: A required int. Number of inputs. \n
+
+*@par Outputs:
+*dims: GE unknown dims, a vector of type int32 or int64. \n
+*/
+
+REG_OP(GetDynamicDims)
+    .DYNAMIC_INPUT(input, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(dims, TensorType({DT_INT32, DT_INT64}))
+    .REQUIRED_ATTR(shape_info, ListInt)
+    .REQUIRED_ATTR(N, Int)
+    .OP_END_FACTORY_REG(GetDynamicDims)
+
 /**
 *@brief End of sequence . \n
 
@@ -494,6 +533,29 @@ REG_OP(NextAfter)
     .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
     .OP_END_FACTORY_REG(NextAfter)
 
+/**
+*@brief Calculates the p-norm distance between vectors. \n
+
+*@par Inputs:
+*One input, including:
+* @li x: A tensor. Must be one of the following types:
+*     float16, float32. \n
+
+*@par Attributes:
+*@li  p: An optional float. Defaults to 2. \n
+
+*@par Outputs:
+*y: A Tensor stated to have the same type and shape as x (TODO: confirm the shape against PyTorch pdist). \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Pdist. \n
+*/
+REG_OP(Pdist)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(p, Float, 2.0)
+    .OP_END_FACTORY_REG(Pdist)
+
 /**
  *@brief Compute element-wise finiteness, return a boolean tensor.
 
@@ -624,6 +686,7 @@ REG_OP(NLLLoss)
     .OUTPUT(y, TensorType({DT_FLOAT}))
     .OUTPUT(total_weight, TensorType({DT_FLOAT}))
     .ATTR(reduction, String, "mean")
+    .ATTR(ignore_index, Int, -100)
     .OP_END_FACTORY_REG(NLLLoss)
 
 /**
@@ -653,6 +716,7 @@ REG_OP(NLLLossGrad)
     .INPUT(total_weight, TensorType({DT_FLOAT}))
     .OUTPUT(x_grad, TensorType({DT_FLOAT}))
     .ATTR(reduction, String, "mean")
+    .ATTR(ignore_index, Int, -100)
     .OP_END_FACTORY_REG(NLLLossGrad)
 
 /**
@@ -710,6 +774,9 @@ REG_OP(IFMR)
 
 *@par Third-party framework compatibility
 *Compatible with mindspore
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 
 REG_OP(WtsARQ)
@@ -741,6 +808,9 @@ REG_OP(WtsARQ)
 
 *@par Third-party framework compatibility
 *Compatible with mindspore
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 
 REG_OP(ActsULQ)
@@ -748,8 +818,8 @@ REG_OP(ActsULQ)
   .INPUT(clamp_min, TensorType({DT_FLOAT16, DT_FLOAT}))
   .INPUT(clamp_max, TensorType({DT_FLOAT16, DT_FLOAT}))
   .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
-  .OUTPUT(clamp_min_mask, TensorType({DT_BOOL}))
-  .OUTPUT(clamp_max_mask, TensorType({DT_BOOL}))
+  .OUTPUT(clamp_min_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT}))
+  .OUTPUT(clamp_max_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT}))
   .OUTPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT}))
   .ATTR(fixed_min, Bool, false)
   .ATTR(num_bits, Int, 8)
@@ -768,12 +838,15 @@ REG_OP(ActsULQ)
 
 *@par Third-party framework compatibility
 *Compatible with mindspore
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 
 REG_OP(ActsULQInputGrad)
   .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
-  .INPUT(clamp_min_mask, TensorType({DT_BOOL}))
-  .INPUT(clamp_max_mask, TensorType({DT_BOOL}))
+  .INPUT(clamp_min_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT}))
+  .INPUT(clamp_max_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT}))
   .OUTPUT(x_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
   .OP_END_FACTORY_REG(ActsULQInputGrad)
 
@@ -790,11 +863,14 @@ REG_OP(ActsULQInputGrad)
 
 *@par Third-party framework compatibility
 *Compatible with mindspore
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 
 REG_OP(ActULQClampMaxGrad)
   .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
-  .INPUT(clamp_max_mask, TensorType({DT_BOOL}))
+  .INPUT(clamp_max_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT}))
   .INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT}))
   .OUTPUT(clamp_max_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
   .OP_END_FACTORY_REG(ActULQClampMaxGrad)
@@ -812,15 +888,208 @@ REG_OP(ActULQClampMaxGrad)
 
 *@par Third-party framework compatibility
 *Compatible with mindspore
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 
 REG_OP(ActULQClampMinGrad)
   .INPUT(y_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
-  .INPUT(clamp_min_mask, TensorType({DT_BOOL}))
+  .INPUT(clamp_min_mask, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT}))
   .INPUT(x_clamped_loss, TensorType({DT_FLOAT16, DT_FLOAT}))
   .OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
   .OP_END_FACTORY_REG(ActULQClampMinGrad)
 
+/**
+* @brief Computes Lp norm.
+
+* @par Inputs:
+* @li x: An ND tensor of type float16 or float32. \n
+*
+* @par Attributes:
+* @li p: Int, "inf" or "-inf". Defaults to 2. NOTE(review): the attribute is declared as Int; confirm how "inf"/"-inf" are passed.
+* @li axes: ListInt, {} means all axes will be computed. Defaults to {}.
+* @li keepdim: Bool. Defaults to false.
+* @li epsilon: Float. Defaults to 1e-12. \n
+
+* @par Outputs:
+* @li y: An ND tensor of type float16 or float32. The shape of y depends
+* on axes and keepdim. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator LpNorm.
+*/
+REG_OP(LpNorm)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(p, Int, 2)
+    .ATTR(axes, ListInt, {})
+    .ATTR(keepdim, Bool, false)
+    .ATTR(epsilon, Float, 1e-12)
+    .OP_END_FACTORY_REG(LpNorm)
+
+/**
+* @brief Builds a complex tensor from a real-part tensor and an imaginary-part tensor.
+
+* @par Inputs:
+* @li real: An ND tensor of type float32 or double.
+* @li imag: An ND tensor of type float32 or double. \n
+*
+* @par Outputs:
+* @li out: An ND tensor of type complex64 or complex128. The Tout attribute defaults to DT_COMPLEX64. \n
+*/
+REG_OP(Complex)
+    .INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(imag, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(out, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .ATTR(Tout, Type, DT_COMPLEX64)
+    .OP_END_FACTORY_REG(Complex)
+
+/**
+* @brief Returns the imaginary part of a complex tensor (inferred from the op name; TODO confirm).
+
+* @par Inputs:
+* @li input: An ND tensor of type complex64 or complex128. \n
+*
+* @par Outputs:
+* @li output: An ND tensor of type float32 or double. The Tout attribute defaults to DT_FLOAT. \n
+*/
+REG_OP(Imag)
+    .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(Tout, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(Imag)
+
+/**
+* @brief Returns the argument (angle) of a complex tensor (inferred from the op name; TODO confirm).
+
+* @par Inputs:
+* @li input: An ND tensor of type complex64 or complex128. \n
+*
+* @par Outputs:
+* @li output: An ND tensor of type float32 or double. The Tout attribute defaults to DT_FLOAT. \n
+*/
+REG_OP(Angle)
+    .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(Tout, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(Angle)
+
+/**
+*@brief Computes the gradient of SoftMarginLoss. \n
+
+*@par Inputs:
+*Three inputs, including:
+* @li predict: A tensor. Must be one of the following types:
+*     float16, float32. \n
+* @li label: A tensor with the same shape as predict. Must be one of the following types:
+*     float16, float32. \n
+* @li dout: A tensor with the same shape as predict. Must be one of the following types:
+*     float16, float32. \n
+
+*@par Attributes:
+* @li reduction: Specifies the reduction to apply to the output:
+*     'none' | 'mean' | 'sum'. Default: 'mean'. \n
+
+*@par Outputs:
+* gradient: A Tensor with the same type as predict. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SoftMarginLoss Backward. \n
+*/
+REG_OP(SoftMarginLossGrad)
+    .INPUT(predict, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(label, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(dout, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(gradient, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(SoftMarginLossGrad)
+
+/**
+*@brief Calculate the cross product of two tensors. \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li x1: A tensor. Must be one of the following types:
+*     float16, float32, int32, int8, uint8, int16. \n
+* @li x2: A tensor. Must be one of the following types:
+*     float16, float32, int32, int8, uint8, int16. \n
+
+*@par Attributes:
+*@li dim: An optional int. The dimension along which to compute the cross product. Defaults to -65530. \n
+
+*@par Outputs:
+*y: A Tensor with the same type and shape as x1. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator cross. \n
+*/
+REG_OP(Cross)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_INT16}))
+    .ATTR(dim, Int, -65530)
+    .OP_END_FACTORY_REG(Cross)
+
+/**
+ *@brief Computes the batched p-norm distance between each pair of
+ *the two collections of row vectors. \n
+
+ *@par Inputs:
+ *Two inputs, including:
+ * @li x1: A tensor with shape: BxPxM. Must be one of the following types:
+ *     float16, float32. \n
+ * @li x2: A tensor with shape: BxRxM. Must be one of the following types:
+ *     float16, float32. \n
+
+ *@par Attributes:
+ * @li p: An optional float >= 0 or inf. Defaults to 2.0. \n
+
+ *@par Outputs:
+ * y: A Tensor with the same type as x1 and with shape BxPxR. \n
+
+ *@par Third-party framework compatibility
+ *Compatible with the Pytorch operator Cdist. \n
+ */
+REG_OP(Cdist)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(p, Float, 2.0)
+    .OP_END_FACTORY_REG(Cdist)
+
+/**
+*@brief  Computes the grad of x1 in cdist. \n
+
+*@par Inputs:
+*Four inputs, including:
+* @li grad: Grad with shape BxPxR. Must be one of the following types:
+*     float16, float32. \n
+* @li x1: A tensor with shape: BxPxM. Must be one of the following types:
+*     float16, float32. \n
+* @li x2: A tensor with shape: BxRxM. Must be one of the following types:
+*     float16, float32. \n
+* @li cdist: Output tensor of cdist forward with shape: BxPxR.
+*     Must be one of the following types: float16, float32. \n
+
+*@par Attributes:
+* @li p: An optional float >= 0 or inf. Defaults to 2.0. \n
+
+*@par Outputs:
+* y: A Tensor with the same type and shape as x1. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Cdist Backward. \n
+*/
+REG_OP(CdistGrad)
+    .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x2, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(cdist, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .ATTR(p, Float, 2.0)
+    .OP_END_FACTORY_REG(CdistGrad)
+
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
index ed23d3f6..b317be37 100644
--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -38,8 +38,8 @@ namespace ge {
 * float32, int32. Has format [ND, NHWC] . \n
 
 *@par Attributes:
-*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
-*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n
+*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
+*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n
 
 *@par Outputs:
 *y: The result matrix Tensor. 2D. Must be one of the following types: float16,
@@ -70,8 +70,8 @@ REG_OP(MatMul)
 * float32, int32. Has format [ND, NHWC] . \n
 
 *@par Attributes:
-*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
-*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n
+*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M].
+*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n
 
 *@par Outputs:
 *y: The result matrix Tensor. 2D. Must be one of the following types: float16,
@@ -91,6 +91,36 @@ REG_OP(MatMulV2)
     .ATTR(offset_x, Int, 0)
     .OP_END_FACTORY_REG(MatMulV2)
 
+/**
+*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n
+
+*@par Inputs:
+*Five inputs, including:
+* @li x1: A matrix Tensor. 2D. Must be of type int8.
+* @li x2: A matrix Tensor. 2D. Must be of type int8.
+* @li compress_index: A compress index matrix of type int8.
+* @li bias: An optional 1D Tensor. Must be one of the following types: int32, float16.
+* @li offset_w: An optional Tensor of type int8. \n
+*@par Attributes:
+*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. Defaults to false.
+*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M]. Defaults to false.
+*@li offset_x: An optional int. Defaults to 0. \n
+*@par Outputs:
+*y: The result matrix Tensor. 2D. Must be one of the following types: float16,
+* int32. \n
+
+*/
+REG_OP(MatMulV2Compress)
+    .INPUT(x1, TensorType({DT_INT8}))
+    .INPUT(x2, TensorType({DT_INT8}))
+    .INPUT(compress_index, TensorType({DT_INT8}))
+    .OPTIONAL_INPUT(bias, TensorType({DT_INT32, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_INT32, DT_FLOAT16}))
+    .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
+    .ATTR(transpose_x1, Bool, false)
+    .ATTR(transpose_x2, Bool, false)
+    .ATTR(offset_x, Int, 0)
+    .OP_END_FACTORY_REG(MatMulV2Compress)
 
 /**
 *@brief Performs Matrix-to-matrix Multiply, producing c=alpha[0]*a*b+beta[0]*c . \n
@@ -149,15 +179,15 @@ REG_OP(GEMM)
 *@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n
 
 *@par Inputs:
-*Three inputs, including:
+*Two inputs, including:
 * @li x1: A matrix Tensor. Must be one of the following types: float16,
 * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ].
 * @li x2: A matrix Tensor. Must be one of the following types: float16,
 * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n
 
 *@par Attributes:
-*@li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M].
-*@li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n
+*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M].
+*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n
 
 *@par Outputs:
 *y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16,
@@ -175,6 +205,42 @@ REG_OP(BatchMatMul)
     .ATTR(adj_x2, Bool, false)
     .OP_END_FACTORY_REG(BatchMatMul)
 
+
+/**
+* @brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n
+
+* @par Inputs:
+* Four inputs, including:
+* @li x1: A matrix Tensor. Must be one of the following types: float16,
+* float32, int32, int8. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ].
+* @li x2: A matrix Tensor. Must be one of the following types: float16,
+* float32, int32, int8. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ].
+* @li bias: An optional matrix Tensor. Must be one of the following types: float16,
+* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ].
+* @li offset_w: An optional Tensor of type int8. \n
+* @par Attributes:
+* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. Defaults to false.
+* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M]. Defaults to false.
+* @li offset_x: An optional int. Defaults to 0. \n
+* @par Outputs:
+* y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16,
+* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator BatchMatmul.
+*/
+
+REG_OP(BatchMatMulV2)
+    .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8}))
+    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .ATTR(adj_x1, Bool, false)
+    .ATTR(adj_x2, Bool, false)
+    .ATTR(offset_x, Int, 0)
+    .OP_END_FACTORY_REG(BatchMatMulV2)
+
 /**
 *@brief Computes half the L2 norm of a tensor without the sqrt . \n
 
@@ -334,7 +400,7 @@ REG_OP(MatrixSetDiagD)
  * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32,
  * uint64
 *@li indices: An ND Tensor.
-*Must be one of the following types: int32, int64
+*Must be one of the following types: int32 or int64
 *@li updates: An ND Tensor.
 *Must be one of the following types: float16, float32, int8, uint8, double,
  * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32,
@@ -378,6 +444,9 @@ REG_OP(ScatterNdUpdate)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator TensorScatterUpdate.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(TensorScatterUpdate)
     .INPUT(x, TensorType::BasicType())
@@ -386,6 +455,34 @@ REG_OP(TensorScatterUpdate)
     .OUTPUT(y, TensorType::BasicType())
     .OP_END_FACTORY_REG(TensorScatterUpdate)
 
+/**
+*@brief Uses "updates" to update tensor "data" by "indices". \n
+
+*@par Inputs:
+* Three inputs, including:
+*@li data: An ND Tensor.
+*Must be one of the following types: float16, float32, int32, int8, uint8
+*@li indices: An ND Tensor of type int32 or int64
+*@li updates: A Tensor. Same shape as indices. format:NCHW, NHWC.
+*Must be one of the following types: float16, float32, int32, int8, uint8 \n
+
+*@par Attributes:
+*@li axis: An optional int. Defaults to 0. \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type and format as input "data" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the ONNX operator ScatterElements.
+*/
+REG_OP(ScatterElements)
+    .INPUT(data, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(indices, TensorType::IndexNumberType())
+    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .ATTR(axis, Int, 0)
+    .OP_END_FACTORY_REG(ScatterElements)
+
 /**
 *@brief Adds sparse "updates" to a variable reference . \n
 
@@ -394,7 +491,7 @@ REG_OP(TensorScatterUpdate)
 *@li var: An ND Tensor . \n
 
 *Must be one of the following types: float16, float32, int32, int8, uint8
-*@li indices: An ND Tensor of type int32 or int64.
+*@li indices: An ND Tensor of type int32 or int64
 
 
 *@li updates: An Tensor. format:NCHW, NHWC . \n
@@ -412,10 +509,10 @@ REG_OP(TensorScatterUpdate)
 * Compatible with the TensorFlow operator ScatterAdd.
 */
 REG_OP(ScatterAdd)
-    .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .INPUT(indices, TensorType::IndexNumberType())
-    .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
-    .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .ATTR(use_locking, Bool, false)
     .OP_END_FACTORY_REG(ScatterAdd)
 
@@ -428,7 +525,7 @@ REG_OP(ScatterAdd)
 *Must be one of the following types: float16, float, int32, int8, uint8
 
 *@li indices: An ND Tensor.
-*Must be one of the following types: int32
+*Must be one of the following types: int32 or int64
 *@li updates: An ND Tensor.
 *Must be one of the following types: float16, float, int32, int8, uint8
 
@@ -443,10 +540,10 @@ REG_OP(ScatterAdd)
 * Compatible with the TensorFlow operator ScatterDiv.
 */
 REG_OP(ScatterDiv)
-    .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
-    .INPUT(indices, TensorType({DT_INT32}))
-    .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
-    .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(indices, TensorType::IndexNumberType())
+    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .ATTR(use_locking, Bool, false)
     .OP_END_FACTORY_REG(ScatterDiv)
 
@@ -458,7 +555,7 @@ REG_OP(ScatterDiv)
 *@li var: An ND Tensor.
 *Must be one of the following types: float16, float, int32, int8, uint8
 *@li indices: An ND Tensor.
-*Must be one of the following types: int32
+*Must be one of the following types: int32 or int64
 *@li updates: An ND Tensor.
 *Must be one of the following types: float16, float, int32, int8, uint8
 *@par Attributes:
@@ -472,10 +569,10 @@ REG_OP(ScatterDiv)
 * Compatible with the TensorFlow operator ScatterNdAdd.
 */
 REG_OP(ScatterNdAdd)
-    .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .INPUT(indices, TensorType::IndexNumberType())
-    .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
-    .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .ATTR(use_locking, Bool, false)
     .OP_END_FACTORY_REG(ScatterNdAdd)
 
@@ -499,6 +596,9 @@ REG_OP(ScatterNdAdd)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator TensorScatterAdd.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(TensorScatterAdd)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
@@ -515,7 +615,7 @@ REG_OP(TensorScatterAdd)
 *@li var: An ND Tensor.
 *Must be one of the following types: float16, float, int32, int8, uint8
 *@li indices: An ND Tensor.
-*Must be one of the following types: int32, int64
+*Must be one of the following types: int32 or int64
 *@li updates: An ND Tensor.
 *Must be one of the following types: float16, float, int32, int8, uint8
 
@@ -530,10 +630,10 @@ REG_OP(TensorScatterAdd)
 * Compatible with the TensorFlow operator ScatterNdSub.
 */
 REG_OP(ScatterNdSub)
-    .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .INPUT(indices, TensorType::IndexNumberType())
-    .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
-    .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .ATTR(use_locking, Bool, false)
     .OP_END_FACTORY_REG(ScatterNdSub)
 
@@ -557,6 +657,9 @@ REG_OP(ScatterNdSub)
 
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator TensorScatterSub.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(TensorScatterSub)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
@@ -573,7 +676,7 @@ REG_OP(TensorScatterSub)
 *@li var: An ND Tensor.
 *Must be one of the following types: float16, float, int32, int8, uint8
 *@li indices: An ND Tensor.
-*Must be one of the following types: int32, int64
+*Must be one of the following types: int32 or int64
 *@li updates: An ND Tensor.
 *Must be one of the following types: float16, float, int32, int8, uint8
 *@par Attributes:
@@ -587,10 +690,10 @@ REG_OP(TensorScatterSub)
 * Compatible with the TensorFlow operator ScatterSub.
 */
 REG_OP(ScatterSub)
-    .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .INPUT(indices, TensorType::IndexNumberType())
-    .INPUT(updates, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
-    .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .ATTR(use_locking, Bool, false)
     .OP_END_FACTORY_REG(ScatterSub)
 
@@ -761,7 +864,7 @@ REG_OP(ConfusionMatrix)
 *@li var: An ND Tensor.
 *Must be one of the following types: float16, float, int32, int8, uint8
 *@li indices: An ND Tensor.
-*Must be one of the following types: int32
+*Must be one of the following types: int32 or int64
 *@li updates: An ND Tensor . \n
 
 *Must be one of the following types: float16, float, int32, int8, uint8
@@ -778,7 +881,7 @@ REG_OP(ConfusionMatrix)
 */
 REG_OP(ScatterMul)
     .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
-    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(indices, TensorType::IndexNumberType())
     .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .ATTR(use_locking, Bool, false)
@@ -791,13 +894,13 @@ REG_OP(ScatterMul)
 *@par Inputs:
 * Three inputs, including:
 *@li var: An ND Tensor.
-*Must be one of the following types: float16, float, int32
+*Must be one of the following types: float16, float, int32, int8, uint8
 
 *@li indices: An ND Tensor.
-*Must be one of the following types: int32
+*Must be one of the following types: int32 or int64
 
 *@li updates: An ND Tensor.
-*Must be one of the following types: float16, float, int32
+*Must be one of the following types: float16, float, int32, int8, uint8
 
 *@par Attributes:
 *use_locking: An optional bool. Defaults to "False". If "True", the operation
@@ -810,10 +913,10 @@ REG_OP(ScatterMul)
 * Compatible with the TensorFlow operator ScatterMin.
 */
 REG_OP(ScatterMin)
-    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32}))
-    .INPUT(indices, TensorType({DT_INT32}))
-    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32}))
-    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32}))
+    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(indices, TensorType::IndexNumberType())
+    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .ATTR(use_locking, Bool, false)
     .OP_END_FACTORY_REG(ScatterMin)
 
@@ -824,13 +927,13 @@ REG_OP(ScatterMin)
 * Three inputs, including:
 *@li var: An ND Tensor . \n
 
-*Must be one of the following types: float16, float, int32
+*Must be one of the following types: float16, float, int32, int8, uint8
 *@li indices: An NCHW, NHWC, or ND Tensor . \n
 
-*Must be one of the following types: int32
+*Must be one of the following types: int32 or int64
 *@li updates: An NCHW, NHWC, or ND Tensor . \n
 
-*Must be one of the following types: float16, float, int32
+*Must be one of the following types: float16, float, int32, int8, uint8
 
 *@par Attributes:
 *use_locking: An optional bool. Defaults to "False".
@@ -843,10 +946,10 @@ REG_OP(ScatterMin)
 * Compatible with the TensorFlow operator ScatterMax.
 */
 REG_OP(ScatterMax)
-    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32}))
-    .INPUT(indices, TensorType({DT_INT32}))
-    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32}))
-    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32}))
+    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(indices, TensorType::IndexNumberType())
+    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .ATTR(use_locking, Bool, false)
     .OP_END_FACTORY_REG(ScatterMax)
 
@@ -860,7 +963,7 @@ REG_OP(ScatterMax)
 *Must be one of the following types: float16, float, int32, int8, uint8
 *@li indices: An ND Tensor . \n
 
-*Must be one of the following types: int32
+*Must be one of the following types: int32 or int64
 *@li updates: An ND Tensor . \n
 
 *Must be one of the following types: float16, float, int32, int8, uint8
@@ -876,10 +979,10 @@ REG_OP(ScatterMax)
 * Compatible with the TensorFlow operator ScatterUpdate.
 */
 REG_OP(ScatterUpdate)
-    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
-    .INPUT(indices, TensorType({DT_INT32}))
-    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
-    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
+    .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .INPUT(indices, TensorType::IndexNumberType())
+    .INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .ATTR(use_locking, Bool, false)
     .OP_END_FACTORY_REG(ScatterUpdate)
 
@@ -979,6 +1082,137 @@ REG_OP(MatrixDiagV2)
     .OUTPUT(output, TensorType::BasicType())
     .OP_END_FACTORY_REG(MatrixDiagV2)
 
+/**
+* @brief Adds updates to var_out according to axis and indices.
+
+* @par Inputs:
+* Three inputs, including:
+* @li var: A Tensor. Must be one of the following types:
+*     float16, float32, int32, int8, uint8.
+* @li indices: A Tensor of the indices, type should be int32.
+* @li updates: A Tensor of the same type as "var".
+
+* @par Attributes:
+* @li axis: An optional int specifying the axis along which the index add is performed. Defaults to 0.
+
+* @par Outputs:
+* @li var_out: A Tensor. Same as input "var".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator index_add.
+
+* @par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(IndexAdd)
+    .INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(updates, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .OUTPUT(var_out, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .ATTR(axis, Int, 0)
+    .OP_END_FACTORY_REG(IndexAdd)
+
+/**
+*@brief Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input . \n
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types:
+*    float16, float32, double, int32, uint8, int16, int8, complex64, int64,
+*    qint8, quint8, qint32, uint16, complex128, uint32, uint64 . \n
+*@par Attributes:
+*diagonal: An optional int. The diagonal to consider. Defaults to 0 . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the Pytorch operator Triu.
+*/
+REG_OP(Triu)
+    .INPUT(x, TensorType::BasicType())
+    .ATTR(diagonal, Int, 0)
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(Triu)
+
+/**
+*@brief Returns the lower triangular part of a matrix (2-D tensor) or batch of matrices input . \n
+
+*@par Inputs:
+*x: A Tensor. Must be one of the following types:
+*    float16, float32, double, int32, uint8, int16, int8, complex64, int64,
+*    qint8, quint8, qint32, uint16, complex128, uint32, uint64 . \n
+*@par Attributes:
+*diagonal: An optional int. The diagonal to consider. Defaults to 0 . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the Pytorch operator Tril.
+*/
+REG_OP(Tril)
+    .INPUT(x, TensorType::BasicType())
+    .ATTR(diagonal, Int, 0)
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(Tril)
+/**
+*@brief Sums the product of the elements of the input operands along
+* dimensions specified using a notation based on the Einstein summation
+* convention. \n
+
+*@par Inputs:
+* One input:
+*x: A list of Tensors. Must be one of the following types: float16, float32, int32.
+*   The operands of the summation. It's a dynamic input. \n
+
+*@par Attributes:
+*@li equation: A required string. The subscripts for the Einstein summation.
+*@li N: A required int. Tensor size of input.
+*    NOTE(review): presumably the number of tensors in "x" - confirm. \n
+
+*@par Outputs:
+*y: A Tensor. Must be one of the following types: float16, float32, int32.
+*   Holds the result of the Einstein summation. \n
+
+*@attention Constraints:
+*Attribute N must be Int. \n
+
+*@par Third-party framework compatibility
+*Compatible with Pytorch einsum operator.
+*/
+REG_OP(Einsum)
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .REQUIRED_ATTR(equation, String)
+    .REQUIRED_ATTR(N, Int)
+    .OP_END_FACTORY_REG(Einsum)
+
+/**
+*@brief Returns a 2-D tensor with ones on the diagonal and zeros elsewhere. \n
+
+*@par Inputs:
+*No inputs
+
+*@par Attributes:
+*@li num_rows: A required int. \n
+*@li num_columns: An optional int. Defaults to 0. \n
+*@li batch_shape: An optional ListInt. Defaults to []. \n
+*@li dtype: An optional int. Defaults to 0. \n
+
+*@par Outputs:
+*y: A Tensor with targeted type and shape. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Eye. \n
+*/
+REG_OP(Eye)
+    .OUTPUT(y, TensorType::BasicType())    /* "Result, has targeted element type" */
+    .REQUIRED_ATTR(num_rows, Int)
+    .ATTR(num_columns, Int, 0)
+    .ATTR(batch_shape, ListInt, {})
+    .ATTR(dtype, Int, 0)
+    .OP_END_FACTORY_REG(Eye)
+
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
index 0c6a5dff..9629976e 100644
--- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -144,6 +144,64 @@ REG_OP(BatchNorm)
 /**
 *@brief Performs batch normalization . \n
 
+*@par Inputs:
+* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported)
+*@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D.
+*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D
+if input "x" is with format NDC1HWC0. Specifies the scaling factor.
+*@li offset: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D
+if input "x" is with format NC1HWC0. Specifies the offset.
+*@li mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D
+if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the
+operation is used for training.
+*@li variance: A Tensor of type float32. Must be 3D if input "x" is with format NHWC or NCHW. Must be
+5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None"
+if the operation is used for training . \n
+
+*@par Attributes:
+*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
+*@li data_format: An optional string, specifying the format of "x". Defaults to "NCDHW".
+*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n
+
+*@par Outputs:
+* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported)
+*@li y: A 5D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D.
+*@li batch_mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D
+if input "x" is with format NDC1HWC0. Specifies the mean of "x".
+*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
+Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x".
+*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW.
+Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
+*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
+Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n
+
+*@attention Constraints:
+*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
+then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
+*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n
+
+*@par Third-party framework compatibility
+*@li Compatible with the TensorFlow operator fused_batch_norm.
+*@li Compatible with the TensorFlow operator fused_batch_norm_v2.
+*/
+REG_OP(BatchNorm3D)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(offset, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(batch_mean, TensorType({DT_FLOAT}))
+    .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .ATTR(data_format, String, "NCDHW")
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(BatchNorm3D)
+/**
+*@brief Performs batch normalization . \n
+
 *@par Inputs:
 * Five inputs, including: (NHWC or NCHW supported)
 *@li x: A 4D Tensor of type float16 or float32.
@@ -242,6 +300,52 @@ REG_OP(BatchNormGrad)
 /**
 *@brief Performs the backpropagation of BatchNorm . \n
 
+*@par Inputs:
+* Five inputs, including:
+*@li y_backprop: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient.
+*@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0.
+*@li scale: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0.
+*@li reserve_space_1: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm.
+*@li reserve_space_2: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm . \n
+
+*@par Attributes:
+*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
+*@li data_format: An optional string. Defaults to "NCDHW".
+*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n
+
+*@par Outputs:
+*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x".
+*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "scale".
+*@li offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "offset".
+*@li reserve_space_4: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output.
+*@li reserve_space_5: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n
+
+*@attention Constraints:
+* The preceding layer of this operator must be operator BatchNorm . \n
+
+*@see BatchNorm
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNorm3DGrad.
+*/
+REG_OP(BatchNorm3DGrad)
+    .INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(reserve_space_1, TensorType({DT_FLOAT}))
+    .INPUT(reserve_space_2, TensorType({DT_FLOAT}))
+    .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(scale_backprop, TensorType({DT_FLOAT}))
+    .OUTPUT(offset_backprop, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_4, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_5, TensorType({DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .ATTR(data_format, String, "NCDHW")
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(BatchNorm3DGrad)
+
+/**
+*@brief Performs the backpropagation of BatchNorm . \n
+
 *@par Inputs:
 * Five inputs, including:
 *@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient.
@@ -315,35 +419,7 @@ REG_OP(BNInference)
     .ATTR(use_global_stats, Bool,true)
     .ATTR(mode, Int,1)
     .OP_END_FACTORY_REG(BNInference)
-/**
-*@brief aicpu batch normalization host  . \n
 
-*@par Inputs:
-
-*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x"  Specifies the mean used for inference.
-*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x"  Specifies the variance used for inference.
-*@li momentum: An optional float, mean and variance's Scale factor
-*@par Attributes:
-*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
-*@li use_global_stats: mean inference mode , only can be "True".
-*@li mode: An optional attr, not use
-*@par Outputs:
-*@li alpha: A Tensor of type float16 or float32 for the cpu calculate mean
-*@li beta: A Tensor of type float16 or float32 for the cpu calculate variance
-*/
-REG_OP(BnHost)
-    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .ATTR(epsilon, Float, 0.00001)
-    .ATTR(mode, Int, 1)
-    .ATTR(use_global_stats, Bool, true)
-    .OUTPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .OUTPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16}))
-    .OUTPUT(mu, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OP_END_FACTORY_REG(BnHost)
 /**
 *@brief Performs batch normalization . \n
 
diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
index 35296870..98473c65 100644
--- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -365,6 +365,25 @@ REG_OP(BiasAddGrad)
  * 4-D with shape [batch, out_height, out_width, out_channels]
  * or [batch, out_channels, out_height, out_width].
  * Gradients with respect to the output of the convolution.
+ *\n
+ *\n
+ * The following are the supported data types and data formats:
+*@verbatim
+    | Tensor    | out_backprop | filter  | y
+    ------------|--------------|---------|--------
+    | Data Type | float16      | float16 | float16
+    |           |--------------|---------|--------
+    |           | float32      | float32 | float32
+    |           |--------------|---------|--------
+    |           | float64      | float64 | float64
+    ------------|--------------|---------|--------
+    | Format    | NCHW         | NCHW    | NCHW
+    |           | NHWC         | HWCN    | NHWC
+@endverbatim
+ * For float32 and float64 type, the actual calculation on the chip is based on
+ * float16.
+ *\n
+ *
 *@par Attributes:
  * Five attributes:
  * @li strides: A tuple/list of 4 integers. The stride of the sliding window
@@ -377,8 +396,53 @@ REG_OP(BiasAddGrad)
  * channels.
  * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to
  * "NHWC". Specify the data format of the input and output data.
+ *\n
+ *\n
+ * The following value range restrictions must be met:
+*@verbatim
+    | Name             | Field    | Scope
+    -------------------|----------|--------------
+    | input_size       | H        | [1, 4096]
+    |                  | W        | [1, 4096]
+    -------------------|----------|--------------
+    | Filter           | H        | [1, 255]
+    |                  | W        | [1, 255]
+    -------------------|----------|--------------
+    | out_backprop     | H*strideH| [1, 4096]
+    |                  | W*strideW| [1, 4096]
+    -------------------|----------|--------------
+    | y(fmap)          | H        | [1, 4096]
+    |                  | W        | [1, 4096]
+    -------------------|----------|--------------
+    | Stride           | H        | [1, 63]
+    |                  | W        | [1, 63]
+    -------------------|----------|--------------
+    | Padding          | Top      | [0, 255]
+    |                  | Bottom   | [0, 255]
+    |                  | Left     | [0, 255]
+    |                  | Right    | [0, 255]
+    -------------------|----------|--------------
+    | Dilation         | H        | [1, 255]
+    |                  | W        | [1, 255]
+
+@endverbatim
+ * In Ascend910, fmap or out_backprop's H and W do not support 1 when
+ * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1.
+ * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096
+ *\n
+ *
 *@par Outputs:
  * y: A Tensor. Has the same type as filter,and has same format as input_size.
+ *\n
+ *     out_backprop_height = (fmap_height + pad_top + pad_bottom -
+ *                           (dilation_h * (filter_height - 1) + 1))
+ *                           / stride_h + 1
+ *\n
+ *     out_backprop_width = (fmap_width + pad_left + pad_right -
+ *                          (dilation_w * (filter_width - 1) + 1))
+ *                          / stride_w + 1
+ *\n
+ *
 *@par Third-party framework compatibility
  * Compatible with Tensorflow's conv2d_backprop_input
 */
@@ -454,6 +518,21 @@ REG_OP(Conv2DBackpropInputD)
  * @li bias: An optional tensor. Must have the same type as "y".
  * @li offset_w: An optional 1D tensor for quantized deconvolution.
  * Type is int8. Reserved.\n
+ *\n
+ *\n
+ * The following are the supported data types and data formats:
+*@verbatim
+    | Tensor    | x       | filter  | bias    | y
+    ------------|---------|---------|---------|--------
+    | Data Type | float16 | float16 | float16 | float16
+    |           |---------|---------|---------|--------
+    |           | int8    | int8    | int32   | int32
+    ------------|---------|---------|---------|--------
+    | Format    | NCHW    | NCHW    | ND      | NCHW
+@endverbatim
+ * For int8, a dequant or requant operator must follow.
+ *\n
+ *
 *@par Attributes:
  * Six attributes:
  * @li strides: A tuple or list of 2 integers. The stride of the sliding window
@@ -467,9 +546,54 @@ REG_OP(Conv2DBackpropInputD)
  * @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n
   Specify the data format of the input and output data.
  * @li offset_x: An optional integer for quantized deconvolution.
- * Defaults to "0".
+ * The negative offset added to the input image for int8 type. Ensure offset_x
+ * within the effective range of int8 [-128, 127]. Defaults to "0".
+ *\n
+ *\n
+ * The following value range restrictions must be met:
+*@verbatim
+    | Name             | Field    | Scope
+    -------------------|----------|--------------
+    | x (out_backprop) | H*strideH| [1, 4096]
+    |                  | W*strideW| [1, 4096]
+    -------------------|----------|--------------
+    | Filter           | H        | [1, 255]
+    |                  | W        | [1, 255]
+    -------------------|----------|--------------
+    | y (fmap)         | H        | [1, 4096]
+    |                  | W        | [1, 4096]
+    -------------------|----------|--------------
+    | Stride           | H        | [1, 63]
+    |                  | W        | [1, 63]
+    -------------------|----------|--------------
+    | Padding          | Top      | [0, 255]
+    |                  | Bottom   | [0, 255]
+    |                  | Left     | [0, 255]
+    |                  | Right    | [0, 255]
+    -------------------|----------|--------------
+    | Dilation         | H        | [1, 255]
+    |                  | W        | [1, 255]
+    -------------------|----------|--------------
+    | Offset_x         |          | [-128, 127]
+
+@endverbatim
+ * In Ascend910, fmap or out_backprop's H and W do not support 1 when
+ * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1.
+ * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096
+ *\n
+ *
 *@par Outputs:
  * y: A Tensor. 4D tensor with shape [batch, channels, height, width].
+ *\n
+ *     out_backprop_height = (fmap_height + pad_top + pad_bottom -
+ *                           (dilation_h * (filter_height - 1) + 1))
+ *                           / stride_h + 1
+ *\n
+ *     out_backprop_width = (fmap_width + pad_left + pad_right -
+ *                          (dilation_w * (filter_width - 1) + 1))
+ *                          / stride_w + 1
+ *\n
+ *
  * When type of x is float16, the type of y must be float16.
  * When type of x is int8, the type of y must be int32.
 */
@@ -502,6 +626,25 @@ REG_OP(Deconvolution)
  * [batch, out_height, out_width, out_channels] or [batch, out_channels,
  * out_height, out_width]. Gradients with respect to the output of the
  * convolution.
+ *\n
+ *\n
+ * The following are the supported data types and data formats:
+*@verbatim
+    | Tensor    | x       | out_backprop | y
+    ------------|---------|--------------|---------
+    | Data Type | float16 |    float16   | float16
+    |           |---------|--------------|---------
+    |           | float32 |    float32   | float32
+    |           |---------|--------------|---------
+    |           | float64 |    float64   | float64
+    |-----------|---------|--------------|---------
+    | Format    | NCHW    |     NCHW     | NCHW
+    |           | NHWC    |     NHWC     | HWCN
+@endverbatim
+ * For float32 and float64 type of x and out_backprop, the actual calculation on the chip
+ * is based on float16.
+ *\n
+ *
 *@par Attributes:
  * Five attributes:
  * @li strides: A tuple/list of 4 integers. The stride of the sliding window
@@ -514,8 +657,52 @@ REG_OP(Deconvolution)
  * channels.
  * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to
  * "NHWC". Specify the data format of the input and output data.
+ *\n
+*\n
+* The following value range restrictions must be met:
+*@verbatim
+    | Name             | Field    | Scope
+    -------------------|----------|--------------
+    | x(fmap)          | H        | [1, 4096]
+    |                  | W        | [1, 4096]
+    -------------------|----------|--------------
+    | Filter Size      | H        | [1, 255]
+    |                  | W        | [1, 255]
+    -------------------|----------|--------------
+    | out_backprop     | H        | [1, 4096]
+    |                  | W        | [1, 4096]
+    -------------------|----------|--------------
+    | y                | H        | [1, 4096]
+    |                  | W        | [1, 4096]
+    -------------------|----------|--------------
+    | Stride           | H        | [1, 63]
+    |                  | W        | [1, 63]
+    -------------------|----------|--------------
+    | Padding          | Top      | [0, 255]
+    |                  | Bottom   | [0, 255]
+    |                  | Left     | [0, 255]
+    |                  | Right    | [0, 255]
+    -------------------|----------|--------------
+    | Dilation         | H        | [1, 255]
+    |                  | W        | [1, 255]
+
+@endverbatim
+ * In Ascend910, out_backprop's H and W do not support 1 when
+ * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1.
+ *\n
+ *
 *@par Outputs:
  * y: A Tensor. Has the same type as x, has the same format as filter_size.
+ *\n
+ *     out_backprop_height = (in_height + pad_top + pad_bottom -
+ *                           (dilation_h * (filter_height - 1) + 1))
+ *                           / stride_h + 1
+ *\n
+ *     out_backprop_width = (in_width + pad_left + pad_right -
+ *                          (dilation_w * (filter_width - 1) + 1))
+ *                          / stride_w + 1
+ *\n
+ *
 *@par Third-party framework compatibility
  * Compatible with Tensorflow's conv2d_backprop_filter
 */
@@ -597,16 +784,14 @@ REG_OP(Conv2DBackpropFilterD)
     | Tensor    | x       | filter  | bias    | y
     ------------|---------|---------|---------|--------
     | Data Type | float16 | float16 | float16 | float16
-    |           |---------|---------|---------|--------
     |           | float32 | float32 | float32 | float32
-    |           |---------|---------|---------|--------
     |           | int8    | int8    | int32   | int32
     ------------|---------|---------|---------|--------
     | Format    | NCHW    | NCHW    | ND      | NCHW
     |           | NHWC    | HWCN    |         | NHWC
 @endverbatim
 * For float32 type, the actual calculation on the chip is based on
-* float16. For int8, a dequant or requant operator must be followed.
+* float16.
 *\n
 *
 *@par Attributes:
@@ -617,8 +802,7 @@ REG_OP(Conv2DBackpropFilterD)
 * (top, bottom, left, right) side of the input.
 *@li dilations: Optional. A list of 4 integers. The dilation factor for each
 * dimension of input. The dimension order is determined by the data format of
-* "x". The N and C dimensions must be set to 1. The H and W dimensions must be
-* set to 1 for int8 type. Defaults to [1, 1, 1, 1].
+* "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1].
 *@li groups: Optional. An integer of type int32. The number of blocked
 * connections from input channels to output channels. In_channels and
 * out_channels must both be divisible by "groups". Defaults to 1.
@@ -652,6 +836,8 @@ REG_OP(Conv2DBackpropFilterD)
     | Offset_x         |          | [-128, 127]
 
 @endverbatim
+* The W dimension of the input image supports cases exceeding 4096, but it may
+* cause compilation errors.
 *\n
 *
 *@par Outputs:
@@ -666,21 +852,6 @@ REG_OP(Conv2DBackpropFilterD)
 *     out_width = (in_width + pad_left + pad_right -
 *                  (dilation_w * (filter_width - 1) + 1))
 *                 / stride_w + 1
-*
-*@attention Constraints:
-*@li The following restrictions on the output must be met:
-*@verbatim
-    | Output  | Restrictions
-    ----------|--------------------------------
-    | H == 1  | H * W(input) == H * W(filter)
-    | W == 1  |
-    ----------|--------------------------------
-    | H != 1  | W(input) == W(filter)
-    | W == 1  | Only for Ascend310 Hi3796V300CS
-@endverbatim
-* "H * W (input)" indicates the image size after padding and "H * W (filter)"
-* indicates the filter size after dilation."W(input)" and W(filter) indicate
-* the same rule on the W dimension.
 *\n
 *
 *@par Quantization supported or not
@@ -778,7 +949,7 @@ REG_OP(Conv2DCompress)
 * With the format "HWCN" , the data is stored in the order of: [filter_height,
 * filter_width, in_channels / groups, out_channels].
 *@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format
-* "NHWC", the data is stored in the order of: [batch, in_height, in_width,
+* "NHWC", the data is stored in the order of: [batch, out_height, out_width,
 * deformable_groups * filter_height * filter_width * 3].
 *@li bias: An optional 1D tensor of additive biases to the filter outputs.
 * The data is stored in the order of: [out_channels].
@@ -816,31 +987,20 @@ REG_OP(Conv2DCompress)
 *@li deformable_groups: Optional. An integer of type int32. The number of
 * deformable group partitions. In_channels must be divisible by
 * "deformable_groups". Defaults to 1.
+*@li modulated: Optional. Specifies the version of DeformableConv2D: true means
+* v2, false means v1. Currently only v2 is supported.
 *\n
 *\n
 * The following value range restrictions must be met:
 *@verbatim
     | Name              | Field  | Scope
     --------------------|--------|----------------------------
-    | Input Image Size  | H      | [1, 100000]
-    |                   | W      | [1, 4096]
-    --------------------|--------|----------------------------
-    | Filter Size       | H      | [1, 255]
-    |                   | W      | [1, 255]
+    | Input Image Size  | H      | [1, 100000 / filter_height]
+    |                   | W      | [1, 4096 / filter_width]
     --------------------|--------|----------------------------
-    | Stride            | H      | [1, 63]
+    | Filter Size       | H      | [1, 63]
     |                   | W      | [1, 63]
-    --------------------|--------|----------------------------
-    | Padding           | Top    | [0, 255]
-    |                   | Bottom | [0, 255]
-    |                   | Left   | [0, 255]
-    |                   | Right  | [0, 255]
-    ------------ -------|--------|----------------------------
-    | Dilation          | H      | [1, 255]
-    |                   | W      | [1, 255]
 @endverbatim
-* "W(input)" indicate the image width after padding and W(filter) indicates the
-* filter width after dilation.
 *\n
 *
 *@par Outputs:
@@ -855,21 +1015,7 @@ REG_OP(Conv2DCompress)
 *     out_width = (in_width + pad_left + pad_right -
 *                  (dilation_w * (filter_width - 1) + 1))
 *                 / stride_w + 1
-*
-*@attention Constraints:
-*@li The following restrictions on the output must be met:
-*@verbatim
-    | Output  | Restrictions
-    ----------|--------------------------------
-    | H == 1  | H * W(input) == H * W(filter)
-    | W == 1  |
-    ----------|--------------------------------
-    | H != 1  | W(input) == W(filter)
-    | W == 1  | Only for Ascend310 Hi3796V300CS
-@endverbatim
-* "H * W(input)" indicates the image size after padding and "H * W(filter)"
-* indicates the filter size after dilation. "W(input)" and W(filter) indicate
-* the same rule on the W dimension.
+*\n
 *
 *@par Quantization supported or not
 *@li No
@@ -891,6 +1037,7 @@ REG_OP(DeformableConv2D)
     .ATTR(groups, Int, 1)
     .ATTR(data_format, String, "NHWC")
     .ATTR(deformable_groups, Int, 1)
+    .ATTR(modulated, Bool, true)
     .OP_END_FACTORY_REG(DeformableConv2D)
 
 /**
@@ -916,12 +1063,12 @@ REG_OP(DeformableConv2D)
 
 *@par Attributes:
  * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
  * @li data_format: An optional string from: "NDHWC", "NCDHW".
  * Defaults to "NDHWC". Specify the data format of the input and output data.
  * @li dilations: A list of 5 integers. Specifies the dilation factor for each
- * dimension of "x", now only support [1,1,1,1,1]
- * The N and C dimensions must be 1. Has the same format as "x".
+ * dimension of "x".
+ * The N, C and D dimensions must be 1. Has the same format as "x".
  * @li offset_x: An optional int. Input offset, used for quantized inference.
  * Defaults to 0. Reserved . \n
 
@@ -967,8 +1114,8 @@ REG_OP(Conv3D)
 
 *@par Required Attributes:
  * @li strides: A list of 5 integers. Specifies the stride of the sliding window
- * for each dimension of "x".
- * The N and C dimensions must be 1. Has the same format as "x".
+ * for each dimension of "out_backprop".
+ * The N and C dimensions must be 1. Has the same format as "out_backprop".
  * @li pads: A list of 6 integers.
  * Supports only padding along the D, H and W dimensions in sequence of head,
  * tail, top, bottom, left and right . \n
@@ -976,14 +1123,15 @@ REG_OP(Conv3D)
 *@par Attributes:
  * Three attributes:
  * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
  * @li data_format: An optional string from: "NDHWC", "NCDHW".
  * Defaults to "NDHWC". Specify the data format of the input and output data.
  * @li dilations: A tuple/list of 5 integers, The dilation factor for each
- * dimension of the input, now only support [1,1,1,1,1]
+ * dimension of the input.
+ * The N, C and D dimensions must be 1. Has the same format as "out_backprop".
 
 *@par Outputs:
- * y: A Tensor. Has the same type as filter,and has same format as input_size
+ * y: A Tensor. Has the same type as "filter", and has the same format as "input_size".
 
 *@par Third-party framework compatibility
  * Compatible with Tensorflow's conv3d_backprop_input
@@ -1011,8 +1159,8 @@ REG_OP(Conv3DBackpropInput)
 
 *@par Required Attributes:
  * @li strides: A list of 5 integers. Specifies the stride of the sliding window
- * for each dimension of "x".
- * The N and C dimensions must be 1. Has the same format as "x".
+ * for each dimension of "out_backprop".
+ * The N and C dimensions must be 1. Has the same format as "out_backprop".
  * @li pads: A list of 6 integers. Supports only padding along the D, H and W
  * dimensions in sequence of head, tail, top, bottom, left and right.
  * @li input_size: A tuple/list of type int32, int64. An integer vector
@@ -1023,13 +1171,14 @@ REG_OP(Conv3DBackpropInput)
 *@par Attributes:
  * Three attributes:
  * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
  * @li data_format: An optional string from: "NDHWC", "NCDHW".
  * Defaults to "NDHWC". Specify the data format of the input and output data.
  * @li dilations: A tuple/list of 5 integers, The dilation factor for each
- * dimension of input, now only support [1,1,1,1,1]
+ * dimension of input.
+ * The N, C and D dimensions must be 1. Has the same format as "out_backprop".
 *@par Outputs:
- * y: A Tensor. Has the same type and data format as out_backprop.
+ * y: A Tensor. Has the same type and data format as "out_backprop".
 *@par Third-party framework compatibility
  * Compatible with Tensorflow's conv3d_backprop_input
 
@@ -1072,9 +1221,7 @@ REG_OP(Conv3DBackpropInputD)
 * @li c_t: A optinal Tensor dtype of float16, float32. The cell state at time t . \n
 
 *@par Third-party framework compatibility:
-* Compatible with the Pytorch operator adds.
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* Compatible with the Caffe operator LSTM.
 */
 REG_OP(LSTM)
     .INPUT(x, TensorType({DT_FLOAT16}))
@@ -1121,14 +1268,15 @@ REG_OP(LSTM)
 *@par Attributes:
  * Three attributes:
  * @li dilations: A tuple/list of 5 integers, The dilation factor for each
- * dimension of input, now only support [1,1,1,1,1].
+ * dimension of input.
+ * The N, C and D dimensions must be 1. Has the same format as "x".
  * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
  * @li data_format: An optional string from: "NDHWC", "NCDHW".
  * Defaults to "NDHWC". Specify the data format of the input and output data.
 
 *@par Outputs:
- * y: A Tensor that has the same type as x
+ * y: A Tensor that has the same type as "x"
  * and the format is NDHWC, NCDHW or DHWCN.
 *@par Third-party framework compatibility
  * Compatible with Tensorflow's conv3d_backprop_filter
@@ -1172,9 +1320,10 @@ REG_OP(Conv3DBackpropFilter)
 *@par Attributes:
  * Three attributes:
  * @li dilations: A tuple/list of 5 integers, The dilation factor for each
- * dimension of input, now only support [1,1,1,1,1].
+ * dimension of input.
+ * The N, C and D dimensions must be 1. Has the same format as "x".
  * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
  * @li data_format: An optional string from: "NDHWC", "NCDHW".
  * Defaults to "NDHWC". Specify the data format of the input and output data.
 
@@ -1224,15 +1373,16 @@ REG_OP(Conv3DBackpropFilterD)
 *@par Attributes:
  * Five attributes:
  * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
  * @li dilations: A tuple/list of 5 integers,
- * The dilation factor for each dimension of input, now only support [1,1,1,1,1]
+ * The dilation factor for each dimension of input.
+ * The N, C and D dimensions must be 1. Has the same format as "x".
  * @li data_format: An optional string from: "NDHWC", "NCDHW".
  * Defaults to "NDHWC". Specify the data format of the input and output data.
  * @li output_padding: The size will be added in the output shape.
  * @li offset_x: Input offset_x value. Reserved.
 *@par Outputs:
- * y: A Tensor. Has the same type and format as x.
+ * y: A Tensor. Has the same type and format as "x".
 */
 REG_OP(Conv3DTranspose)
     .INPUT(input_size, TensorType({DT_INT32, DT_INT64}))
@@ -1273,15 +1423,16 @@ REG_OP(Conv3DTranspose)
 *@par Attributes:
  * Five attributes:
  * @li dilations: A tuple/list of 5 integers, The dilation factor for each
- * dimension of input, now only support [1,1,1,1,1]
+ * dimension of input.
+ * The N, C and D dimensions must be 1. Has the same format as "x".
  * @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
  * @li data_format: An optional string from: "NDHWC", "NCDHW".
  * Defaults to "NDHWC". Specify the data format of the input and output data.
  * @li output_padding: The size will be added in the output shape.
  * @li offset_x: Input offset_x value. Reserved.
 *@par Outputs:
- * y: A Tensor. Has the same type and format as x.
+ * y: A Tensor. Has the same type and format as "x".
 *@par Restrictions:
 * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv3DTranspose instead.
 */
@@ -1316,6 +1467,22 @@ REG_OP(Conv3DTransposeD)
  * or [out_channels, in_channel, filter_height, filter_width].
  * @li bias: An optional 1D tensor of type float16 or int32. Format is "ND".
  * @li offset_w: An optional 1D tensor for quantized inference. Reserved.
+ *\n
+ *\n
+ * The following are the supported data types and data formats:
+*@verbatim
+    | Tensor    | x       | filter  | bias    | y
+    ------------|---------|---------|---------|--------
+    | Data Type | float16 | float16 | float16 | float16
+    |           |---------|---------|---------|--------
+    |           | int8    | int8    | int32   | int32
+    ------------|---------|---------|---------|--------
+    | Format    | NCHW    | NCHW    | ND      | NCHW
+    |           | NHWC    | HWCN    |         | NHWC
+@endverbatim
+ * For int8, this operator must be followed by a dequant or requant operator.
+ *\n
+ *
 *@par Required Attributes:
  * @li strides: A required tuple/list of 4 integers. The stride of the sliding
  * window for H/W dimension. The index of H/W is same as data_format.
@@ -1333,10 +1500,58 @@ REG_OP(Conv3DTransposeD)
  * @li output_padding: The size will be added in the output shape. Defaults
  * to [0, 0, 0, 0].
  * @li offset_x: An optional int. Input offset, used for quantized inference.
- * Defaults to "0".
+ * The negative offset added to the input image for int8 type. Ensure that
+ * offset_x is within the effective range of int8 [-128, 127]. Defaults to "0".
+ *\n
+ *\n
+ * The following value range restrictions must be met:
+*@verbatim
+    | Name             | Field    | Scope
+    -------------------|----------|--------------
+    | input_size       | H        | [1, 4096]
+    |                  | W        | [1, 4096]
+    -------------------|----------|--------------
+    | x (out_backprop) | H*strideH| [1, 4096]
+    |                  | W*strideW| [1, 4096]
+    -------------------|----------|--------------
+    | filter           | H        | [1, 255]
+    |                  | W        | [1, 255]
+    -------------------|----------|--------------
+    | y (fmap)         | H        | [1, 4096]
+    |                  | W        | [1, 4096]
+    -------------------|----------|--------------
+    | Stride           | H        | [1, 63]
+    |                  | W        | [1, 63]
+    -------------------|----------|--------------
+    | Padding          | Top      | [0, 255]
+    |                  | Bottom   | [0, 255]
+    |                  | Left     | [0, 255]
+    |                  | Right    | [0, 255]
+    -------------------|----------|--------------
+    | Dilation         | H        | [1, 255]
+    |                  | W        | [1, 255]
+    -------------------|----------|--------------
+    | Offset_x         |          | [-128, 127]
+
+@endverbatim
+ * In Ascend910, fmap or out_backprop's H and W do not support 1 when
+ * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1.
+ * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w must be less than 4096.
+ *\n
+ *
 *@par Outputs:
  * y: A Tensor. A Tensor of type float16 or int32, and has same format as
  * input_size.
+ *\n
+ *     out_backprop_height = (fmap_height + pad_top + pad_bottom -
+ *                           (dilation_h * (filter_height - 1) + 1))
+ *                           / stride_h + 1
+ *\n
+ *     out_backprop_width = (fmap_width + pad_left + pad_right -
+ *                          (dilation_w * (filter_width - 1) + 1))
+ *                          / stride_w + 1
+ *\n
+ *
 */
 REG_OP(Conv2DTranspose)
     .INPUT(input_size, TensorType({DT_INT32, DT_INT64}))
@@ -1405,21 +1620,22 @@ REG_OP(Conv2DTransposeD)
 /**
 *@brief Computes the deformed convolution output with the expected input
 *@par Inputs:
- * Four inputs:
+ * Two inputs:
  * @li x: A Tensor of type float16,float32
  * @li offsets: A Tensor of type float16,float32.Deformation offset parameter.
 *@par Required Attributes:
  * @li strides: A tuple/list of 4 integers.The stride of the sliding window for
  * height and width for H/W dimension.
- * @li pads: A tuple/list of 4 integers.Padding added to each dimension
+ * @li pads: A tuple/list of 4 integers.Padding added to H/W dimension
  * of the input.
  * @li ksize: A tuple/list of 2 integers.kernel size.
 *@par Attributes:
- * Three attributes:
+ * Four attributes:
  * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension
  * of input.  Defaults to [1, 1, 1, 1]
  * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x.
  * @li deformable_groups: Specify the c-axis grouping number of input x.
+ * @li modulated: Specifies the version of DeformableConv2D: true means v2, false means v1. Defaults to true.
 *@par Outputs:
  * y: A Tensor. A Tensor of type float16, float32.
 */
@@ -1433,7 +1649,69 @@ REG_OP(DeformableOffsets)
     .ATTR(dilations, ListInt, {1, 1, 1, 1})
     .ATTR(data_format, String, "NCHW")
     .ATTR(deformable_groups, Int, 1)
+    .ATTR(modulated, Bool, true)
     .OP_END_FACTORY_REG(DeformableOffsets)
 
+/**
+*@brief Computes the gradients of DeformableOffsets with respect to the inputs "x" and "offsets".
+*@par Inputs:
+ * Three inputs:
+ * @li grad: A Tensor of type float16, float32. Gradients with respect to the DeformableOffsets output.
+ * @li x: A Tensor of type float16, float32.
+ * @li offsets: A Tensor of type float16, float32. Deformation offset parameter.
+*@par Required Attributes:
+ * @li strides: A tuple/list of 4 integers. The stride of the sliding window for
+ * height and width for H/W dimension.
+ * @li pads: A tuple/list of 4 integers. Padding added to H/W dimension
+ * of the input.
+ * @li ksize: A tuple/list of 2 integers. Kernel size.
+*@par Attributes:
+ * Four attributes:
+ * @li dilations: A tuple/list of 4 integers. The dilation factor for each dimension
+ * of input. Defaults to [1, 1, 1, 1].
+ * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x.
+ * @li deformable_groups: Specify the c-axis grouping number of input x. Defaults to 1.
+ * @li modulated: Specify version of DeformableConv2D, true means v2, false means v1. Defaults to true.
+*@par Outputs:
+ * @li grad_x: A Tensor of type float16, float32. Gradients with respect to input "x".
+ * @li grad_offsets: A Tensor of type float16, float32. Gradients with respect to input "offsets".
+*/
+REG_OP(DeformableOffsetsGrad)
+    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(grad_offsets, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(pads, ListInt)
+    .REQUIRED_ATTR(ksize, ListInt)
+    .ATTR(dilations, ListInt, {1, 1, 1, 1})
+    .ATTR(data_format, String, "NCHW")
+    .ATTR(deformable_groups, Int, 1)
+    .ATTR(modulated, Bool, true)
+    .OP_END_FACTORY_REG(DeformableOffsetsGrad)
+
+/**
+*@brief Computes the deformed dilation output with the expected input.
+*@par Inputs:
+ * One input:
+ * @li x: A Tensor of type int8, float16, float32.
+*@par Required Attributes:
+ * @li dilations: A tuple/list of integers.
+*@par Attributes:
+ * Two attributes:
+ * @li padding_value: An optional float. The value filled into the blank positions. Defaults to 0.0.
+ * @li pads: An optional tuple/list of integers. Defaults to an empty list.
+*@par Outputs:
+ * y: A Tensor of type int8, float16, float32.
+*/
+REG_OP(Dilation)
+    .INPUT(x, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(dilations, ListInt)
+    .ATTR(pads, ListInt, {})
+    .ATTR(padding_value, Float, 0.0)
+    .OP_END_FACTORY_REG(Dilation)
+
 }  // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index a013fb33..5fa40ad6 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -254,22 +254,22 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul
 *@par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
- REG_OP(PriorBox)
-     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .REQUIRED_ATTR(min_size, ListFloat)
-     .REQUIRED_ATTR(max_size, ListFloat)
-     .REQUIRED_ATTR(aspect_ratio, ListFloat)
-     .ATTR(img_h, Int, 0)
-     .ATTR(img_w, Int, 0)
-     .ATTR(step_h, Float, 0.0)
-     .ATTR(step_w, Float, 0.0)
-     .ATTR(flip, Bool, true)
-     .ATTR(clip, Bool, false)
-     .ATTR(offset, Float, 0.5)
-     .ATTR(variance, ListFloat, {0.1})
-     .OP_END_FACTORY_REG(PriorBox);
+REG_OP(PriorBox)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(min_size, ListFloat)
+    .REQUIRED_ATTR(max_size, ListFloat)
+    .REQUIRED_ATTR(aspect_ratio, ListFloat)
+    .ATTR(img_h, Int, 0)
+    .ATTR(img_w, Int, 0)
+    .ATTR(step_h, Float, 0.0)
+    .ATTR(step_w, Float, 0.0)
+    .ATTR(flip, Bool, true)
+    .ATTR(clip, Bool, false)
+    .ATTR(offset, Float, 0.5)
+    .ATTR(variance, ListFloat, {0.1})
+    .OP_END_FACTORY_REG(PriorBox);
 
 /**
 *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n
@@ -306,25 +306,25 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul
 *@par Restrictions:
 *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
 */
- REG_OP(PriorBoxD)
-     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .REQUIRED_ATTR(min_size, ListFloat)
-     .REQUIRED_ATTR(max_size, ListFloat)
-     .ATTR(img_h, Int, 0)
-     .ATTR(img_w, Int, 0)
-     .ATTR(step_h, Float, 0.0)
-     .ATTR(step_w, Float, 0.0)
-     .ATTR(flip, Bool, true)
-     .ATTR(clip, Bool, false)
-     .ATTR(offset, Float, 0.5)
-     .ATTR(variance, ListFloat, {0.1})
-     .OP_END_FACTORY_REG(PriorBoxD);
+REG_OP(PriorBoxD)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(min_size, ListFloat)
+    .REQUIRED_ATTR(max_size, ListFloat)
+    .ATTR(img_h, Int, 0)
+    .ATTR(img_w, Int, 0)
+    .ATTR(step_h, Float, 0.0)
+    .ATTR(step_w, Float, 0.0)
+    .ATTR(flip, Bool, true)
+    .ATTR(clip, Bool, false)
+    .ATTR(offset, Float, 0.5)
+    .ATTR(variance, ListFloat, {0.1})
+    .OP_END_FACTORY_REG(PriorBoxD);
 
 /**
 *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n
@@ -358,22 +358,22 @@ is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaul
 *@par Restrictions:
 *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
 */
- REG_OP(PriorBoxDV2)
-     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
-     .REQUIRED_ATTR(min_size, ListFloat)
-     .REQUIRED_ATTR(max_size, ListFloat)
-     .ATTR(img_h, Int, 0)
-     .ATTR(img_w, Int, 0)
-     .ATTR(step_h, Float, 0.0)
-     .ATTR(step_w, Float, 0.0)
-     .ATTR(flip, Bool, true)
-     .ATTR(clip, Bool, false)
-     .ATTR(offset, Float, 0.5)
-     .ATTR(variance, ListFloat, {0.1})
-     .OP_END_FACTORY_REG(PriorBoxDV2);
+REG_OP(PriorBoxDV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(min_size, ListFloat)
+    .REQUIRED_ATTR(max_size, ListFloat)
+    .ATTR(img_h, Int, 0)
+    .ATTR(img_w, Int, 0)
+    .ATTR(step_h, Float, 0.0)
+    .ATTR(step_w, Float, 0.0)
+    .ATTR(flip, Bool, true)
+    .ATTR(clip, Bool, false)
+    .ATTR(offset, Float, 0.5)
+    .ATTR(variance, ListFloat, {0.1})
+    .OP_END_FACTORY_REG(PriorBoxDV2);
 
 /**
 *@brief Performs Position Sensitive ROI Pooling . \n
@@ -531,10 +531,10 @@ as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(Yolo)
-    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OUTPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OUTPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OUTPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
     .ATTR(boxes, Int, 3)
     .ATTR(coords, Int, 4)
     .ATTR(classes, Int, 80)
@@ -584,10 +584,10 @@ REG_OP(Yolo)
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(YoloV2DetectionOutput)
-    .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
     .REQUIRED_ATTR(biases, ListFloat)
     .ATTR(boxes, Int, 5)
     .ATTR(coords, Int, 4)
@@ -598,7 +598,7 @@ REG_OP(YoloV2DetectionOutput)
     .ATTR(score_threshold, Float, 0.5)
     .ATTR(iou_threshold, Float, 0.45)
     .ATTR(pre_nms_topn, Int, 512)
-    .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(box_out_num, TensorType({DT_INT32}))
     .OP_END_FACTORY_REG(YoloV2DetectionOutput)
 
@@ -647,12 +647,12 @@ REG_OP(YoloV2DetectionOutput)
 *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead.
 */
 REG_OP(YoloV2DetectionOutputD)
-    .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
     .REQUIRED_ATTR(biases, ListFloat)
     .ATTR(boxes, Int, 5)
     .ATTR(coords, Int, 4)
@@ -663,7 +663,7 @@ REG_OP(YoloV2DetectionOutputD)
     .ATTR(score_threshold, Float, 0.5)
     .ATTR(iou_threshold, Float, 0.45)
     .ATTR(pre_nms_topn, Int, 512)
-    .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(box_out_num, TensorType({DT_INT32}))
     .OP_END_FACTORY_REG(YoloV2DetectionOutputD)
 
@@ -707,16 +707,16 @@ REG_OP(YoloV2DetectionOutputD)
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(YoloV3DetectionOutput)
-    .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
     .REQUIRED_ATTR(biases_low, ListFloat)
     .REQUIRED_ATTR(biases_mid, ListFloat)
     .REQUIRED_ATTR(biases_high, ListFloat)
@@ -729,7 +729,7 @@ REG_OP(YoloV3DetectionOutput)
     .ATTR(score_threshold, Float, 0.5)
     .ATTR(iou_threshold, Float, 0.45)
     .ATTR(pre_nms_topn, Int, 512)
-    .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(box_out_num, TensorType({DT_INT32}))
     .OP_END_FACTORY_REG(YoloV3DetectionOutput)
 
@@ -776,22 +776,22 @@ s
 *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead.
 */
 REG_OP(YoloV3DetectionOutputD)
-    .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(windex1, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(windex2, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(windex3, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(hindex1, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(hindex2, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(hindex3, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(windex1, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(windex2, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(windex3, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(hindex1, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(hindex2, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(hindex3, TensorType({DT_FLOAT16, DT_FLOAT}))
     .REQUIRED_ATTR(biases_low, ListFloat)
     .REQUIRED_ATTR(biases_mid, ListFloat)
     .REQUIRED_ATTR(biases_high, ListFloat)
@@ -804,7 +804,7 @@ REG_OP(YoloV3DetectionOutputD)
     .ATTR(score_threshold, Float, 0.5)
     .ATTR(iou_threshold, Float, 0.45)
     .ATTR(pre_nms_topn, Int, 512)
-    .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(box_out_num, TensorType({DT_INT32}))
     .OP_END_FACTORY_REG(YoloV3DetectionOutputD)
 
@@ -848,7 +848,7 @@ There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yol
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(YoloV3DetectionOutputV2)
-    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
     .REQUIRED_ATTR(biases, ListFloat)
     .ATTR(boxes, Int, 3)
     .ATTR(coords, Int, 4)
@@ -862,7 +862,7 @@ REG_OP(YoloV3DetectionOutputV2)
     .ATTR(N, Int, 10)
     .ATTR(resize_origin_img_to_net, Bool, false)
     .ATTR(out_box_dim, Int, 3)
-    .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(box_out_num, TensorType({DT_INT32}))
     .OP_END_FACTORY_REG(YoloV3DetectionOutputV2)
 
@@ -910,9 +910,9 @@ REG_OP(YoloV3DetectionOutputV2)
 * Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead.
 */
 REG_OP(YoloV3DetectionOutputV2D)
-    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
     .REQUIRED_ATTR(biases, ListFloat)
     .ATTR(boxes, Int, 3)
     .ATTR(coords, Int, 4)
@@ -926,7 +926,7 @@ REG_OP(YoloV3DetectionOutputV2D)
     .ATTR(N, Int, 10)
     .ATTR(resize_origin_img_to_net, Bool, false)
     .ATTR(out_box_dim, Int, 3)
-    .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(box_out_num, TensorType({DT_INT32}))
     .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D)
 
@@ -968,8 +968,9 @@ REG_OP(SPP)
 * Three inputs, including:
 *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
 * map.
-*@li rois: A tensor of type float16 or float32, with shape
+*@li rois: A tensor of type float16 or float32, with 3D shape
 * [batch, 5, roi_max_num], describing the RIOs.
+* roi_max_num must be less than or equal to 6000 and must be divisible by 16.
 *@li roi_actual_num: A  optional tensor of type int32, with shape [batch, 8], specifying
 * the number of ROIs per batch . \n
 
@@ -1201,35 +1202,6 @@ REG_OP(RpnProposalsD)
     .OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
     .OP_END_FACTORY_REG(RpnProposalsD)
 
-/**
-*@brief Computes Score Filte Pre-Sort function.
-
-*@par Inputs:
-*Inputs include:
-* @li rois: A Tensor. Must be float16. N-D with shape [N, 4].
-* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1].
-
-*@par Attributes:
-* @li score_threshold: required, float, threahold of topk process.
-* @li k: required, Int, threahold of topk process.
-* @li score_filter: bool, mark of score_filter. Defaults to "true"
-* @li core_max_num: int, max number of core. Defaults to "8"
-*@par Outputs:
-* @li sorted_proposal: A Tensor. Must be float16.
-*                      N-D with shape [8*6002, 8].
-* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8].
-*/
-
-REG_OP(ScoreFiltePreSort)
-    .INPUT(rois, TensorType({DT_FLOAT16}))
-    .INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
-    .OUTPUT(sorted_proposal, TensorType({ DT_FLOAT16}))
-    .OUTPUT(proposal_num, TensorType({ DT_UINT32}))
-    .REQUIRED_ATTR(score_threshold, Float)
-    .REQUIRED_ATTR(k, Int)
-    .ATTR(score_filter, Bool, true)
-    .ATTR(core_max_num, Int, 8)
-    .OP_END_FACTORY_REG(ScoreFiltePreSort)
 
 /**
 *@brief Computes Score Filte Pre-Sort function.
@@ -1383,6 +1355,7 @@ REG_OP(DecodeWheelsTarget)
 
 *@attention Constraints:
 * Only computation of float16 data is supported.
+* Note: when (class num per image * max_size_per_class) is too big, compilation fails with ERROR-insufficient memory.
 */
 REG_OP(BatchMultiClassNonMaxSuppression)
     .INPUT(boxes, TensorType({DT_FLOAT16}))
@@ -1464,9 +1437,9 @@ REG_OP(NormalizeBBox)
 * y: A Tensor. Must have the same type as box_predictions.
 */
 REG_OP(DecodeBboxV2)
-    .INPUT(boxes, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .INPUT(anchors, TensorType({DT_FLOAT16,DT_FLOAT}))
-    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(anchors, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
     .ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0})
     .ATTR(decode_clip, Float, 0.0)
     .ATTR(reversed_box, Bool, false)
@@ -1477,7 +1450,8 @@ REG_OP(DecodeBboxV2)
 *
 *@par Inputs:
 *Inputs include:
-* x: A Tensor. Must be float16 or float32.
+* x: A Tensor. Dtype support: float16, float, int16, int8,
+                          uint8, int32, int64.
 *
 *@par Attributes:
 * @li axis: optional, int.
@@ -1485,16 +1459,364 @@ REG_OP(DecodeBboxV2)
 *
 *@par Outputs:
 * @li y1: A Tensor. Must have the same type as x.
-* @li y2: A Tensor. Indices of y1 in x.Dtype must be int32.
+* @li y2: A Tensor. Indices of y1 in x. Dtype must be int32.
+*
 */
 REG_OP(Sort)
-    .INPUT(x, TensorType({ DT_FLOAT16 }))
-    .OUTPUT(y1, TensorType({ DT_FLOAT16 }))
-    .OUTPUT(y2, TensorType({ DT_INT32 }))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8,
+                          DT_UINT8, DT_INT32, DT_INT64}))
+    .OUTPUT(y1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT16, DT_INT8,
+                            DT_UINT8, DT_INT32, DT_INT64}))
+    .OUTPUT(y2, TensorType({DT_INT32}))
     .ATTR(axis, Int, -1)
     .ATTR(descending, Bool, false)
     .OP_END_FACTORY_REG(Sort)
 
+/**
+*@brief Computes the IoU (intersection over union) of the input bboxes and gtboxes.
+
+*@par Inputs:
+* Two inputs, including:
+*@li bboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1),
+*@li gtboxes: boxes, a 4D Tensor of type float16 with the shape (x0, x1, y0, y1).\n
+
+*@par Attributes:
+*@li mode: An optional attribute of type string, selecting the mode of iou. Defaults to "iou". \n
+
+*@par Outputs:
+*@li overlap: A 2D Tensor of type float16 with shape [n, m]. \n
+
+*@attention Constraints:
+* Only computation of float16 data is supported (the registration also lists float32; TODO confirm).
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS DEPRECATED. Please use Iou instead.
+*/
+REG_OP(PtIou)
+    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(mode, String, "iou")
+    .OP_END_FACTORY_REG(PtIou)
+
+/**
+*@brief Greedily selects a subset of bounding boxes in descending order of
+score . \n
+
+*@par Inputs:
+*Input boxes and scores must be float16 type. Inputs include:
+*@li boxes: An input tensor with shape [num_batches,spatial_dimension,4].
+The single box data format is indicated by center_point_box.
+*@li scores: An input tensor with shape [num_batches,num_classes,spatial_dimension]
+*@li max_output_size: An optional scalar integer tensor representing the maximum number
+of boxes to be selected by non max suppression.
+*@li iou_threshold: An optional 0-D float tensor representing the threshold for deciding
+whether boxes overlap too much with respect to IOU.
+*@li score_threshold: An optional 0-D float tensor representing the threshold for
+deciding when to remove boxes based on score . \n
+
+*@par Attributes:
+*@li center_point_box: Integer indicating the format of the box data. Defaults to 0.
+0 - the box data is supplied as [y1, x1, y2, x2], where (y1, x1) and (y2, x2)
+are the coordinates of any diagonal pair of box corners; the coordinates can be
+normalized (i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models.
+1 - the box data is supplied as [x_center, y_center, width, height].
+Mostly used for Pytorch models.
+*@li max_boxes_size: An optional int. Defaults to 0. \n
+
+*@par Outputs:
+*@li selected_indices: A 2-D integer tensor of shape [M] representing the
+selected indices from the boxes tensor, where M <= max_output_size. \n
+
+*@attention Constraints:
+*Input boxes and scores must be float16 type . \n
+
+*@par Third-party framework compatibility
+*Compatible with onnx NonMaxSuppression operator.
+
+*@par Restrictions:
+*Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+
+REG_OP(NonMaxSuppressionV6)
+    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT}))
+    .OUTPUT(selected_indices, TensorType({DT_INT32}))
+    .ATTR(center_point_box, Int, 0)
+    .ATTR(max_boxes_size, Int, 0)
+    .OP_END_FACTORY_REG(NonMaxSuppressionV6)
+
+/**
+*@brief Greedily selects a subset of bounding boxes in descending order of
+score . \n
+
+*@par Inputs:
+*Input boxes and scores must be float16 type. Inputs include:
+*@li boxes: An input tensor with shape [num_batches,spatial_dimension,4].
+The single box data format is indicated by center_point_box.
+*@li scores: An input tensor with shape [num_batches,num_classes,spatial_dimension]
+*@li max_output_size: An optional scalar integer tensor representing the maximum number
+of boxes to be selected by non max suppression.
+*@li iou_threshold: An optional 0-D float tensor representing the threshold for deciding
+whether boxes overlap too much with respect to IOU.
+*@li score_threshold: An optional 0-D float tensor representing the threshold for
+deciding when to remove boxes based on score . \n
+*@li index_id: An optional float16 tensor with shape [num_batches,num_classes,spatial_dimension,3]
+the last dim representing (batch_id,class_id,index_id) . \n
+
+*@par Attributes:
+*@li center_point_box: Integer indicating the format of the box data. Defaults to 0.
+0 - the box data is supplied as [y1, x1, y2, x2], where (y1, x1) and (y2, x2)
+are the coordinates of any diagonal pair of box corners; the coordinates can be
+normalized (i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models.
+1 - the box data is supplied as [x_center, y_center, width, height].
+Mostly used for Pytorch models.
+*@li max_boxes_size: An optional int. Defaults to 0. \n
+
+*@par Outputs:
+*@li selected_indices: A 2-D integer tensor of shape [M] representing the
+selected indices from the boxes tensor, where M <= max_output_size. \n
+
+*@attention Constraints:
+*Input boxes and scores must be float16 type . \n
+
+*@par Third-party framework compatibility
+*Compatible with onnx NonMaxSuppression operator.
+*/
+
+REG_OP(NonMaxSuppressionV7)
+    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(scores, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(max_output_size, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(iou_threshold, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(score_threshold, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(index_id, TensorType({DT_FLOAT16}))
+    .OUTPUT(selected_indices, TensorType({DT_INT32}))
+    .ATTR(center_point_box, Int, 0)
+    .ATTR(max_boxes_size, Int, 0)
+    .OP_END_FACTORY_REG(NonMaxSuppressionV7)
+
+/**
+*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li features: A 5HD Tensor list of type float32 or float16.
+*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
+* the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1".
+
+*@par Attributes:
+*@li finest_scale: An optional int, specifying the scale used to calculate levels of "rois". Defaults to "56".
+*@li roi_scale_factor: An optional float32, specifying the rescaling of "rois" coordinates. Defaults to "0".
+*@li spatial_scale: An optional list of float32, specifying the scaling ratio of "features"
+* to the original image. Defaults to "{1/4, 1/8, 1/16, 1/32}".
+*@li pooled_height: An optional attribute of type int32, specifying the H dimension. Defaults to "7".
+*@li pooled_width: An optional attribute of type int32, specifying the W dimension. Defaults to "7".
+*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency
+* of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois",
+* which is a floating point number. Defaults to "0".
+*@li pool_mode: An optional attribute of type string to indicate pooling mode. Defaults to "avg" . \n
+*@li aligned: An optional attribute of type bool, specifying the align to corner. Defaults to true . \n
+
+*@par Outputs:
+* y: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16.
+* The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height",
+* "pooled_width", and "features", respectively.
+
+*@par Third-party framework compatibility
+*Compatible with mmdetection SingleRoIExtractor operator.
+*/
+REG_OP(RoiExtractor)
+    .DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(finest_scale, Int, 56)
+    .ATTR(roi_scale_factor, Float, 0)
+    .ATTR(spatial_scale, ListFloat, {1.f / 4, 1.f / 8, 1.f / 16, 1.f / 32})
+    .ATTR(pooled_height, Int, 7)
+    .ATTR(pooled_width, Int, 7)
+    .ATTR(sample_num, Int, 0)
+    .ATTR(pool_mode, String, "avg")
+    .ATTR(aligned, Bool, true)
+    .OP_END_FACTORY_REG(RoiExtractor)
+
+/**
+*@brief Performs Position Sensitive PS ROI Pooling . \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
+* map, dimension C1 must be equal to
+* (int((output_dim+15)/C0))*group_size*group_size.
+*@li rois: A tensor of type float16 or float32, with shape
+* [batch, 5, rois_num], describing the ROIs, each ROI consists of five
+* elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates
+* the index of the input feature map, "x1", "y1", "x2", or "y2" must be
+* greater than or equal to "0.0" . \n
+
+*@par Attributes:
+*@li output_dim: A required int32, specifying the number of output channels,
+* must be greater than 0.
+*@li group_size: A required int32, specifying the number of groups to encode
+* position-sensitive score maps, must be within the range (0, 128).
+*@li spatial_scale: A required float32, scaling factor for mapping the input
+* coordinates to the ROI coordinates . \n
+
+*@par Outputs:
+*y: An NC1HWC0 tensor of type float16 or float32, describing the result
+* feature map . \n
+
+*@attention Constraints:
+* NC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16
+*/
+REG_OP(PSROIPoolingV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(spatial_scale, Float)
+    .REQUIRED_ATTR(output_dim, Int)
+    .REQUIRED_ATTR(group_size, Int)
+    .OP_END_FACTORY_REG(PSROIPoolingV2)
+
+/**
+*@brief Performs Position Sensitive PS ROI Pooling Grad . \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li x: An NC1HWC0 tensor of type float16 or float32, describing the result
+* feature map . \n
+*@li rois: A tensor of type float16 or float32, with shape
+* [batch, 5, rois_num], describing the ROIs, each ROI consists of five
+* elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates
+* the index of the input feature map, "x1", "y1", "x2", or "y2" must be
+* greater than or equal to "0.0" . \n
+
+*@par Attributes:
+*@li output_dim: A required int32, specifying the number of output channels,
+* must be greater than 0.
+*@li group_size: A required int32, specifying the number of groups to encode
+* position-sensitive score maps, must be within the range (0, 128).
+*@li spatial_scale: A required float32, scaling factor for mapping the input
+* coordinates to the ROI coordinates . \n
+*@li input_size: A required list of ints, giving the gradient input size: (H, W)
+
+*@par Outputs:
+*y: An NC1HWC0 tensor of type float16 or float32, describing the feature
+* map, dimension C1 must be equal to
+* (int((output_dim+15)/C0))*group_size*group_size.
+
+*@attention Constraints:
+* NC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16
+*/
+REG_OP(PSROIPoolingGradV2D)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(spatial_scale, Float)
+    .REQUIRED_ATTR(output_dim, Int)
+    .REQUIRED_ATTR(group_size, Int)
+    .REQUIRED_ATTR(input_size, ListInt)
+    .OP_END_FACTORY_REG(PSROIPoolingGradV2D)
+
+/**
+*@brief Generate the responsible flags of anchor in a single feature map.
+
+*@par Inputs:
+*@li gt_bboxes: Ground truth box, 2-D Tensor of type float32 with shape `[batch, 4]`.
+
+*@par Attributes:
+*@li featmap_size: A required list of ints. The size of feature maps.
+*@li strides: A required list of ints. Stride of current level.
+*@li num_base_anchors: A required int. The number of base anchors.
+
+*@par Outputs:
+*flags: A Tensor of type uint8. The valid flags of each anchor in a single level.
+*/
+REG_OP(AnchorResponseFlags)
+    .INPUT(gt_bboxes, TensorType({DT_FLOAT}))
+    .OUTPUT(flags, TensorType({DT_UINT8}))
+    .REQUIRED_ATTR(featmap_size, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(num_base_anchors, Int)
+    .OP_END_FACTORY_REG(AnchorResponseFlags)
+
+/**
+*@brief Generates bounding boxes based on yolo's "anchor" and "ground-truth" boxes.
+* It is a customized mmdetection operator . \n
+
+*@par Inputs:
+* Three inputs, including:
+*@li anchor_boxes: anchor boxes generated by the yolo training set.
+*  A 2D Tensor of type float32 or float16 with shape (N, 4). "N" indicates the number
+* of ROIs, and the value "4" refers to (tx, ty, tw, th).
+*@li gt_bboxes: target of the transformation, e.g, ground-truth boxes.
+*  A 2D Tensor of type float32 or float16 with shape (N, 4).
+* "N" indicates the number of ROIs, and 4 indicates "dx", "dy", "dw", and "dh" .
+*@li stride: Scale for each box.
+*  A 1D Tensor of type int32 shape (N,).
+* "N" indicates the number of ROIs. \n
+
+*@par Attributes:
+*@li performance_mode: An optional string, "high_precision" or "high_performance".
+* Defaults to "high_precision". With float32 input, "high_precision" keeps the output
+* error below 0.0001, while "high_performance" gives the best performance but only
+* keeps the error below 0.005.
+
+*@par Outputs:
+*encoded_bboxes: Bboxes generated based on "anchor_boxes" and "gt_bboxes". Have the
+* same format and type as "anchor_boxes".
+*
+*@attention Constraints:
+* input anchor boxes only support maximum N=20480. \n
+*/
+REG_OP(YoloBoxesEncode)
+    .INPUT(anchor_boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(gt_bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(stride, TensorType({DT_INT32}))
+    .ATTR(performance_mode, String, "high_precision")
+    .OUTPUT(encoded_bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(YoloBoxesEncode)
+
+/**
+*@brief Computes assigned_gt_inds_pos, the positive-bbox assignment to gt boxes, from IOU overlaps.
+
+*@par Inputs:
+* Eight inputs, including:
+*@li assigned_gt_inds: Tensor of type float16 or float32, shape (n, )
+*@li overlaps: A Tensor. Datatype is same as assigned_gt_inds. IOU between gt_bboxes and bboxes. shape(k, n)
+*@li box_responsible_flags: A Tensor. Support uint8. Flag to indicate whether box is responsible.
+*@li max_overlaps: A Tensor. Datatype is same as assigned_gt_inds. overlaps.max(axis=0).
+*@li argmax_overlaps: A Tensor. Support int32. overlaps.argmax(axis=0).
+*@li gt_max_overlaps: A Tensor. Datatype is same as assigned_gt_inds. overlaps.max(axis=1).
+*@li gt_argmax_overlaps: A Tensor. Support int32. overlaps.argmax(axis=1).
+*@li num_gts: A Tensor. Support int32. real k. shape (1, )
+
+*@par Attributes:
+*@li pos_iou_thr: A required float. IOU threshold for positive bboxes.
+*@li min_pos_iou: A required float. Minimum iou for a bbox to be considered as a positive bbox.
+*@li gt_max_assign_all: A required bool. Whether to assign all bboxes with the same highest overlap with some gt to that gt.
+
+*@par Outputs:
+*@li assigned_gt_inds_pos: A Tensor. Support float16/float32. shape (n, ).
+*/
+REG_OP(GridAssignPositive)
+    .INPUT(assigned_gt_inds, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(box_responsible_flags, TensorType({ DT_UINT8 }))
+    .INPUT(max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(argmax_overlaps, TensorType({ DT_INT32 }))
+    .INPUT(gt_max_overlaps, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(gt_argmax_overlaps, TensorType({ DT_INT32 }))
+    .INPUT(num_gts, TensorType({ DT_INT32 }))
+    .OUTPUT(assigned_gt_inds_pos, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(pos_iou_thr, Float)
+    .REQUIRED_ATTR(min_pos_iou, Float)
+    .REQUIRED_ATTR(gt_max_assign_all, Bool)
+    .OP_END_FACTORY_REG(GridAssignPositive)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
+
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index 35c4c7d4..b44c0780 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -55,7 +55,9 @@ REG_OP(LogSoftmaxGrad)
 *Two inputs, including:
 * @li features: A Tensor. Must be one of the following types: half, float32, double.
 *    A "batch_size * num_classes" matrix.
-* @li labels: A Tensor of the same type as "features". batch_size vector with values in [0, num_classes).
+* @li labels: A Tensor. Must be one of the following types: 'int32', 'int64'.
+*             batch_size vector with values in [0, num_classes).
+*             This is the label for the given minibatch entry.
 
 
 *@par Outputs:
@@ -105,6 +107,9 @@ REG_OP(SoftmaxCrossEntropyWithLogits)
 * @li grad_softmax: A Tensor. Has the same shape and type as "softmax".
 * The format is NC1HWC0 or DN . \n
 
+*@par Attributes:
+* axes: An optional list of ints. Defaults to "{-1}" . \n
+
 *@par Outputs:
 *grad_x: A Tensor. Has the same shape and type as "softmax" . \n
 
@@ -115,6 +120,7 @@ REG_OP(SoftmaxGrad)
     .INPUT(softmax, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .INPUT(grad_softmax, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
     .OUTPUT(grad_x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
+    .ATTR(axes, ListInt, {-1})
     .OP_END_FACTORY_REG(SoftmaxGrad)
 
 /**
@@ -160,20 +166,20 @@ REG_OP(SigmoidCrossEntropyWithLogits)
     .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits)
 
 /**
-*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n
+*@brief Computes the sigmoid cross entropy loss of "predict" and "target".
 
 *@par Inputs:
 * four inputs, including:
 *@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value.
-*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n
-*@li weight: An multi-dimensional Tensor, specifying the weight value. \n
+*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value.
+*@li weight: An multi-dimensional Tensor, specifying the weight value.
 *@li pos_weight: An multi-dimensional Tensor, specifying the pos weight value. \n
 
 *@par Attributes:
-*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean" . \n
+*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean". \n
 
 *@par Outputs:
-*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n
+*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict". \n
 
 *@par Third-party framework compatibility
 * Compatible with PyTorch operator BCEWithLogitsLoss.
@@ -330,6 +336,41 @@ REG_OP(SoftmaxV2)
     .ATTR(axes, ListInt, {-1})
     .OP_END_FACTORY_REG(SoftmaxV2)
 
+/**
+*@brief Function softmax with dropoutDoMaskV3D
+
+*@par Inputs:
+*Two inputs, including:
+* @li x: A mutable Tensor. The type only support float16.
+* @li mask: A mutable Tensor. Must meet all of the following rules:
+*     shape of mask should be 1D.
+*     dtype of mask should be uint8.
+*     value of shape should meet the following algorithm:
+*     value = (size(x) + 128 - 1) // 128 * 128
+
+*@par Attributes:
+* @li keep_prob: A required attribute of type float (registered via
+*     REQUIRED_ATTR, not a tensor input). Presumably the dropout keep
+*     probability -- TODO confirm the valid range . \n
+* @li axes: A list of int. The dimension softmax would be performed on. Defaults
+*     to "[-1]" . \n
+
+*@par Outputs:
+*y1: A mutable Tensor. Has the same type as "x".
+*y2: A mutable Tensor. Has the same type as "x". \n
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(SoftmaxV2WithDropOutDoMaskV3D)
+    .INPUT(x, TensorType({DT_FLOAT16}))
+    .INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(y1, TensorType({DT_FLOAT16}))
+    .OUTPUT(y2, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(keep_prob, Float)
+    .ATTR(axes, ListInt, {-1})
+    .OP_END_FACTORY_REG(SoftmaxV2WithDropOutDoMaskV3D)
+
 /**
 *@brief Computes log softmax activations . \n
 
@@ -427,6 +468,33 @@ REG_OP(MVN)
     .ATTR(eps, Float, 1e-9)
     .OP_END_FACTORY_REG(MVN)
 
+/**
+*@brief Normalizes the input . \n
+
+*@par Inputs:
+* One input:
+*x: An NCHW tensor of type float16 or float32 . \n
+
+*@par Attributes:
+*@li eps: An optional float32 epsilon for not dividing by zero. Defaults to "1e-9" . \n
+*@li axes: A list of Integers, along which axis to reduce. Defaults to "[0, 2, 3]" . \n
+
+*@par Outputs:
+*y: An NCHW tensor of type float16 or float32 . \n
+
+*@attention Constraints:
+* The input tensor must have the NCHW format, whose shape length must be 4.
+*@par Third-party framework compatibility
+* Compatible with the ONNX operator MeanVarianceNormalization.
+*/
+
+REG_OP(MVNV2)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))  /* "Result, has same element type as inputs" */
+    .ATTR(eps, Float, 1e-9)
+    .ATTR(axes, ListInt, {0, 2, 3})
+    .OP_END_FACTORY_REG(MVNV2)
+
 /**
 *@brief Normalizes the input "x1" . \n
 
@@ -498,6 +566,31 @@ REG_OP(LayerNorm)
     .ATTR(epsilon, Float, 0.0000001)
     .OP_END_FACTORY_REG(LayerNorm)
 
+/**
+*@brief Returns a tensor where each sub-tensor of input along dimension
+*       dim is normalized such that the p-norm of the sub-tensor is lower than the value maxnorm. \n
+
+*@par Inputs:
+*One input, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32 . \n
+
+*@par Attributes:
+* @li p: A required float. Specifies the L_p norm.
+* @li dim: A required int. The processed dim.
+* @li maxnorm: A required float. Threshold for comparison. \n
+
+*@par Outputs:
+*One output, including:
+* @li y: shape and dtype of output, should be same shape and type as input.
+*/
+REG_OP(Renorm)
+    .INPUT(x, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .REQUIRED_ATTR(p, Float)
+    .REQUIRED_ATTR(dim, Int)
+    .REQUIRED_ATTR(maxnorm, Float)
+    .OP_END_FACTORY_REG(Renorm)
+
 /**
 *@brief LayerNormGrad operator interface implementation
 *  calculating: dy, x, variance, mean, gamma
@@ -586,6 +679,48 @@ REG_OP(LayerNormXBackprop)
     .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
     .OP_END_FACTORY_REG(LayerNormXBackprop)
 
+/**
+*@brief LayerNormXBackpropV2 operator interface implementation
+*  calculating: dy, x, variance, mean, gamma
+*  pd_xl = data_dy*data_gamma
+*  pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean)
+*           np.power((data_variance + EPSLON), (-1.5))),
+*           reduce_axis, keepdims=True)
+*  pd_mean = np.sum(((-1.0)*pd_xl
+*            np.power((data_variance + EPSLON), (-0.5))),
+*            reduce_axis, keepdims=True)
+*            + pd_var*(1.0/m)
+*            np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True)
+*  pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) +
+*         pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m)
+*  res_for_gamma = (data_x - data_mean) * np.power((data_variance + EPSLON), (-0.5))
+
+*@par Inputs:
+*Five inputs, including:
+* @li dy: A Tensor. Must be one of the following types: float16, float32.
+* @li x: A Tensor. Must be one of the following types: float16, float32.
+* @li variance: A Tensor. Must be one of the following types: float16, float32.
+* @li mean: A Tensor. Must be one of the following types: float16, float32.
+* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n
+
+*@par Outputs:
+*Two outputs, including:
+* @li pd_x: A Tensor. Must be one of the following types: float16, float32.
+* @li res_for_gamma: A Tensor. Must be one of the following types: float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
+*/
+REG_OP(LayerNormXBackpropV2)
+    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(res_for_gamma, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(LayerNormXBackpropV2)
+
 /**
 *@brief LayerNormBetaGammaBackprop operator interface implementation
 *  calculating: dy, x, variance, mean
@@ -629,6 +764,35 @@ REG_OP(LayerNormBetaGammaBackprop)
     .REQUIRED_ATTR(shape_gamma, ListInt)
     .OP_END_FACTORY_REG(LayerNormBetaGammaBackprop)
 
+/**
+*@brief LayerNormBetaGammaBackpropV2 operator interface implementation
+*  calculating: dy, res_for_gamma
+*  pd_gamma = np.sum((data_dy*res_for_gamma), param_axis, keepdims=True)
+*  pd_beta = np.sum(data_dy, param_axis, keepdims=True)
+
+*@par Inputs:
+*Two inputs, including:
+* @li dy: A Tensor. Must be one of the following types: float16, float32.
+* @li res_for_gamma: A Tensor. Must be one of the following types: float32 . \n
+
+*@par Attributes:
+* shape_gamma: A required list of ints. Presumably the shape of "gamma" (TODO confirm) . \n
+*@par Outputs:
+*Two outputs, including:
+* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
+* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
+*/
+REG_OP(LayerNormBetaGammaBackpropV2)
+    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(res_for_gamma, TensorType({DT_FLOAT}))
+    .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(shape_gamma, ListInt)
+    .OP_END_FACTORY_REG(LayerNormBetaGammaBackpropV2)
+
 /**
 *@brief Return "output" according to the algorithm of dropout_do_mask:
 *  scale_x = x *(1 / keep_prob)
@@ -656,7 +820,68 @@ REG_OP(DropOutDoMask)
     .INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16}))
     .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
     .OP_END_FACTORY_REG(DropOutDoMask)
-	
+
+/**
+*@brief Return "output" according to the algorithm of dropout_do_mask:
+*  scale_x = x *(1 / keep_prob)
+*  output = select(mask == 1, scale_x, 0)
+
+*@par Inputs:
+*Three inputs, including:
+* @li x: A mutable Tensor. Must be one of the following types:
+*     float16, float32
+* @li mask: A mutable Tensor. Must meet all of the following rules:
+*     shape of mask should be 1D.
+*     dtype of mask should be uint8.
+*     value of shape should meet the following algorithm:
+*     value = (size(x) + 128 - 1) // 128 * 128
+* @li keep_prob: A mutable Tensor. Must meet all of the following rules:
+*     shape of "keep_prob" should be (1,) or [1,].
+*     Has the same type as "x" . \n
+
+*@par Output:
+*y: A mutable Tensor. Has the same type as "x".
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DropOutDoMaskV3)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mask, TensorType({DT_UINT8}))
+    .INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(DropOutDoMaskV3)
+
+/**
+*@brief Return "output" according to the algorithm of dropout_do_mask:
+*  scale_x = x *(1 / keep_prob)
+*  output = select(mask == 1, scale_x, 0)
+
+*@par Inputs:
+*Two inputs, including:
+* @li x: A mutable Tensor. Must be one of the following types:
+*     float16, float32
+* @li mask: A mutable Tensor. Must meet all of the following rules:
+*     shape of mask should be 1D.
+*     dtype of mask should be uint8.
+*     value of shape should meet the following algorithm:
+*     value = (size(x) + 128 - 1) // 128 * 128
+*@par Attributes:
+* @li keep_prob: A required attribute of type float (registered via
+*     REQUIRED_ATTR, not a tensor input). It is the "keep_prob" used in
+*     the scale_x formula above . \n
+
+*@par Output:
+*y: A mutable Tensor. Has the same type as "x".
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DropOutDoMaskV3D)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(keep_prob, Float)
+    .OP_END_FACTORY_REG(DropOutDoMaskV3D)
+
 /**
 *@brief Scales the input . \n
 
@@ -703,7 +928,7 @@ REG_OP(Scale)
 
 *@par Inputs:
 *One input, including:
-*@li x: A Tensor. Must be 4-D shape, and only support the following types: float16, float32 . \n
+*x: A Tensor. Must be 4-D shape, and only support the following types: float16, float32 . \n
 
 *@par Attributes:
 *@li depth_radius: An optional int32, specifying the half-width of the normalization window. Defaults to "5".
@@ -960,24 +1185,532 @@ REG_OP(INInferV2D)
     .OP_END_FACTORY_REG(INInferV2D)
 
 /**
-*@brief Performs instance normalization for inference of InHost part.
+* @brief InstanceNorm operator interface implementation.
 
-*@par Inputs:\n
-* One input, including: (NC1HWC0 supported)
-* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance.
+* @par Inputs:
+* Three inputs, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32.
+* @li gamma: A Tensor. Must be one of the following types: float16, float32.
+* @li beta: A Tensor. Must be one of the following types: float16, float32.
+
+* @par Attributes:
+* @li data_format: An attribute of type String \n
+* @li epsilon: An attribute of type Float. \n
+
+* @par Outputs:
+*Three outputs, including:
+* @li y: A Tensor. Has the same type as "x". \n
+* @li mean: A Tensor. Has the same type as "x". \n
+* @li variance: A Tensor. Has the same type as "x". \n
+
+* @par Third-party framework compatibility
+* Can be used by onnx InstanceNormalization
+*/
+REG_OP(InstanceNorm)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(data_format, String, "NDHWC")
+    .ATTR(epsilon, Float, 1e-6)
+    .OP_END_FACTORY_REG(InstanceNorm)
+
+/**
+*@brief InstanceNormGrad operator interface implementation.
+
+*@par Inputs:
+*Five inputs, including:
+* @li dy: A Tensor. Must be one of the following types: float16, float32.
+* @li x: A Tensor. Must be one of the following types: float16, float32.
+* @li variance: A Tensor. Must be one of the following types: float16, float32.
+* @li mean: A Tensor. Must be one of the following types: float16, float32.
+* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n
+
+*@par Outputs:
+*Three outputs, including:
+* @li pd_x: A Tensor. Must be one of the following types: float16, float32.
+* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
+* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
+*/
+REG_OP(InstanceNormGrad)
+    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(InstanceNormGrad)
+
+/**
+*@brief InstanceNormXBackprop operator interface implementation.
+
+*@par Inputs:
+*Five inputs, including:
+* @li dy: A Tensor. Must be one of the following types: float16, float32.
+* @li x: A Tensor. Must be one of the following types: float16, float32.
+* @li variance: A Tensor. Must be one of the following types: float16, float32.
+* @li mean: A Tensor. Must be one of the following types: float16, float32.
+* @li gamma: A Tensor. Must be one of the following types: float16, float32 . \n
+
+*@par Outputs:
+*Two outputs, including:
+* @li pd_x: A Tensor. Must be one of the following types: float16, float32.
+* @li res_for_gamma: A Tensor. Must be one of the following types: float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
+*/
+REG_OP(InstanceNormXBackprop)
+    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(res_for_gamma, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(InstanceNormXBackprop)
+
+/**
+*@brief InstanceNormBetaGammaBackprop operator interface implementation.
+
+*@par Inputs:
+*Two inputs, including:
+* @li dy: A Tensor. Must be one of the following types: float16, float32.
+* @li res_for_gamma: A Tensor. Must be one of the following types: float32.\n
+
+*@par Outputs:
+*Two outputs, including:
+* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32.
+* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL.  Please do not use.
+*/
+REG_OP(InstanceNormBetaGammaBackprop)
+    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(res_for_gamma, TensorType({DT_FLOAT}))
+    .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(InstanceNormBetaGammaBackprop)
+
+/**
+* @brief Computes Kl_div_loss_grad or Kl_div_loss_backward. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li grad: A Tensor. Must be one of the following types: float16, float32.
+* Required.
+* @li input: A Tensor. Has the same type as "grad". Required.
+* @li target: A Tensor. Has the same type as "grad". Required. \n
+
+* @par Attributes:
+* @li reduction: An optional attribute of type String. Defaults to "mean". \n
+* @li log_target: An optional attribute of type Bool. Defaults to false. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "grad". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator KlDivLossGrad.
+*/
+REG_OP(KlDivLossGrad)
+    .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(reduction, String, "mean")
+    .ATTR(log_target, Bool, false)
+    .OP_END_FACTORY_REG(KlDivLossGrad)
+
+/**
+* @brief Computes l1_loss_grad or l1_loss_backward. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li grads: A Tensor. Must be one of the following types: float16, float32.
+* Required.
+* @li predict: A Tensor. Has the same type as "grads". Required.
+* @li label: A Tensor. Has the same type as "grads". Required. \n
+
+* @par Attributes:
+* @li reduction: An optional attribute of type String. Defaults to "mean". \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "grads". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator L1LossGrad.
+*/
+REG_OP(L1LossGrad)
+    .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(L1LossGrad)
+
+/**
+* @brief Computes loss of lp, p=1,2,3....
+
+* @par Inputs:
+* @li predict: An ND tensor of type float16, float32.
+* @li label: An ND tensor of type float16, float32. \n
+
+* @par Attributes:
+* @li p: A required int attribute that decides which loss to compute, now the p only can be 1 to compute l1_loss.
+* @li reduction: An optional string.Defaults to "mean". \n
+
+* @par Outputs:
+* @li y: An ND tensor with the same shape and type as "predict". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator LpLoss.
+*/
+REG_OP(LpLoss)
+    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .REQUIRED_ATTR(p, Int)
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(LpLoss)
+
+/**
+* @brief Computes gradients of mse loss.
+
+* @par Inputs:
+* @li predict: An ND tensor of type float16, float32.
+* @li label: An ND tensor of type float16, float32.
+* @li dout: An ND tensor of type float16, float32. \n
+
+* @par Attributes:
+* @li reduction: An optional string. Defaults to "mean". \n
+
+* @par Outputs:
+* @li y: An ND tensor with the same shape and type as "predict". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator MseLossGrad.
+*/
+REG_OP(MseLossGrad)
+    .INPUT(predict, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(label, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(dout, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(MseLossGrad)
+
+/**
+* @brief Computes mse loss.
+* @par Inputs:
+* two inputs, including:
+*  @li predict: An ND Tensor of dtype float16 or float32.
+*  @li label: An ND Tensor of dtype float16 or float32.\n
+*
+* @par Attributes:
+*  @li reduction:An optional str from sum, none, mean, Defaults to "mean".\n
+*
+* @par Outputs:
+*  @li y: when reduction=sum/mean, y is a scalar. When reduction=none, y has
+*    same type and shape as "predict".\n
+*/
+REG_OP(MseLoss)
+    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(MseLoss)
+
+/**
+* @brief Calculates the reversed outputs of the function "smooth_l1_loss_v2". \n
+
+* @par Inputs:
+* Three Inputs, including:
+* @li predict: A Tensor. Must be one of the following types:
+*     float16, float32.
+* @li label: A Tensor. Has the same type as "predict".
+* @li dout: A Tensor. Has the same type as "predict". \n
+
+* @par Attributes:
+* Two Attributes, including:
+* @li sigma: An optional float. Defaults to 1.0. \n
+
+* @li reduction: An optional string. Defaults to "mean",
+*    Must be one of the following: "none", "mean", "sum". \n
+
+* @par Outputs:
+* @li gradient: A Tensor. Has the same type as "predict". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator SmoothL1LossBackward.
+*/
+REG_OP(SmoothL1LossGradV2)
+    .INPUT(predict, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(label, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(dout, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(gradient, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(sigma, Float, 1.0)
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(SmoothL1LossGradV2)
+
+/**
+* @brief Creates a criterion that uses a squared term if the absolute
+* element-wise error falls below beta and an L1 term otherwise. It is
+* less sensitive to outliers than the MSELoss and in some cases prevents
+* exploding gradients.
+
+* @par Inputs:
+* @li predict: A multi-dimensional Tensor of type float16 or float32,
+* specifying the predictive value. \n
+* @li label: A multi-dimensional Tensor of type float16 or float32,
+* specifying the target value. \n
+
+* @par Attributes:
+* @li sigma: An optional float. Specifies the threshold of loss. Defaults
+* to "1.0". \n
+* @li reduction: An optional str. Specifies the reduction to apply to
+* the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied,
+* 'mean': the sum of the output will be divided by the number of elements in
+* the output,'sum': the output will be summed. Default: 'mean'. \n
+
+* @par Outputs:
+* @li loss: Indicates the loss between the predictive value and target value.
+* Has the same dimensions as "predict". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator smooth_l1_loss. \n
+*/
+REG_OP(SmoothL1LossV2)
+    .INPUT(predict, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(label, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(loss, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .ATTR(sigma, Float, 1.0)
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(SmoothL1LossV2)
+
+/**
+* @brief Computes Centralization. result = x - mean(x, axes)
+
+* @par Inputs:
+* @li x: An ND tensor of type float16, float32.
+* @par Attributes:
+* @li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType.
+* Must be in the range [-rank(x), rank(x)).
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x". \n
+
+* @par Third-party framework compatibility
+* custom operator \n
+*/
+REG_OP(Centralization)
+    .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .ATTR(axes, ListInt, {-1})
+    .OP_END_FACTORY_REG(Centralization)
+
+/**
+*@brief Roll the tensor along the given dimension(s).
+* Elements that are shifted beyond the last position are re-introduced at the first position.
+* If a dimension is not specified, the tensor will be flattened before rolling and then restored to the original shape. \n
+
+*@par Inputs:
+*One input, including:
+* @li x: A tensor . Must be one of the following types:
+*     float16, float32, int32, uint32, int8, uint8. \n
 
 *@par Attributes:
-* epsilon: An optional float32, specifying the small value added to
-variance to avoid dividing by zero. Defaults to "0.00001" . \n
+* @li shifts: The number of places by which the elements of the tensor are shifted. \n
+* @li dims: Axis along which to roll. \n
 
-*@par Outputs:\n
-* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt.
+*@par Outputs:
+* y: A Tensor with the same type and shape of x's. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Roll. \n
 */
-REG_OP(InHost)
-     .INPUT(variance, TensorType({DT_FLOAT}))
-     .OUTPUT(variance_sqrt, TensorType({DT_FLOAT}))
-     .ATTR(epsilon, Float, 0.00001)
-     .OP_END_FACTORY_REG(InHost)
+REG_OP(Roll)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8}))
+    .REQUIRED_ATTR(shifts, ListInt)
+    .ATTR(dims, ListInt, {})
+    .OP_END_FACTORY_REG(Roll)
+
+/**
+ *@brief Calculate the loss. Creates a criterion that optimizes a two-class classification
+ logistic loss between input_x and input_y (containing 1 or -1). \n
+
+ *@par Inputs:
+ *Two inputs, including:
+ * @li input_x: A tensor. Must be one of the following types:
+ *     float16, float32. \n
+ * @li input_y: A tensor. Must be one of the following types:
+ *     float16, float32. \n
+
+ *@par Attributes:
+ *@li reduction: An optional string. Defaults to "mean". \n
+
+ *@par Outputs:
+ *output_z: while reduction == "none", A Tensor with the same type and shape as input_x. \n
+ *          while reduction == "sum" or "mean", A Tensor with the same type as input_x , shape of which is (1,)
+
+ *@par Third-party framework compatibility
+ *Compatible with the Pytorch operator SoftMarginLoss. \n
+ */
+REG_OP(SoftMarginLoss)
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(reduction, String, "mean")
+    .OUTPUT(output_z, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(SoftMarginLoss)
+
+/**
+* @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2.
+
+* @par Inputs:
+* @li predict: An ND tensor of type float16, float32.
+* @li target: An ND tensor of type float16, float32.
+* @li dout: An ND tensor of type float16, float32.
+* @li weight: An optional ND tensor of type float16, float32.
+* @li pos_weight: An optional ND tensor of type float16, float32. \n
+
+* @par Attributes:
+* @li reduction: An optional string.Defaults to "mean". \n
+
+* @par Outputs:
+* @li gradient: An ND tensor with the same shape and type as "predict". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad.
+*/
+REG_OP(SigmoidCrossEntropyWithLogitsGradV2)
+    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(pos_weight, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2)
+/**
+ * @brief Calculate the PoissonNllLoss function. 
+ *        target∼Poisson(input)loss(input,target)=input−target∗log(input)+log(target!) \n
+
+ * @par Inputs:
+ * Two inputs, including:
+ * @li input_x: A tensor. Must be one of the following types:
+ *     float16, float32. \n
+ * 
+ * @par Inputs:
+ * @li target: A tensor. Must be one of the following types:
+ *     float16, float32. \n
+
+ * @par Attributes:
+ * four Attributes, including:
+ * @li log_input: An optional bool. Defaults to "True" \n
+ * 
+ *  @par Attributes:
+ * @li full: An optional bool. Defaults to "False" \n
+ * 
+ *  @par Attributes:
+ * @li eps: An optional float. Defaults to "1e-8" \n
+ * 
+ *  @par Attributes:
+ * @li reduction: An optional string. Defaults to "mean" \n
+
+ * @par Outputs:
+ * loss: A Tensor has same element type as two inputs. \n
+
+ * @par Third-party framework compatibility
+ * Compatible with the Pytorch operator PoissonNllLoss. \n
+ */
+REG_OP(PoissonNllLoss)
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(log_input, Bool, true)
+    .ATTR(full, Bool, false)
+    .ATTR(eps, Float, 1e-8)
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(PoissonNllLoss)
+/**
+ *@brief rnn_gen_mask
+ * @par Inputs:
+ * @li seq_length: A ND Tensor of type int32. Records the current length of each batch.\n
+ *
+ * @par Attributes:
+ * @li num_step: A required int.\n
+ * @li hidden_size: A required int. \n
+ *
+ *
+ * @par Output:
+ * seq_mask: A mutable Tensor of type float16, with the shape of [num_step, batch_size, hidden_size]. \n
+ *
+ */
+REG_OP(RnnGenMask)
+    .INPUT(seq_length, TensorType({DT_INT32}))
+    .OUTPUT(seq_mask, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(num_step, Int)
+    .REQUIRED_ATTR(hidden_size, Int)
+    .OP_END_FACTORY_REG(RnnGenMask)
+
+/**
+* @brief Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss) 
+*        between input x (a 2D mini-batch Tensor) and output y (which is a 2D Tensor of target class indices) \n
+ 
+* @par Inputs:
+* Two inputs, including:
+* @li x: A tensor. Must be one of the following types:
+*     float16, float32. \n
+* 
+* @par Inputs:
+* @li target: A tensor. Must be the following types:
+*     int32. \n
+
+* @par Attributes:
+* @li reduction: An optional string. Defaults to "mean" \n
+
+* @par Outputs:
+* y: A Tensor has same element type as input x. \n
+* is_target: A Tensor has same element type as input target. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator MultiLabelMarginLoss. \n
+*/
+REG_OP(MultilabelMarginLoss)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(target, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(is_target, TensorType({DT_INT32}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(MultilabelMarginLoss)
+
+/**
+*@brief Performs batch normalization . \n
+*@par Inputs:
+* Two inputs
+*@li input_x: A Tensor. Support float32. shape (n, c, d).
+*@li seq_len: A Tensor. Each batch normalize data num. Support Int32. Shape (n, ). \n
+*@par Attributes:
+*@li normalize_type: Str. Support "per_feature" or "all_features".
+*@li epsilon: An optional float32, specifying the small value added to
+variance to avoid dividing by zero. Defaults to "0.00001" . \n
+*@par Outputs:
+* One output
+*@li output_y: A Tensor for the normalized "input_x". Support float32. shape (n, c, d).\n
+*/
+REG_OP(NormalizeBatch)
+    .INPUT(input_x, TensorType({ DT_FLOAT }))
+    .INPUT(seq_len, TensorType({ DT_INT32 }))
+    .OUTPUT(output_y, TensorType({ DT_FLOAT }))
+    .REQUIRED_ATTR(normalize_type, String)
+    .ATTR(epsilon, Float, 0.00001)
+    .OP_END_FACTORY_REG(NormalizeBatch)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h
index 9edc469a..49fd02fa 100644
--- a/third_party/fwkacllib/inc/ops/nn_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,7 +20,144 @@
  */
 #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
 #define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
-
+#include "graph/operator_reg.h"
 #include "nn_pooling_ops.h"
 
+namespace ge {
+/**
+* @brief Says whether the targets are in the top "k" predictions . \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor.
+* @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids.
+* @li k: A 1D Tensor of the same type as "targets".
+* Specifies the number of top elements to look at for computing precision . \n
+
+* @par Outputs:
+* precision: A Tensor of type bool . \n
+
+* @attention Constraints:
+* @li targets must be non-negative tensor.
+
+* @par Third-party framework compatibility
+* @li Compatible with the TensorFlow operator InTopKV2.
+*/
+REG_OP(InTopKV2)
+    .INPUT(predictions, TensorType({DT_FLOAT}))
+    .INPUT(targets, TensorType::IndexNumberType())
+    .INPUT(k, TensorType::IndexNumberType())
+    .OUTPUT(precision, TensorType({DT_BOOL}))
+    .OP_END_FACTORY_REG(InTopKV2)
+
+/**
+*@brief Performs batch normalization . \n
+
+*@par Inputs:
+* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported)
+*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
+*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
+if input "x" is with format NC1HWC0. Specifies the scaling factor.
+*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
+if input "x" is with format NC1HWC0. Specifies the offset.
+*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
+if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the
+operation is used for training.
+*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be
+5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None"
+if the operation is used for training . \n
+
+*@par Attributes:
+*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
+*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC".
+*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n
+
+*@par Outputs:
+* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported)
+*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
+*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
+if input "x" is with format NC1HWC0. Specifies the mean of "x".
+*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
+Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x".
+*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
+Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
+*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
+Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n
+
+*@attention Constraints:
+*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
+then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
+*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n
+*/
+REG_OP(FusedBatchNormV2)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(offset, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(batch_mean, TensorType({DT_FLOAT}))
+    .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .ATTR(data_format, String, "NHWC")
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(FusedBatchNormV2)
+
+/**
+ * @brief: Large amount of data sort.First operator of TopK.
+ * @par Inputs:
+ * two input, including:
+ * @li input_data: A Tensor. Data to be sorted. Support float16
+ * @li input_index: A Tensor. Range(0, 2048). Datatype and format is same as input_data.
+ * @par Attributes:
+ * @li k_num: Int.Number to be sorted.
+ * @par Outputs:
+ * 1 output, including:
+ * @li output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
+ */
+REG_OP(SegmentSort)
+    .INPUT(input_data, TensorType({DT_FLOAT16}))
+    .INPUT(input_index, TensorType({DT_FLOAT16}))
+    .OUTPUT(output_proposal, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(k_num, Int)
+    .OP_END_FACTORY_REG(SegmentSort)
+
+/**
+ * @brief: Large amount of data sort.Second operator of TopK.
+ * @par Inputs:
+ * One input, including:
+ * @li input_proposal: A Tensor. Proposal sorted for each channel. Support float16
+ * @par Attributes:
+ * @li k_num: Int.Number to be sorted.
+ * @par Outputs:
+ * 1 output, including:
+ * @li output_proposal: A Tensor. Datatype and format is same as input_proposal. Proposal sorted for each channel.
+ */
+REG_OP(MultiMerge)
+    .INPUT(input_proposal, TensorType({DT_FLOAT16}))
+    .OUTPUT(output_proposal, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(k_num, Int)
+    .OP_END_FACTORY_REG(MultiMerge)
+
+/**
+ * @brief: Large amount of data sort.Third operator of TopK.
+ * @par Inputs:
+ * One input, including:
+ * @li input_proposal: A Tensor. Proposal sorted for each channel. Support float16
+ * @par Attributes:
+ * @li k_num: Int.Number to be sorted.
+ * @par Outputs:
+ * 2 output, including:
+ * @li output_data: A Tensor. Datatype and format is same as input_proposal. Data sorted.
+ * @li output_index: A Tensor. int32. Data index.
+ */
+REG_OP(SingleMerge)
+    .INPUT(input_proposal, TensorType({DT_FLOAT16}))
+    .OUTPUT(output_data, TensorType({DT_FLOAT16}))
+    .OUTPUT(output_index, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(k_num, Int)
+    .OP_END_FACTORY_REG(SingleMerge)
+}// namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
index ab35ba47..80a21333 100644
--- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -182,6 +182,128 @@ REG_OP(AvgPool3D)
     .ATTR(data_format, String, "NDHWC")
     .OP_END_FACTORY_REG(AvgPool3D)
 
+
+/**
+*@brief Performs average pooling on the input.
+
+*@par Inputs:
+*@li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double.
+*@li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout.
+*@li multiplier: An optional tensor of float16, float32, double.
+
+*@par Attributes:
+*@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor.
+*@li strides: List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor.
+*@li pads: List of ints, implicit zero paddings on both sides of the input.
+*@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. Defaults to false.
+*@li count_include_pad: When true, will include the zero-padding in the averaging calculation. Defaults to true.
+*@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. Defaults to 0.
+*@li data_format: A string, format of input data. Defaults to "NDHWC" . \n
+
+*@par Outputs:
+*y: The average pooled output tensor . \n
+
+*@attention Constraints:
+*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator AvgPool3D.
+*/
+REG_OP(AvgPool3DD)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(ksize, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(pads, ListInt)
+    .ATTR(ceil_mode, Bool, false)
+    .ATTR(count_include_pad, Bool, true)
+    .ATTR(divisor_override, Int, 0)
+    .ATTR(data_format, String, "NDHWC")
+    .OP_END_FACTORY_REG(AvgPool3DD)
+
+/**
+* @brief Computes AvgPool3DGrad function.
+
+* @par Inputs:
+* @li orig_input_shape: An NDHWC tensor of type int32.
+* @li grads: An NDHWC tensor of type float16, float32, or double.
+
+* @par Attributes:
+* @li ksize: List of ints that has length 5. The size of the window for each dimension of the input tensor.
+* @li strides: List of ints that has length 5. The stride of the sliding window for each dimension of the input tensor.
+* @li pads: List of ints, implicit zero paddings on both sides of the input.
+* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. Defaults to false.
+* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. Defaults to true.
+* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. Defaults to 0.
+* @li data_format: A string, format of input data. Defaults to "NDHWC".
+
+* @par Outputs:
+* @li output: A mutable tensor with the shape specified by "orig_input_shape" and the same type as "grads".
+
+* @attention Constraints:
+* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]
+
+* @par Third-party framework compatibility
+* @li Compatible with the TensorFlow operator AvgPool3DGrad.
+*/
+
+REG_OP(AvgPool3DGrad)
+    .INPUT(orig_input_shape, TensorType({DT_INT32}))
+    .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(output, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .REQUIRED_ATTR(ksize, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(pads, ListInt)
+    .ATTR(ceil_mode, Bool, false)
+    .ATTR(count_include_pad, Bool, true)
+    .ATTR(divisor_override, Int, 0)
+    .ATTR(data_format, String, "NDHWC")
+    .OP_END_FACTORY_REG(AvgPool3DGrad)
+
+/**
+* @brief Computes AvgPool3DGradD function.
+
+* @par Inputs:
+* @li grads: An NDHWC tensor of type float16.
+* @li filter: An optional tensor of type float16, fractal_z_3d layout.
+* @li multiplier: An optional tensor of float16.
+
+* @par Attributes:
+* @li orig_input_shape: List of ints that has length 5. Presumably the shape of the original input tensor.
+* @li ksize: List of ints that has length 5. The size of the window for each dimension of the input tensor.
+* @li strides: List of ints that has length 5. The stride of the sliding window for each dimension of the input tensor.
+* @li pads: List of ints, implicit zero paddings on both sides of the input.
+* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. Defaults to false.
+* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. Defaults to true.
+* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. Defaults to 0.
+* @li data_format: A string, format of input data. Defaults to "NDHWC" . \n
+
+* @par Outputs:
+* @li output: A tensor of type float16 . \n
+
+* @attention Constraints:
+* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator AvgPool3DGradD.
+*/
+REG_OP(AvgPool3DGradD)
+    .INPUT(grads, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16}))
+    .OUTPUT(output, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(orig_input_shape, ListInt)
+    .REQUIRED_ATTR(ksize, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(pads, ListInt)
+    .ATTR(ceil_mode, Bool, false)
+    .ATTR(count_include_pad, Bool, true)
+    .ATTR(divisor_override, Int, 0)
+    .ATTR(data_format, String, "NDHWC")
+    .OP_END_FACTORY_REG(AvgPool3DGradD)
+
 /**
 *@brief Performs max_pool_ext2 on the input . \n
 
@@ -278,8 +400,8 @@ No default value.
 specifying the stride of the sliding window for each dimension of
 the input tensor. No default value.
 *@li padding: A required string type of float16.
-*@li pads: A list type of int32. Default value {0, 0, 0}.
-*@li dilation: A list type of int32. Default value {1, 1, 1}.
+*@li pads: A list type of int32. Default value {0,0,0,0,0,0}.
+*@li dilation: A list type of int32. Default value {1,1,1,1,1,1}.
 *@li ceil_mode: A ceil mode number of int32 . Default value 0.
 *@li data_format: An optional string. Defaults to "NDHWC" . \n
 
@@ -302,12 +424,37 @@ REG_OP(MaxPool3D)
     .REQUIRED_ATTR(ksize, ListInt)
     .REQUIRED_ATTR(strides, ListInt)
     .REQUIRED_ATTR(padding, String)
-    .ATTR(pads, ListInt, {0,0,0})
-    .ATTR(dilation, ListInt, {1,1,1})
+    .ATTR(pads, ListInt, {0,0,0,0,0,0})
+    .ATTR(dilation, ListInt, {1,1,1,1,1,1})
     .ATTR(ceil_mode, Int, 0)
     .ATTR(data_format, String, "NDHWC")
     .OP_END_FACTORY_REG(MaxPool3D)
 
+/**
+*@brief Applies a 2D adaptive max pooling over an input signal composed of several input planes. \n
+* The output is of size H x W, for any input size.
+
+* @par Inputs:
+* One input, including:
+* @li x: A Tensor. Must be one of the following data types:
+*     float16, float32, float64. \n
+
+* @par Attributes:
+* @li output_size: A required list of 2 ints
+*    specifying the size (H,W) of the output tensor. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same data type as "x" \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator AdaptiveMaxPool2d.
+*/
+REG_OP(AdaptiveMaxPool2d)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE}))
+    .OUTPUT(argmax, TensorType::IndexNumberType())
+    .REQUIRED_ATTR(output_size, ListInt)
+    .OP_END_FACTORY_REG(AdaptiveMaxPool2d)
 
 /**
 * @brief Computes second-order gradients of the maxpooling3d function . \n
@@ -477,8 +624,9 @@ REG_OP(MaxPoolV2)
 
 *@par Inputs:
 * One input:
-*x: An NC1HWC0 Tensor. Supported type: float, double, int32,
- * uint8, int16, int8, int64, uint16, half, uint32, uint64 . \n
+* x: A 4D Tensor. Supported type: float, double, int32,
+ * uint8, int16, int8, int64, uint16, half, uint32, uint64.
+ * Must set the format, supported format list ["NCHW", "NHWC"]. \n
 
 *@par Attributes:
 *@li ksize: A required list of int8, int16, int32, or int64 values,
@@ -490,8 +638,8 @@ REG_OP(MaxPoolV2)
 *@li padding: A required string. No default value . \n
 
 *@par Outputs:
-*y: A Tensor. Has the same type and format as input "x".
-*argmax: A Tensor. Has the same type and format as input "x".
+*@li y: A Tensor. Has the same type and format as input "x".
+*@li argmax: A Tensor. Has the same type and format as input "x".
 *@attention Constraints:
 *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1,
  * ksize[1] * ksize[2] <= 255.
@@ -517,10 +665,12 @@ REG_OP(MaxPoolWithArgmax)
 
 *@par Inputs:
 * Three inputs, including:
-*@li x: An NC1HWC0 tensor. Supported type: float, double, int32,
+*@li x: A 4D tensor. Supported type: float, double, int32,
  * uint8, int16, int8, int64, uint16, half, uint32, uint64.
-*@li grad: An NC1HWC0 tensor. Supported type: float, double, int32,
+ * Must set the format, supported format list ["NCHW", "NHWC"].
+*@li grad: A 4D tensor. Supported type: float, double, int32,
  * uint8, int16, int8, int64, uint16, half, uint32, uint64.
+ * Must set the format, supported format list ["NCHW", "NHWC"].
 *@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n
 
 *@par Attributes:
@@ -741,7 +891,7 @@ REG_OP(AvgPoolV2Grad)
 * @brief Computes gradients of averagev2 pooling function.
 
 * @par Inputs:
-* @li input_grad: An NHWC tensor of type float16, float32, or double.
+*input_grad: An NHWC tensor of type float16, float32, or double.
 
 * @par Attributes:
 * @li orig_input_shape: A required tuple or list of type int32.
@@ -759,10 +909,10 @@ REG_OP(AvgPoolV2Grad)
 * @li data_format: An optional string. Defaults to "NHWC".
 
 * @par Outputs:
-* @out_grad: A mutable tensor with the same shape and type as "orig_input".
+*out_grad: A mutable tensor with the same shape and type as "orig_input".
 
 * @par Third-party framework compatibility
-* @li Compatible with the TensorFlow operator AvgPoolGrad.
+*Compatible with the TensorFlow operator AvgPoolGrad.
 */
 REG_OP(AvgPoolV2GradD)
     .INPUT(input_grad, TensorType({DT_FLOAT16}))
@@ -1037,6 +1187,7 @@ REG_OP(MaxPool3DGrad)
     .OUTPUT(y, TensorType::RealNumberType())
     .REQUIRED_ATTR(ksize, ListInt)
     .REQUIRED_ATTR(strides, ListInt)
+    .ATTR(padding, String, "SAME")
     .REQUIRED_ATTR(pads, ListInt)
     .ATTR(data_format, String, "NDHWC")
     .OP_END_FACTORY_REG(MaxPool3DGrad)
@@ -1107,7 +1258,7 @@ REG_OP(AvgPool1DD)
 
 *@par Inputs:
 * One input:
-*x: An NC1HWC0 Tensor of type float16.
+*x: A 4D Tensor of type float16. Must set the format, supported format list ["NCHW", "NHWC"].
 *@par Attributes:
 *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
 * each dimension of the input tensor. No default value.
@@ -1148,9 +1299,9 @@ REG_OP(MaxPoolWithArgmaxV2)
 
 *@par Inputs:
 * Three inputs, including:
-*@li x: An NC1HWC0 tensor of type float16.
-*@li grad: An NC1HWC0 tensor of type float16.
-*@li argmx: An NC1HWC0 tensor of type uint16 or int64 . \n
+*@li x: A 4D tensor of type float16. Must set the format, supported format list ["NCHW", "NHWC"].
+*@li grad: A 4D tensor of type float16. Must set the format, supported format list ["NCHW", "NHWC"].
+*@li argmx: A 4D tensor of type uint16 or int64. Must set the format, supported format list ["NCHW", "NHWC"]. \n
 
 *@par Attributes:
 *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
@@ -1291,5 +1442,306 @@ REG_OP(MaxPoolV3Grad)
     .ATTR(global_pooling, Bool, false)
     .ATTR(ceil_mode, Bool, false)
     .OP_END_FACTORY_REG(MaxPoolV3Grad)
+
+/**
+*@brief Performs Dilation2D on the input . \n
+
+*@par Inputs:
+*@li x: A 4D tensor. The supported format is NHWC.
+*@li filter: A 3D tensor. Has the same type as "x", and its C dimension is the same as that of "x". \n
+
+*@par Attributes:
+*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
+*@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1.
+*@li padding_mode: An optional string. Defaults to "SAME". Supports "SAME" and "VALID".
+*@li pads: An optional list of 4 ints.
+*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
+*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n
+
+*@par Outputs:
+*y: The output tensor. Has the same type and format as input "x" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Dilation2D.
+*/
+REG_OP(Dilation2D)
+    .INPUT(x,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .INPUT(filter,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .OUTPUT(y,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(rates, ListInt)
+    .ATTR(padding_mode, String, "SAME")
+    .ATTR(pads, ListInt, {0,0,0,0})
+    .ATTR(ceil_mode, Bool, false)
+    .ATTR(data_format, String, "NHWC")
+    .OP_END_FACTORY_REG(Dilation2D)
+
+/**
+*@brief Performs Dilation2DBackpropFilter on the input. \n
+
+*@par Inputs:
+*@li x: A 4D tensor. The supported format is NHWC.
+*@li filter: A 3D tensor. Has the same type as "x", and its C dimension is the same as that of "x".
+*@li out_backprop: Has the same type and format as input "x", and its C dimension is the same as that of "x". \n
+
+*@par Attributes:
+*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
+*@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1.
+*@li padding_mode: An optional string. Defaults to "SAME". Supports "SAME" and "VALID".
+*@li pads: An optional list of 4 ints.
+*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
+*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n
+
+*@par Outputs:
+*y: The output tensor. Has the same type and format as input "filter" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Dilation2DBackpropFilter.
+*/
+
+REG_OP(Dilation2DBackpropFilter)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .INPUT(filter,
+           TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .INPUT(out_backprop,
+           TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .OUTPUT(y,
+            TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(rates, ListInt)
+    .ATTR(padding_mode, String, "SAME")
+    .ATTR(pads, ListInt, {0, 0, 0, 0})
+    .ATTR(ceil_mode, Bool, false)
+    .ATTR(data_format, String, "NHWC")
+    .OP_END_FACTORY_REG(Dilation2DBackpropFilter)
+
+/**
+*@brief Performs Dilation2DBackpropInput on the input. \n
+
+*@par Inputs:
+*@li x: A 4D tensor. The supported format is NHWC.
+*@li filter: A 3D tensor. Has the same type as "x", and its C dimension is the same as that of "x".
+*@li out_backprop: Has the same type and format as input "x", and its C dimension is the same as that of "x". \n
+
+*@par Attributes:
+*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1.
+*@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1.
+*@li padding_mode: An optional string. Defaults to "SAME". Supports "SAME" and "VALID".
+*@li pads: An optional list of 4 ints.
+*@li ceil_mode: An optional bool. Defaults to "false". Use ceil or floor to calculate the output size when padding_mode is "CALCULATED".
+*@li data_format: An optional string, specifying the data format of "rates" and "strides", either "NCHW" or "NHWC" (default). \n
+
+*@par Outputs:
+*y: The output tensor. Has the same type and format as input "x" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Dilation2DBackpropInput.
+*/
+
+REG_OP(Dilation2DBackpropInput)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .INPUT(filter,
+           TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .INPUT(out_backprop,
+           TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .OUTPUT(y,
+            TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16}))
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(rates, ListInt)
+    .ATTR(padding_mode, String, "SAME")
+    .ATTR(pads, ListInt, {0, 0, 0, 0})
+    .ATTR(ceil_mode, Bool, false)
+    .ATTR(data_format, String, "NHWC")
+    .OP_END_FACTORY_REG(Dilation2DBackpropInput)
+
+/**
+* @brief Applies a 2D adaptive average pooling over
+*       an input signal composed of several input planes. \n
+
+* @par Inputs:
+* One input, including:
+* @li x: A Tensor. Must be one of the following data types:
+*     float16, float32. \n
+
+* @par Attributes:
+* @li output_size: A required list of 2 ints
+*    specifying the size (H,W) of the output tensor. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same data type as "x" \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator AdaptiveAvgPool2d.
+*/
+REG_OP(AdaptiveAvgPool2d)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(output_size, ListInt)
+    .OP_END_FACTORY_REG(AdaptiveAvgPool2d)
+
+/**
+* @brief Computes gradients of the adaptive average pooling function.
+
+* @par Inputs:
+* @li input_grad: A Tensor. Must be one of the following data types:
+* float16, float32.
+
+* @par Attributes:
+* @li orig_input_shape: A required tuple or list of type int32.
+
+* @par Outputs:
+* @li output_grad: A tensor with the same type as "input_grad".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator AdaptiveAvgPool2dGrad.
+*/
+REG_OP(AdaptiveAvgPool2dGrad)
+    .INPUT(input_grad, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(output_grad, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(orig_input_shape, ListInt)
+    .OP_END_FACTORY_REG(AdaptiveAvgPool2dGrad)
+
+/**
+* @brief Performs the backpropagation of MaxPoolWithArgmaxV1.
+
+* @par Inputs:
+* Three inputs, including:
+* @li x: An NC1HWC0 tensor of type float16.
+* @li grad: An NC1HWC0 tensor of type float16.
+* @li argmax: An NC1HWC0 tensor of type uint16. \n
+
+* @par Attributes:
+* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
+* each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
+* each dimension of the input tensor. No default value.
+* @li pads: A required listint. \n
+
+* @par Outputs:
+* y: A Tensor. Has the same type and format as input "x". \n
+
+* @attention Constraints:
+* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
+* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1
+* @li "pads" is listint.
+* @li "ceil_mode" defaults to False.
+* @li "data_format" defaults to "NC1HWC0". \n
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1.
+*/
+
+REG_OP(MaxPoolGradWithArgmaxV1)
+    .INPUT(x, TensorType({DT_FLOAT16}))
+    .INPUT(grad, TensorType({DT_FLOAT16}))
+    .INPUT(argmax, TensorType({DT_UINT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16}))
+    .REQUIRED_ATTR(ksize, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(pads, ListInt)
+    .ATTR(dtype, Int, 3)
+    .ATTR(dilation, ListInt, {1, 1, 1, 1})
+    .ATTR(ceil_mode, Bool, false)
+    .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV1)
+
+/**
+* @brief Performs max pooling on the input and outputs both max values and indices.
+
+* @par Inputs:
+* One input:
+* x: An NC1HWC0 Tensor of type float16. \n
+
+* @par Attributes:
+* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for
+* each dimension of the input tensor. No default value.
+* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for
+* each dimension of the input tensor. No default value.
+* @li pads: A required list of ints. No default value. \n
+
+* @par Outputs:
+* y: A Tensor. Has the same type and format as input "x".
+* argmax:  A Tensor. type:uint16, format:NC1HWC0. \n
+
+* @attention Constraints:
+* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255.
+* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[1] >= 1,
+* strides[2] <= 63, strides[2] >= 1.
+* @li "pads" is listint.
+* @li "ceil_mode" defaults to False.
+* @li "data_format" defaults to "NC1HWC0". \n
+
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator MaxPoolWithArgmaxV1.
+*/
+REG_OP(MaxPoolWithArgmaxV1)
+    .INPUT(x, TensorType({DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT16}))
+    .OUTPUT(argmax, TensorType({DT_UINT16}))
+    .REQUIRED_ATTR(ksize, ListInt)
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(pads, ListInt)
+    .ATTR(dtype, Int, 3)
+    .ATTR(dilation, ListInt, {1, 1, 1, 1})
+    .ATTR(ceil_mode, Bool, false)
+    .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1)
+
+/**
+*@brief Randomly sample a subset of positive and negative examples, and overwrite
+the label vector to the ignore value (-1) for all elements that are not
+included in the sample.\n
+
+* @par Inputs:
+* One input:
+* labels: shape of labels,(N, ) label vector with values. \n
+
+* @par Attributes:
+* @li batch_size_per_images: A required attribute of type int.
+* @li positive_fraction: A required attribute of type float.
+
+*@par Outputs:
+*y: The result of subSample. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SubSample.
+*@par Restrictions:
+*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
+*/
+REG_OP(SubSample)
+    .INPUT(labels, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(batch_size_per_images, Int)
+    .REQUIRED_ATTR(positive_fraction, Float)
+    .OP_END_FACTORY_REG(SubSample)
+
+/**
+*@brief Randomly sample a subset of positive and negative examples, and overwrite
+the label vector to the ignore value (-1) for all elements that are not
+included in the sample.\n
+
+* @par Inputs:
+* Two inputs, including:
+* @li labels: shape of labels,(N, ) label vector with values.
+* @li shuffle_matrix: random matrix with shape (N, ). \n
+
+* @par Attributes:
+* @li batch_size_per_images: A required attribute of type int.
+* @li positive_fraction: A required attribute of type float.
+
+*@par Outputs:
+*y: The result of subSample. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SubSampleLabels.
+*@par Restrictions:
+*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly.
+*/
+REG_OP(SubSampleLabels)
+    .INPUT(labels, TensorType({DT_INT32}))
+    .INPUT(shuffle_matrix, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(batch_size_per_images, Int)
+    .REQUIRED_ATTR(positive_fraction, Float)
+    .OP_END_FACTORY_REG(SubSampleLabels)
+
 }  // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H
diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h
index 047fd6da..75e91aee 100644
--- a/third_party/fwkacllib/inc/ops/nn_training_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -2101,6 +2101,55 @@ REG_OP(FusedMulApplyMomentumExtern)
     .ATTR(use_locking, Bool, false)
     .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern)
 
+/**
+*@brief Updates '*var' according to the momentum scheme.
+*   accum = accum * momentum - x1 * x2 * lr
+*   if use_nesterov is True:
+*       var += accum * momentum - x1 * x2 * lr
+*   else:
+*       var += accum
+*
+*@par Inputs:
+*@li var: A mutable tensor. Must be one of the data types defined in
+*    TensorType::NumberType(). Should be from a Variable().
+*@li accum: A mutable tensor. Has the same type as "var". Should be from a
+*    Variable().
+*@li lr: A tensor for the learning rate. Has the same type as "var". Should be
+*    from a Variable().
+*@li x1: A Tensor has type TensorType::NumberType().
+*@li momentum: A scalar. Has the same type as "var".
+*@li x2: A scalar has the same type as "var".
+*
+*@par Attributes:
+*@li use_nesterov: An optional bool. Defaults to "False".
+*    If "True", var will be updated by using Nesterov momentum.
+*@li use_locking: An optional bool. Defaults to "False".
+*    If "True", updating of the "var" tensor is protected by a lock;
+*    otherwise the behavior is undefined, but may exhibit less contention.
+*
+*@par Outputs:
+* var: A mutable tensor. Has the same type as input "var".
+*
+*@attention Constraints:
+* The input tensors must have the same shape.
+*
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator ResourceApplyKerasMomentum.
+*
+*/
+REG_OP(FusedMulApplyKerasMomentum)
+    .INPUT(var, TensorType::NumberType())
+    .INPUT(accum, TensorType::NumberType())
+    .INPUT(lr, TensorType::NumberType())
+    .INPUT(x1, TensorType::NumberType())
+    .INPUT(momentum, TensorType::NumberType())
+    .INPUT(x2, TensorType::NumberType())
+    .OUTPUT(var, TensorType::NumberType())
+    .OUTPUT(accum, TensorType::NumberType())
+    .ATTR(use_locking, Bool, false)
+    .ATTR(use_nesterov, Bool, false)
+    .OP_END_FACTORY_REG(FusedMulApplyKerasMomentum)
+
 /**
 *@brief Update "g" according to the LARS algorithm . \n
 
diff --git a/third_party/fwkacllib/inc/ops/no_op.h b/third_party/fwkacllib/inc/ops/no_op.h
index 7834591c..b27b1fa0 100644
--- a/third_party/fwkacllib/inc/ops/no_op.h
+++ b/third_party/fwkacllib/inc/ops/no_op.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
index e0e5dfc6..ca1c24eb 100644
--- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
+++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -223,7 +223,29 @@ REG_OP(Relu6Grad)
     .INPUT(features, TensorType::RealNumberType())
     .OUTPUT(backprops, TensorType::RealNumberType())
     .OP_END_FACTORY_REG(Relu6Grad)
-
+/**
+*@brief Calculate the elu_grad_v2 function. 
+*Applies the element-wise function:
+* Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha .
+*@par Inputs:
+*Two inputs, including:
+* @li grads: A tensor. Must be one of the following types:
+*     float16, float32.
+* @li activations: A tensor. Must be one of the following types:
+*     float16, float32.
+*
+*@par Outputs:
+*y: A Tensor with the same type and shape of grads's.
+* 
+*@par Attributes:
+*@li alpha: scalar parameter, default value = 1.0
+*/	
+REG_OP(EluGradV2)
+    .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(alpha, Float, 1.0)
+    .OP_END_FACTORY_REG(EluGradV2)
 /**
 * @brief Compute sigmoid of "x" element-wise . \n
 
@@ -508,6 +530,42 @@ REG_OP(Elu)
     .ATTR(alpha, Float, 1.0)
     .OP_END_FACTORY_REG(Elu)
 
+/**
+*@brief Continuously Differentiable Exponential Linear Units:
+*       Perform the linear unit element-wise on the input tensor X using formula:
+*       max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n
+
+*@par Inputs:
+*x: A float16, float32, for the input data type . \n
+
+*@par Attributes:
+*alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
+
+*@par Attributes:
+*alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
+
+*@par Attributes:
+*alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n
+
+*@par Outputs:
+*y: A float16, float32, for the normalized result . \n
+
+*@attention Constraints:
+*@li The input is of type float16 or float32 . \n
+
+*@par Multiple batches supported or not
+*Supported
+*@par Third-party framework compatibility
+*@li Compatible with ONNX's Celu operator
+*/
+REG_OP(Celu)
+    .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .ATTR(alpha1, Float, 1.0)
+    .ATTR(alpha2, Float, 1.0)
+    .ATTR(alpha3, Float, 1.0)
+    .OP_END_FACTORY_REG(Celu)
+
 /**
 *@brief Computes gradients for the exponential linear (Elu) operation.
 *
@@ -640,6 +698,352 @@ REG_OP(Mish)
     .OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 }))
     .OP_END_FACTORY_REG(Mish)
 
+/**
+ * @brief PyTorch mish_grad operator.
+ * @par Inputs:
+ * Three inputs, including:
+ * @li grad: A Tensor. shape, datatype and format is same as x
+ * @li x: A Tensor. Must be one of the following types: float16, float32
+ * @li tanhx: An optional Tensor. shape, datatype and format is same as x
+ * @par Outputs:
+ * 1 output, including:
+ * @li x_grad: A Tensor. shape, datatype and format is same as x
+ */
+
+REG_OP(MishGrad)
+    .INPUT(grad, TensorType({ DT_FLOAT,DT_FLOAT16 }))
+    .INPUT(x, TensorType({ DT_FLOAT,DT_FLOAT16 }))
+    .OPTIONAL_INPUT(tanhx, TensorType({ DT_FLOAT,DT_FLOAT16 }))
+    .OUTPUT(x_grad, TensorType({ DT_FLOAT,DT_FLOAT16 }))
+    .OP_END_FACTORY_REG(MishGrad)
+
+/**
+ * @brief pytorch hardtanh_backward operator.
+ *
+ * @par Inputs:
+ * 2 inputs, including:
+ * @li result, minimum tensor of the linear region range,
+ * datatype: float16/float32, format:ND/5HD.
+ * @li grad, maximum tensor of the linear region range,
+ * datatype:float16/float32, format:ND/5HD. \n
+
+ * @par Attributes:
+ * 2 attributes, including:
+ * @li min_val, minimum value of the linear region range, datatype:float.
+ * @li max_val, maximum value of the linear region range, datatype:float. \n
+
+ * @par Outputs:
+ * 1 output, including:
+ * @li y, hardtanh_backward output tensor, datatype and format is same as
+ * input result. \n
+
+ * @attention Constraints:
+ * This operator only supports dataType: float16/float32, format: ND/5HD. \n
+
+ * @par Third-party framework compatibility
+ * Compatible with the Pytorch operator HardtanhGrad.
+ */
+REG_OP(HardtanhGrad)
+    .INPUT(result, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "First operand." */
+    .INPUT(grad, TensorType({ DT_FLOAT16, DT_FLOAT }))   /* "Second operand." */
+    .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT }))     /* "Result, has same element type as two inputs" */
+    .ATTR(min_val, Float, -1.0)
+    .ATTR(max_val, Float, 1.0)
+    .OP_END_FACTORY_REG(HardtanhGrad)
+
+/**
+* @brief Calculates the softplus loss function with attributes of beta and threshold. \n
+
+* @par Inputs:
+* One input, including:
+* @li x: A mutable Tensor. Must be one of the following types:
+*     float16, float32. \n
+
+* @par Attributes:
+* @li beta: An optional float. Defaults to "1.0" \n
+
+* @li threshold: An optional float. Defaults to "20.0" \n
+
+* @par Outputs:
+* @li y: A mutable Tensor. Has the same type as "x" \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Softplus.
+*/
+REG_OP(SoftplusV2)
+    .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .ATTR(beta, Float, 1.0)
+    .ATTR(threshold, Float, 20.0)
+    .OP_END_FACTORY_REG(SoftplusV2)
+
+/**
+* @brief Calculates the reversed outputs of the function "softplus_v2". \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li input_gradients: A mutable Tensor. Must be one of the following types:
+*     float16, float32.
+* @li input_features: A mutable Tensor of the same type as "input_gradients" \n
+
+* @par Attributes:
+* @li beta: An optional float. Defaults to "1.0" \n
+
+* @li threshold: An optional float. Defaults to "20.0" \n
+
+* @par Outputs:
+* @li output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator SoftplusGrad.
+*/
+REG_OP(SoftplusV2Grad)
+    .INPUT(input_gradients, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .INPUT(input_features, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .OUTPUT(output_backprops, TensorType({ DT_FLOAT, DT_FLOAT16 }))
+    .ATTR(beta, Float, 1.0)
+    .ATTR(threshold, Float, 20.0)
+    .OP_END_FACTORY_REG(SoftplusV2Grad)
+
+/**
+ * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor)
+ *  where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise.
+ *
+ * @par Inputs:
+ * One input, including:
+ * @li x: A Tensor. Must be one of the following types: float32, float16
+ *
+ * @par Outputs:
+ * One output, including:
+ * @li y: A Tensor of the same type as x
+ *
+ */
+REG_OP(ThresholdedRelu)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(alpha, Float, 1.0)
+    .OP_END_FACTORY_REG(ThresholdedRelu)
+
+/**
+* @brief Calculate the hard shrinkage function. \n
+
+* @par Inputs:
+* One input, including:
+* @li input_x: A tensor. Must be one of the following types:
+*     float16, float32. \n
+
+* @par Attributes:
+* @li lambd: An optional float. Defaults to 0.5. \n
+
+* @par Outputs:
+* output_y: A Tensor with the same dtype and shape as "input_x". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Hardshrink. \n
+*/
+REG_OP(HardShrink)
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(lambd, Float, 0.5)
+    .OP_END_FACTORY_REG(HardShrink)
+
+/**
+*@brief Calculate the hard shrink grad function. \n
+*
+* Computes the gradient for the HardShrink: if x > lambd or x < -lambd, x, otherwise 0
+*
+*@par Inputs:
+*Two inputs, including:
+* @li gradients: A tensor. Must be one of the following types:
+*     float16, float32. \n
+* @li features: A tensor. Must be one of the following types:
+*     float16, float32. \n
+*
+*@par Outputs:
+*backprops: A Tensor with the same type and shape as "features". \n
+*
+*@par Attributes:
+*@li lambd: An optional float. Defaults to 0.5. \n
+*
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Hardshrink_backward. \n
+*/
+  REG_OP(HardShrinkGrad)
+  .INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT}))
+  .ATTR(lambd, Float, 0.5)
+  .OP_END_FACTORY_REG(HardShrinkGrad)
+
+/**
+* @brief Calculate the hard sigmoid function. \n
+
+* @par Inputs:
+* One input, including:
+* @li input_x: A tensor. Must be one of the following types:
+*     float16, float32, int32. \n
+
+* @par Attributes:
+* @li alpha: An optional float. Defaults to 0.16666666. \n
+* @li beta: An optional float. Defaults to 0.5. \n
+
+* @par Outputs:
+* output_y: A Tensor of type float16 or float32, with the same shape as "input_x". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Hardsigmoid. \n
+*/    
+REG_OP(HardSigmoid)
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(alpha, Float, 0.16666666)
+    .ATTR(beta, Float, 0.5)
+    .OP_END_FACTORY_REG(HardSigmoid)
+
+/**
+* @brief Calculate the soft shrinkage function. \n
+
+* @par Inputs:
+* One input, including:
+* @li input_x: A tensor. Must be one of the following types:
+*     float16, float32. \n
+
+* @par Attributes:
+* @li lambd: An optional float. Defaults to 0.5. \n
+
+* @par Outputs:
+* output_y: A Tensor with the same dtype and shape as "input_x". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator Softshrink. \n
+*/
+REG_OP(SoftShrink)
+     .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+     .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
+     .ATTR(lambd, Float, 0.5)
+     .OP_END_FACTORY_REG(SoftShrink)
+
+/**
+* @brief Calculate the reversed outputs of the function "soft_shrink". \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li input_grad: A tensor. Must be one of the following types:
+*     float16, float32. \n
+* @li input_x: A tensor of the same dtype as "input_grad". \n
+
+* @par Attributes:
+* @li lambd: An optional float. Defaults to 0.5. \n
+
+* @par Outputs:
+* output_y: A Tensor of the same dtype and shape as "input_grad". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator SoftShrinkGrad. \n
+*/
+REG_OP(SoftShrinkGrad)
+     .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT}))
+     .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+     .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
+     .ATTR(lambd, Float, 0.5)
+     .OP_END_FACTORY_REG(SoftShrinkGrad)
+
+/**
+*@brief Calculate the gradient of log sigmoid. \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li grads: A tensor, gradient of previous layer. Must be one of the following types:
+*       float16, float32. \n
+* @li features: A tensor, input of log sigmoid. Must be one of the following types:
+*       float16, float32. \n
+
+*@par Outputs:
+*One output, including:
+* @li backprops: A tensor with the same type and shape as grads. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator LogSigmoidBackward. \n
+*/
+REG_OP(LogSigmoidGrad)
+    .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(LogSigmoidGrad)
+
+/**
+*@brief Calculate -ln(1+e^(-x)). \n
+
+*@par Inputs:
+*One input, including:
+* @li x: A tensor. Must be one of the following types:
+*       float16, float32. \n
+
+*@par Outputs:
+*One output, including:
+* @li y: A tensor with the same type and shape as x. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator LogSigmoid. \n
+*/
+REG_OP(LogSigmoid)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) /* "input:x" */
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))  /* "output:y" */
+    .OP_END_FACTORY_REG(LogSigmoid)
+
+/**
+*@brief Calculate the backward outputs of the function "hard_sigmoid" \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li grads: A tensor. Must be one of the following types:
+*       float16, float32. \n
+* @li input_x: A tensor. Must be one of the following types:
+*       float16, float32. \n
+
+*@par Outputs:
+*One output, including:
+* @li y: A tensor with the same type and shape as input_x. \n
+
+* @par Attributes:
+* @li alpha: An optional float. Defaults to 0.16666666. \n
+* @li beta: An optional float. Defaults to 0.5. \n
+
+*@par Third-party framework compatibility
+*Compatible with the backward of the Pytorch operator Hardsigmoid. \n
+*/
+REG_OP(HardSigmoidGrad)
+    .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(alpha, Float, 0.16666666)
+    .ATTR(beta, Float, 0.5)
+    .OP_END_FACTORY_REG(HardSigmoidGrad)
+
+/**
+* @brief Calculate the shrink function. \n
+
+* @par Inputs:
+* One input, including:
+* @li input_x: A tensor. Must be one of the following types:
+*     float16, float32. \n
+
+* @par Attributes:
+* @li lambd: An optional float. Defaults to 0.5. \n
+* @li bias: An optional float. Defaults to 0.0. \n
+
+* @par Outputs:
+* output_y: A Tensor with the same dtype and shape as input_x. \n
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator Shrink. \n
+*/
+REG_OP(Shrink)
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(lambd, Float, 0.5)
+    .ATTR(bias, Float, 0.0)
+    .OP_END_FACTORY_REG(Shrink)
 } // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
index 8d7ef9f9..f36d2935 100644
--- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
+++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/outfeed_ops.h b/third_party/fwkacllib/inc/ops/outfeed_ops.h
index e0b783bc..53b9d701 100644
--- a/third_party/fwkacllib/inc/ops/outfeed_ops.h
+++ b/third_party/fwkacllib/inc/ops/outfeed_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h
index f746b3b3..6854c866 100644
--- a/third_party/fwkacllib/inc/ops/pad_ops.h
+++ b/third_party/fwkacllib/inc/ops/pad_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -101,7 +101,7 @@ REG_OP(FillD)
 */
 REG_OP(BroadcastTo)
     .INPUT(x, TensorType::BasicType())
-    .INPUT(shape, TensorType({DT_INT32}))
+    .INPUT(shape, TensorType({DT_INT32,DT_INT64}))
     .OUTPUT(y, TensorType::BasicType())
     .OP_END_FACTORY_REG(BroadcastTo)
 
@@ -161,7 +161,7 @@ REG_OP(Pad)
 *@brief Pads a tensor . \n
 
 *@par Inputs:
-*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n
+*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n
 
 *@par Attributes:
 *paddings: An optional "vector<vector<int>>". Defaults to "{}".
@@ -180,8 +180,8 @@ REG_OP(Pad)
 * Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead.
 */
 REG_OP(PadD)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .REQUIRED_ATTR(paddings, ListListInt)
     .OP_END_FACTORY_REG(PadD)
 
@@ -213,7 +213,7 @@ REG_OP(PadV2)
 *@brief Pads a tensor . \n
 
 *@par Inputs:
-*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n
+*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n
 *constant_values: A Tensor. Must have the same type as input.
 
 *@par Attributes:
@@ -227,10 +227,7 @@ REG_OP(PadV2)
 *y: A Tensor of the same type as "x" . \n
 
 *@par Third-party framework compatibility:
-* Compatible with TensorFlow operator Pad.
-*
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead.
+* Compatible with TensorFlow operator PadV2.
 */
 REG_OP(PadV2D)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
@@ -272,42 +269,42 @@ REG_OP(PadV3)
     .ATTR(paddings_contiguous, Bool, true)
     .OP_END_FACTORY_REG(PadV3)
 
-/**
-*@brief Pads a tensor.
-
-*@par Inputs:
-*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32.
-
-*@par Attributes:
-* @li paddings: An required "vector<vector<int>>".
-*     For each dimension D of input, paddings[D, 0] indicates how many
-*     values to add before the contents of tensor in that dimension,
-*     and paddings[D, 1] indicates how many values to add after the
-*     contents of tensor in that dimension.
-* @li constant_values: An optional int value for pad.
-* @li mode: An optional string, Defaults to "constant", indicates paddings mode,
-*     support "constant", "reflect", "edge"
-* @li paddings_contiguous: An optional bool value, Defaults to true.
-*     If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...]
-*     If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...]
-
-*@par Outputs:
-*y: A Tensor of the same type as "x".
-
-*@par Third-party framework compatibility:
-* Compatible with ONNX operator Pad.
-
-* @par Restrictions:
-* Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead.
-*/
-REG_OP(PadV3D)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
-    .REQUIRED_ATTR(paddings, ListListInt)
-    .ATTR(constant_values, Int, 0)
-    .ATTR(mode, String, "constant")
-    .ATTR(paddings_contiguous, Bool, true)
-    .OP_END_FACTORY_REG(PadV3D)
+  /**
+  *@brief Pads a tensor.
+
+  *@par Inputs:
+  *x: A Tensor. Must be one of the following types: float16, float32, int8, uint8.
+
+  *@par Attributes:
+  * @li paddings: A required "vector<vector<int>>".
+  *     For each dimension D of input, paddings[D, 0] indicates how many
+  *     values to add before the contents of tensor in that dimension,
+  *     and paddings[D, 1] indicates how many values to add after the
+  *     contents of tensor in that dimension.
+  * @li constant_values: An optional int value for pad. Defaults to 0.
+  * @li mode: An optional string, Defaults to "constant", indicates paddings mode,
+  *     support "constant", "reflect", "edge"
+  * @li paddings_contiguous: An optional bool value, Defaults to true.
+  *     If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...]
+  *     If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...]
+
+  *@par Outputs:
+  *y: A Tensor of the same type as "x".
+
+  *@par Third-party framework compatibility:
+  * Compatible with ONNX operator Pad.
+
+  * @par Restrictions:
+  * Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead.
+  */
+  REG_OP(PadV3D)
+      .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
+      .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8}))
+      .REQUIRED_ATTR(paddings, ListListInt)
+      .ATTR(constant_values, Int, 0)
+      .ATTR(mode, String, "constant")
+      .ATTR(paddings_contiguous, Bool, true)
+      .OP_END_FACTORY_REG(PadV3D)
 
 /**
 *@brief Create a diagonal tensor
@@ -403,5 +400,76 @@ REG_OP(EmbeddingRankId)
     .ATTR(mode, String, "mod")
     .OP_END_FACTORY_REG(EmbeddingRankId)
 
+/**
+*@brief EmbeddingLocalIndex, Sort statistics index according to rank_id \n
+
+*@par Inputs:
+* @li addr_table: A 2D tensor of type uint64 whose last dimension must be 3.
+* @li index: A tensor with data type int32, int64, uint32, uint64.
+
+*@par Attributes:
+* @li row_memory: The size of Embedding vector in a row, the default is 320.
+* @li mode: String type, currently there are two options: 'mod' and 'order'. Defaults to 'mod'.
+
+*@par Outputs:
+* @li local_idx: Index on each server.
+* @li nums: The number of local_idx found on each server.
+* @li recover_idx: The sorted local_idx element is at the position corresponding
+* to the original input index.
+
+*@par Third-party framework compatibility
+* NOTE(review): TensorFlow operator Diag was named here, which looks copy-pasted from another op - confirm.
+*/
+REG_OP(EmbeddingLocalIndex)
+    .INPUT(addr_table, TensorType({DT_UINT64}))
+    .INPUT(index, TensorType({DT_INT64,DT_INT32,DT_UINT32,DT_UINT64}))
+    .OUTPUT(local_idx, TensorType({DT_INT64,DT_INT32,DT_UINT32,DT_UINT64}))
+    .OUTPUT(nums, TensorType({DT_INT64,DT_INT32,DT_UINT32,DT_UINT64}))
+    .OUTPUT(recover_idx, TensorType({DT_INT64,DT_INT32,DT_UINT32,DT_UINT64}))
+    .ATTR(row_memory, Int, 320)
+    .ATTR(mode, String, "mod")
+    .OP_END_FACTORY_REG(EmbeddingLocalIndex)
+
+/**
+* @brief Creates a tensor of the specified shape filled with a value.
+
+* @par Inputs:
+* One input, including:
+* @li dims: A Tensor of type int16, int32 or int64, specifying the shape of the tensor to fill.
+
+* @par Attributes:
+* @li value: An optional float value. Defaults to 0.0.
+
+* @par Outputs:
+* @li y: A Tensor. Has the shape specified by input "dims" and is filled with the value specified by attr "value".
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator ConstantOfShape.
+*/
+REG_OP(FillV2)
+    .INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
+    .ATTR(value, Float, 0)
+    .OP_END_FACTORY_REG(FillV2)
+
+/**
+* @brief Creates a tensor of the specified shape filled with a value.
+
+* @par Attributes:
+* @li value: An optional float value. Defaults to 0.0.
+
+* @li dims: A required listInt specifying the shape of the tensor to fill.
+
+* @par Outputs:
+* @li y: A Tensor. Has the shape specified by attr "dims" and is filled with the value specified by attr "value".
+
+* @par Third-party framework compatibility
+* Compatible with the ONNX operator ConstantOfShape.
+*/
+REG_OP(FillV2D)
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64}))
+    .ATTR(value, Float, 0)
+    .REQUIRED_ATTR(dims, ListInt)
+    .OP_END_FACTORY_REG(FillV2D)
 } // namespace ge
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h
index 5c7adfd8..b625180a 100644
--- a/third_party/fwkacllib/inc/ops/parsing_ops.h
+++ b/third_party/fwkacllib/inc/ops/parsing_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -51,6 +51,246 @@ REG_OP(StringToNumber)
     .ATTR(out_type, Type, DT_FLOAT)
     .OP_END_FACTORY_REG(StringToNumber)
 
+/**
+*@brief Parses a single serialized Example prototype into sparse and dense
+*tensors. \n
+*@par Inputs:
+*serialized: A Tensor of type string.
+*dense_defaults: Dynamic input. Tensors of type string, float or int64. \n
+
+*@par Attributes:
+*num_sparse: An optional int. Defaults to 0. Presumably the number of
+*sparse_indices, sparse_values and sparse_shapes outputs (confirm).
+*sparse_keys: An optional ListString. Defaults to {}.
+*sparse_types: An optional ListType. Types of sparse_values. Defaults to {}.
+*dense_keys: An optional ListString. Defaults to {}.
+*dense_shapes: An optional ListListInt. Shapes of dense_defaults. Defaults to {}.
+*Tdense: An optional ListType. Types of dense_defaults. Defaults to {}. \n
+
+*@par Outputs:
+*sparse_indices: Dynamic output. Tensors of type int64.
+*sparse_values: Dynamic output. Tensors of type string, float or int64.
+*sparse_shapes: Dynamic output. Tensors of type int64.
+*dense_values: Dynamic output. Tensors of type string, float or int64.
+
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+**/
+REG_OP(ParseSingleExample)
+    .INPUT(serialized, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(dense_defaults, TensorType({DT_STRING,DT_FLOAT,DT_INT64}))
+    .DYNAMIC_OUTPUT(sparse_indices, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(sparse_values, TensorType({DT_STRING,DT_FLOAT,DT_INT64}))
+    .DYNAMIC_OUTPUT(sparse_shapes, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(dense_values, TensorType({DT_STRING,DT_FLOAT,DT_INT64}))
+    .ATTR(num_sparse, Int, 0)
+    .ATTR(sparse_keys, ListString, {})
+    .ATTR(dense_keys, ListString, {})
+    .ATTR(sparse_types, ListType, {})
+    .ATTR(Tdense, ListType, {})
+    .ATTR(dense_shapes, ListListInt, {})
+    .OP_END_FACTORY_REG(ParseSingleExample)
+
+/**
+*@brief Decodes raw file into a tensor . \n
+*@par Inputs:
+*bytes: A Tensor of type string.
+
+*@par Attributes:
+*little_endian: An optional bool. Defaults to true.
+*out_type: An optional type, the output type. Defaults to DT_FLOAT.
+
+*@par Outputs:
+*output: A Tensor.
+**/
+REG_OP(DecodeRaw)
+    .INPUT(bytes, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_BOOL,DT_FLOAT16,DT_DOUBLE,DT_FLOAT,
+                                    DT_INT64,DT_INT32,DT_INT8,DT_UINT8,DT_INT16,
+                                    DT_UINT16,DT_COMPLEX64,DT_COMPLEX128}))
+    .ATTR(out_type, Type, DT_FLOAT)
+    .ATTR(little_endian, Bool, true)
+    .OP_END_FACTORY_REG(DecodeRaw)
+
+/**
+*@brief Convert serialized tensorflow.TensorProto prototype to Tensor. \n
+
+*@par Inputs:
+*serialized: A Tensor of string type. Scalar string containing serialized
+*TensorProto prototype. \n
+
+*@par Attributes:
+*out_type: The type of the serialized tensor. The provided type must match the
+*type of the serialized tensor and no implicit conversion will take place. \n
+
+*@par Outputs:
+*output: A Tensor of type out_type. \n
+
+*@attention Constraints:
+*The implementation for ParseTensor on Ascend uses AICPU,
+*with bad performance. \n
+
+*@par Third-party framework compatibility
+*@li compatible with tensorflow ParseTensor operator.
+*/
+REG_OP(ParseTensor)
+    .INPUT(serialized, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16,
+                          DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32,
+                          DT_UINT64, DT_BOOL, DT_DOUBLE, DT_STRING,
+                          DT_COMPLEX64, DT_COMPLEX128}))
+    .ATTR(out_type, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(ParseTensor)
+
+/**
+*@brief Converts CSV records to tensors. Each column maps to one output
+*tensor. \n
+
+*@par Inputs:
+*records: Each string is a record/row in the csv and all records should have the
+*same format. \n
+*record_defaults: One tensor per column of the input record, with either a
+*scalar default value for that column or an empty vector if the column is
+*required. \n
+
+*@par Attributes:
+*OUT_TYPE: An optional ListType. Defaults to {}. Presumably the types of record_defaults (confirm). \n
+*field_delim: Char delimiter to separate fields in a record. Defaults to ",". \n
+*use_quote_delim: If false, treats double quotation marks as regular characters
+*inside of the string fields (ignoring RFC 4180, Section 2, Bullet 5). \n
+*na_value: Additional string to recognize as NA/NaN. Defaults to ",". \n
+*select_cols: An optional ListInt. Defaults to {}. Presumably the column indices to select (confirm). \n
+
+*@par Outputs:
+*output: Dynamic output. Tensors of type float, double, int32, int64 or string. \n
+
+*@attention Constraints:
+*The implementation for DecodeCSV on Ascend uses AICPU, with bad
+*performance. \n
+
+*@par Third-party framework compatibility
+*@li compatible with tensorflow DecodeCSV operator.
+*/
+REG_OP(DecodeCSV)
+    .INPUT(records, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(record_defaults, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
+                                        DT_INT64, DT_STRING}))
+    .DYNAMIC_OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32,
+                                        DT_INT64, DT_STRING}))
+    .ATTR(OUT_TYPE, ListType, {})
+    .ATTR(field_delim, String, ",")
+    .ATTR(use_quote_delim, Bool, true)
+    .ATTR(na_value, String, ",")
+    .ATTR(select_cols, ListInt, {})
+    .OP_END_FACTORY_REG(DecodeCSV)
+
+/**
+*@brief Parses serialized Example prototypes into sparse and dense
+*tensors. \n
+*@par Inputs:
+*serialized: A Tensor of type string. \n
+*name: A Tensor of type string. \n
+*sparse_keys: Dynamic input tensor of string. \n
+*dense_keys: Dynamic input tensor of string. \n
+*dense_defaults: Dynamic input tensor of type float, int64 or string. \n
+
+*@par Attributes:
+*Nsparse: Number of sparse_keys, sparse_indices and sparse_shapes. Defaults to 0. \n
+*Ndense: Number of dense_keys. Defaults to 0. \n
+*sparse_types: Types of sparse_values. Defaults to {}. \n
+*Tdense: Types of dense_defaults and dense_values. Defaults to {}. \n
+*dense_shapes: Shapes of dense_defaults. Defaults to {}. \n
+
+*@par Outputs:
+*sparse_indices: Dynamic output tensor of type int64. \n
+*sparse_values: Dynamic output tensor of type float, int64 or string. \n
+*sparse_shapes: Dynamic output tensor of type int64. \n
+*dense_values: Dynamic output tensor of type float, int64 or string. \n
+*@par Third-party framework compatibility \n
+*@li compatible with tensorflow ParseExample operator. \n
+*/
+REG_OP(ParseExample)
+    .INPUT(serialized, TensorType({DT_STRING}))
+    .INPUT(name, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(sparse_keys, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(dense_keys, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(dense_defaults, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .DYNAMIC_OUTPUT(sparse_indices, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .DYNAMIC_OUTPUT(sparse_shapes, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .ATTR(Nsparse, Int, 0)
+    .ATTR(Ndense, Int, 0)
+    .ATTR(sparse_types, ListType, {})
+    .ATTR(Tdense, ListType, {})
+    .ATTR(dense_shapes, ListListInt, {})
+    .OP_END_FACTORY_REG(ParseExample)
+
+/**
+*@brief Transforms a scalar brain.SequenceExample proto (as strings) into typed
+*tensors.
+*@par Inputs:
+*serialized: A Tensor of type string. \n
+*feature_list_dense_missing_assumed_empty: A Tensor of type string. \n
+*context_sparse_keys: Dynamic input tensor of string. \n
+*context_dense_keys: Dynamic input tensor of string. \n
+*feature_list_sparse_keys: Dynamic input tensor of string. \n
+*feature_list_dense_keys: Dynamic input tensor of string. \n
+*context_dense_defaults: Dynamic input tensor of type string, float or int64. \n
+*debug_name: A Tensor of type string. \n
+
+*@par Attributes:
+*Ncontext_sparse: Number of context_sparse_keys, context_sparse_indices and context_sparse_shapes \n
+*Ncontext_dense: Number of context_dense_keys \n
+*Nfeature_list_sparse: Number of feature_list_sparse_keys \n
+*Nfeature_list_dense: Number of feature_list_dense_keys \n
+*context_sparse_types: Types of context_sparse_values \n
+*Tcontext_dense: A list of types, presumably those of context_dense_defaults and context_dense_values (confirm) \n
+*feature_list_dense_types: Types of feature_list_dense_values \n
+*context_dense_shapes: Shape of context_dense \n
+*feature_list_sparse_types: Type of feature_list_sparse_values \n
+*feature_list_dense_shapes: Shape of feature_list_dense \n
+
+*@par Outputs:
+*context_sparse_indices: Dynamic output tensor of type int64. \n
+*context_sparse_values: Dynamic output tensor of type string, float or int64. \n
+*context_sparse_shapes: Dynamic output tensor of type int64. \n
+*context_dense_values: Dynamic output tensor of type string, float or int64. \n
+*feature_list_sparse_indices: Dynamic output tensor of type int64. \n
+*feature_list_sparse_values: Dynamic output tensor of type string, float or int64. \n
+*feature_list_sparse_shapes: Dynamic output tensor of type int64. \n
+*feature_list_dense_values: Dynamic output tensor of type string, float or int64. \n
+*@par Third-party framework compatibility \n
+*@li compatible with tensorflow ParseSingleSequenceExample operator. \n
+*/
+REG_OP(ParseSingleSequenceExample)
+    .INPUT(serialized, TensorType({DT_STRING}))
+    .INPUT(feature_list_dense_missing_assumed_empty, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(context_sparse_keys, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(context_dense_keys, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(feature_list_sparse_keys, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(feature_list_dense_keys, TensorType({DT_STRING}))
+    .DYNAMIC_INPUT(context_dense_defaults, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .INPUT(debug_name, TensorType({DT_STRING}))
+    .DYNAMIC_OUTPUT(context_sparse_indices, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(context_sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .DYNAMIC_OUTPUT(context_sparse_shapes, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(context_dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .DYNAMIC_OUTPUT(feature_list_sparse_indices, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(feature_list_sparse_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .DYNAMIC_OUTPUT(feature_list_sparse_shapes, TensorType({DT_INT64}))
+    .DYNAMIC_OUTPUT(feature_list_dense_values, TensorType({DT_FLOAT, DT_INT64, DT_STRING}))
+    .ATTR(Ncontext_sparse, Int, 0)
+    .ATTR(Ncontext_dense, Int, 0)
+    .ATTR(Nfeature_list_sparse, Int, 0)
+    .ATTR(Nfeature_list_dense, Int, 0)
+    .ATTR(context_sparse_types, ListType, {})
+    .ATTR(Tcontext_dense, ListType, {})
+    .ATTR(feature_list_dense_types, ListType, {})
+    .ATTR(context_dense_shapes, ListListInt, {})
+    .ATTR(feature_list_sparse_types, ListType, {})
+    .ATTR(feature_list_dense_shapes, ListListInt, {})
+    .OP_END_FACTORY_REG(ParseSingleSequenceExample)
+
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h
index b53cfeb6..69d5e67e 100644
--- a/third_party/fwkacllib/inc/ops/quantize_ops.h
+++ b/third_party/fwkacllib/inc/ops/quantize_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -60,6 +60,26 @@ REG_OP(Dequantize)
     .ATTR(mode, String, "MIN_COMBINED")
     .OP_END_FACTORY_REG(Dequantize)
 
+/**
+*@brief Quantizes the input . \n
+*@par Inputs:
+*x: A Tensor of type float16 or float32, the data to quantize. \n
+*scales: A Tensor of type float32. \n
+*zero_points: A Tensor of type int8, uint8 or int32. \n
+*@par Attributes:
+*@li dtype: A required string (presumably the quantized type, confirm). @li axis: An optional int, the processed dim. Defaults to 1. \n
+*@par Outputs:
+*y: A Tensor of type int8, uint8 or int32, with the same shape as "x". \n
+*/
+REG_OP(Quantize)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(scales, TensorType({DT_FLOAT}))
+    .INPUT(zero_points, TensorType({DT_INT8,DT_UINT8,DT_INT32}))
+    .OUTPUT(y, TensorType({DT_INT8,DT_UINT8,DT_INT32}))
+    .REQUIRED_ATTR(dtype, String)
+    .ATTR(axis, Int, 1)
+    .OP_END_FACTORY_REG(Quantize)
+
 /**
 *@brief Quantizes the input . \n
 
@@ -194,7 +214,7 @@ REG_OP(AscendRequant)
 *@brief Requantizes the input of int16 . \n
 
 *@par Inputs:
-*@li x: An NC1HWC0 tensor of type int16, specifying the input.
+*@li x0: An NC1HWC0 tensor of type int16, specifying the input.
 *@li req_scale: An NC1HWC0 tensor of type uint64, specifying the scaling ratio.
 *@li x1: An NC1HWC0 tensor of type int16 . \n
 
@@ -203,22 +223,21 @@ REG_OP(AscendRequant)
 *@li relu_flag: A optional bool, specifying whether to perform ReLU, either "True" or "False". Defaults to "False" . \n
 
 *@par Outputs:
-*@li y: The dequantized output tensor of type int8 and with format NC1HWC0.
+*@li y0: The dequantized output tensor of type int8 and with format NC1HWC0.
 *@li y1: The dequantized output tensor of type int16 and with format NC1HWC0 . \n
 
 *@par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
 REG_OP(AscendRequantS16)
-  .INPUT(x, TensorType({DT_INT16}))
+  .INPUT(x0, TensorType({DT_INT16}))
   .INPUT(req_scale, TensorType({DT_UINT64}))
   .OPTIONAL_INPUT(x1, TensorType({DT_INT16}))
-  .OUTPUT(y, TensorType({DT_INT8}))
+  .OUTPUT(y0, TensorType({DT_INT8}))
   .OUTPUT(y1, TensorType({DT_INT16}))
   .ATTR(dual_output, Bool, false)
   .ATTR(relu_flag, Bool, false)
   .OP_END_FACTORY_REG(AscendRequantS16)
-
 } // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/ops/ragged_array_ops.h
index 9b31aa8e..20484623 100644
--- a/third_party/fwkacllib/inc/ops/ragged_array_ops.h
+++ b/third_party/fwkacllib/inc/ops/ragged_array_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h
index 13488a25..020e3da4 100644
--- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h
+++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/ops/ragged_math_ops.h
index 8af4f867..258b0ca1 100644
--- a/third_party/fwkacllib/inc/ops/ragged_math_ops.h
+++ b/third_party/fwkacllib/inc/ops/ragged_math_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h
index b46da435..b65a68f1 100644
--- a/third_party/fwkacllib/inc/ops/random_ops.h
+++ b/third_party/fwkacllib/inc/ops/random_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -356,6 +356,39 @@ REG_OP(DropOutGenMask)
     .ATTR(seed2, Int, 0)
     .OP_END_FACTORY_REG(DropOutGenMask)
 
+
+/**
+*@brief Generate random uint8 mask for dropout v3 . \n
+
+*@par Inputs:
+*include:
+*@li shape: The shape of the output tensor. Must be int32 or int64.
+*@li prob: 0-D. Prob of 1 . Must be float16 or float32. \n
+
+*@par Attributes:
+*@li seed: If either seed or seed2 are set to be non-zero, the random number
+*generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+*@li seed2: A second seed to avoid seed collision . \n
+
+*@par Outputs:
+*y: Output (1-D) random number using uint8 data format . \n
+
+*@attention Constraints:
+*The output is aligned with 16
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+
+*@see DropOutGenMaskV3()
+*/
+REG_OP(DropOutGenMaskV3)
+    .INPUT(shape, TensorType({ DT_INT32, DT_INT64 }))
+    .INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT }))
+    .OUTPUT(y, TensorType({ DT_UINT8 }))
+    .ATTR(seed, Int, 0)
+    .ATTR(seed2, Int, 0)
+    .OP_END_FACTORY_REG(DropOutGenMaskV3)
+
 /**
 *@brief Generates values in an interval . \n
 
@@ -495,6 +528,62 @@ REG_OP(ShuffleChannel)
                            DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64}))
     .ATTR(group, Int, 1)
     .OP_END_FACTORY_REG(ShuffleChannel)
+
+/**
+ * @brief Generate a tensor of samples from a multinomial distribution
+ * according to the probabilities of each of the possible outcomes.
+ *
+ * @par Inputs:
+ * @li x: Input tensor with shape [batch_size, class_size], where class_size
+ * is the number of all possible outcomes. Each value along the axis zero
+ * represents the unnormalized log-probability of each outcome in a batch.
+ *
+ * @par Attributes:
+ * @li dtype: An optional int. Defaults to 6.
+ * @li sample_size: An optional int, the number of times to sample. Defaults to 1.
+ * @li seed: An optional float. Defaults to 0.
+ *
+ * @par Outputs:
+ * @li y: Output tensor with shape [batch_size, sample_size], where
+ * sample_size is the number of times to sample. Each value along the axis
+ * zero represents the outcome of the corresponding sample in a batch.
+ *
+ * @par Restrictions:
+ * Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+ */
+REG_OP(MultinomialFuss)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(y, TensorType({DT_INT32, DT_INT64}))
+    .ATTR(dtype, Int, 6)
+    .ATTR(sample_size, Int, 1)
+    .ATTR(seed, Float, 0)
+    .OP_END_FACTORY_REG(MultinomialFuss)
+
+/**
+* @brief During training, randomly zeroes some of the elements of the input tensor
+* with probability "p".
+*
+* @par Inputs:
+* @li x: A ND Tensor. Must be one of the following data types: Float, Float16
+* @li seed: A ND Tensor. Must be one of the following data types: Float
+*
+* @par Attributes:
+* @li p: A required float, the probability of an element to be zeroed.
+*
+* @par Outputs:
+* @li y: A tensor with the same shape and type as "x".
+* @li mask: A tensor of type Float with the same shape as "x".
+* @li seed: The new seed (output shares the input's name). Same shape and type as input "seed".
+*/
+
+REG_OP(DropoutV2)
+    .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT }))
+    .INPUT(seed, TensorType({ DT_FLOAT }))
+    .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT }))
+    .OUTPUT(mask, TensorType({ DT_FLOAT }))
+    .OUTPUT(seed, TensorType({ DT_FLOAT }))
+    .REQUIRED_ATTR(p, Float)
+    .OP_END_FACTORY_REG(DropoutV2)
 }   // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h
index 6f44093e..97c7b8e1 100644
--- a/third_party/fwkacllib/inc/ops/reduce_ops.h
+++ b/third_party/fwkacllib/inc/ops/reduce_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -37,7 +37,7 @@ namespace ge {
 *@attention Constraints:
 * This operator is a BatchNorm fusion operator for updating the moving
 * averages for training.
-* This operator is used in conjunction with BNTrainingUpdate.
+* This operator is used in conjunction with BNTrainingUpdate.
 */
 REG_OP(BNTrainingReduce)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -45,6 +45,27 @@ REG_OP(BNTrainingReduce)
     .OUTPUT(square_sum, TensorType({DT_FLOAT}))
     .OP_END_FACTORY_REG(BNTrainingReduce)
 
+/**
+*@brief Performs reduced batch normalization . \n
+
+*@par Inputs:
+*x: A 6D Tensor of type float16 or float32, with format NDC1HWC0 . \n
+
+*@par Outputs:
+*@li sum: A 3D Tensor of type float32 for SUM reduced "x".
+*@li square_sum: A 3D Tensor of type float32 for SUMSQ reduced "x" . \n
+
+*@attention Constraints:
+* This operator is a BatchNorm fusion operator for updating the moving
+* averages for training.
+* This operator is used in conjunction with BN3DTrainingUpdate.
+*/
+REG_OP(BN3DTrainingReduce)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(sum, TensorType({DT_FLOAT}))
+    .OUTPUT(square_sum, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(BN3DTrainingReduce)
+
 /**
 *@brief Performs the backpropagation of BatchNorm . \n
 
@@ -88,6 +109,49 @@ REG_OP(BNTrainingReduceGrad)
     .ATTR(epsilon, Float, 0.0001)
     .OP_END_FACTORY_REG(BNTrainingReduceGrad)
 
+/**
+*@brief Performs the backpropagation of BatchNorm . \n
+
+*@par Inputs:
+* Seven inputs, including:
+*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for
+* the gradient.
+*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0.
+*@li diff_scale: A 6D Tensor of type float32, with format NDC1HWC0,
+* for the mean of "x".
+*@li diff_offset: A 6D Tensor of type float32, with format NDC1HWC0,
+* for the variance of "x".
+*@li scale: A 6D Tensor of type float32, with format NDC1HWC0.
+*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0,
+* for the mean of "x".
+*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0,
+* for the variance of "x" . \n
+
+*@par Attributes:
+*epsilon: An optional float32. Defaults to "0.0001". A small float number
+* added to the variance of "x" . \n
+
+*@par Outputs:
+*y: A Tensor of type float16 or float32, with format NDC1HWC0, for the offset
+* of "x" . \n
+
+*@attention Constraints:
+* The preceding layer of this operator must be BN3DTrainingUpdateGrad . \n
+
+*@see BN3DTrainingUpdateGrad
+*/
+REG_OP(BN3DTrainingReduceGrad)
+    .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(diff_scale, TensorType({DT_FLOAT}))
+    .INPUT(diff_offset, TensorType({DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(batch_mean, TensorType({DT_FLOAT}))
+    .INPUT(batch_variance, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .OP_END_FACTORY_REG(BN3DTrainingReduceGrad)
+
 /**
 *@brief Performs reduced batch normalization . \n
 
@@ -120,7 +184,7 @@ REG_OP(BNTrainingReduceGrad)
 *@attention Constraints:
 *@li This operator is a BatchNorm fusion operator for updating the moving
 averages for training.
-*This operator is used in conjunction with BNTrainingReduce.
+*This operator is used in conjunction with BNTrainingReduce.
 *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square
 * root instruction.
 */
@@ -141,6 +205,59 @@ REG_OP(BNTrainingUpdate)
     .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
     .OP_END_FACTORY_REG(BNTrainingUpdate)
 
+/**
+*@brief Performs reduced batch normalization . \n
+
+*@par Inputs:
+* Seven inputs, including: (NDC1HWC0 supported)
+*@li x: A 6D Tensor of type float16 or float32.
+*@li sum: A 6D Tensor of type float32 for the output of operator
+* BN3DTrainingReduce.
+*@li square_sum: A 6D Tensor of type float32 for the output of operator
+* BN3DTrainingReduce.
+*@li scale: A 6D Tensor of type float32, for the scaling factor.
+*@li offset: A 6D Tensor of type float32, for the scaling offset.
+*@li mean: A 6D Tensor of type float32, for the updated mean.
+*@li variance: A 6D Tensor of type float32, for the updated variance . \n
+
+*@par Attributes:
+*@li epsilon: A required float32, specifying the small value added to variance
+* to avoid dividing by zero.
+*@li factor: A required float32, specifying the weight for updating the mean
+* and variance . \n
+
+*@par Outputs:
+* Five outputs, including: (NDC1HWC0 supported)
+*@li y: A 6D Tensor of type float16 or float32, for normalized "x".
+*@li mean: A 6D Tensor of type float32, for the updated mean.
+*@li variance: A 6D Tensor of type float32, for the updated variance.
+*@li batch_mean: A 6D Tensor of type float32, for the mean of "x".
+*@li batch_variance: A 6D Tensor of type float32, for the variance of "x" . \n
+
+*@attention Constraints:
+*@li This operator is a BatchNorm fusion operator for updating the moving
+averages for training.
+*This operator is used in conjunction with BN3DTrainingReduce.
+*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square
+* root instruction.
+*/
+REG_OP(BN3DTrainingUpdate)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(sum, TensorType({DT_FLOAT}))
+    .INPUT(square_sum, TensorType({DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(offset, TensorType({DT_FLOAT}))
+    .INPUT(mean, TensorType({DT_FLOAT}))
+    .INPUT(variance, TensorType({DT_FLOAT}))
+    .REQUIRED_ATTR(factor, Float)
+    .REQUIRED_ATTR(epsilon, Float)
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(mean, TensorType({DT_FLOAT}))
+    .OUTPUT(variance, TensorType({DT_FLOAT}))
+    .OUTPUT(batch_mean, TensorType({DT_FLOAT}))
+    .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(BN3DTrainingUpdate)
+
 /**
 *@brief Performs batch normalization for inference . \n
 
@@ -284,6 +401,40 @@ REG_OP(BNTrainingUpdateGrad)
     .OUTPUT(diff_offset, TensorType({DT_FLOAT}))
     .OP_END_FACTORY_REG(BNTrainingUpdateGrad)
 
+/**
+*@brief Performs the backpropagation of BatchNorm . \n
+
+*@par Inputs:
+* Four inputs, including:
+*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0,
+* for the gradient.
+*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0.
+*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0,
+* for the mean of "x".
+*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0,
+* for the variance of "x" . \n
+
+*@par Attributes:
+*epsilon: An optional float32. Defaults to "0.0001". A small float number
+* added to the variance of "x" . \n
+
+*@par Outputs:
+*@li diff_scale: A Tensor of type float32, with format NDC1HWC0,
+* for the offset of "scale".
+*@li diff_offset: A Tensor of type float32, with format NDC1HWC0,
+* for the offset of "offset" . \n
+
+*/
+REG_OP(BN3DTrainingUpdateGrad)
+    .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(batch_mean, TensorType({DT_FLOAT}))
+    .INPUT(batch_variance, TensorType({DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .OUTPUT(diff_scale, TensorType({DT_FLOAT}))
+    .OUTPUT(diff_offset, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(BN3DTrainingUpdateGrad)
+
 /**
 *@brief Performs the backpropagation of BatchNorm for inference . \n
 
@@ -635,8 +786,8 @@ REG_OP(ReduceMin)
 * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead.
 */
 REG_OP(ReduceMinD)
-    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32}))
     .REQUIRED_ATTR(axes, ListInt)
     .ATTR(keep_dims, Bool, false)
     .OP_END_FACTORY_REG(ReduceMinD)
@@ -747,14 +898,14 @@ REG_OP(Reduction)
 *@brief Computes the euclidean norm of elements across dimensions of a tensor . \n
 
 *@par Inputs:
-*@li input_tensor: A Tensor. Must be one of the following types: float16, float32, int32.
+*@li x: A Tensor. Must be one of the following types: float16, float32, int32.
 *@li axes: A Tensor of type int8 or int32. Specifies the dimensions to reduce. Defaults to "None" . \n
 
 *@par Attributes:
 *keep_dims: An optional bool. If "True", reduced dimensions will be retained. Defaults to "False" . \n
 
 *@par Outputs:
-*output_tensor: A Tensor. Must be one of the following types: float16, float32, int32 . \n
+*y: A Tensor. Must be one of the following types: float16, float32, int32 . \n
 
 *@attention Constraints:
 * If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)) . \n
@@ -821,7 +972,7 @@ Defaults to "0.00001" . \n
 *batch_ variance: A Tensor of type float32 for the result variance . \n
 
 *@attention Constraints:
-*For Ascend 310, the result accuracy fails to reach 1� due to the square root instruction.
+*For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction.
 */
 REG_OP(INInferV2)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -839,7 +990,7 @@ REG_OP(INInferV2)
 *@brief Performs reduced instance normalization . \n
 
 *@par Inputs:
-*x: A Tensor of type float16 or float32, with format NC1HWC0 . \n
+*x: A Tensor of type float16 or float32. \n
 
 *@par Outputs:
 *@li sum: A Tensor of type float32 for SUM reduced "x".
@@ -862,19 +1013,19 @@ REG_OP(INTrainingReduceV2)
 *@par Inputs:
 * Seven inputs, including: (NC1HWC0supported)
 *@li x: A Tensor of type float16 or float32.
-*@li sum: A T [N, C1, 1, 1, C0] ensor of type float32 for the output of operator INTrainingReduceV2.
-*@li square_sum: A  [N, C1, 1, 1, C0] Tensor of type float32 for the output of operator INTrainingReduceV2.
-*@li gamma: A  [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma.
-*@li beta: A  [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta.
-*@li mean: A  [N, C1, 1, 1, C0] Tensor of type float32, for the updated mean.
-*@li variance: A  [N, C1, 1, 1, C0] Tensor of type float32, for the updated variance . \n
+*@li sum: A Tensor of type float32 for the output of operator INTrainingReduceV2.
+*@li square_sum: A Tensor of type float32 for the output of operator INTrainingReduceV2.
+*@li gamma: A Tensor of type float32, for the scaling gamma.
+*@li beta: A Tensor of type float32, for the scaling beta.
+*@li mean: A Tensor of type float32, for the updated mean.
+*@li variance: A Tensor of type float32, for the updated variance . \n
 
 *@par Attributes:
 *@li momentum: A required float32, specifying the momentum to update mean and var.
 *@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n
 
 *@par Outputs:
-* Three outputs, including: (NC1HWC0 supported)
+* Three outputs
 *@li y: A Tensor of type float16 or float32, for normalized "x".
 *@li batch_mean: A Tensor of type float32, for the updated mean.
 *@li batch_variance: A Tensor of type float32, for the updated variance . \n
@@ -882,7 +1033,7 @@ REG_OP(INTrainingReduceV2)
 *@attention Constraints:
 *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training.
 * This operator is used in conjunction with INTrainingReduceV2.
-*@li For Ascend 310, the result accuracy fails to reach 1� due to the square root instruction.
+*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
 */
 REG_OP(INTrainingUpdateV2)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -965,7 +1116,7 @@ for the updated variance.
 *@attention Constraints:
 *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training.
 * This operator is used in conjunction with GNTrainingUpdate.
-*@li For Ascend 310, the result accuracy fails to reach 1� due to the square root instruction.
+*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
 */
 REG_OP(GNTrainingUpdate)
     .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -982,6 +1133,98 @@ REG_OP(GNTrainingUpdate)
     .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
     .OP_END_FACTORY_REG(GNTrainingUpdate)
 
+/**
+*@brief Joins a string Tensor across the given dimensions. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li input: A Tensor of type string. The text to be processed.
+*@li reduction_indices: A Tensor of type int32. The dimensions to reduce over.
+
+*@par Attributes:
+*@li keep_dims: An optional bool. Defaults to true (the value registered below). If true, retain reduced dimensions with length 1.
+*@li separator: An optional string. Defaults to "". The separator placed between the joined strings.
+
+*@par Outputs:
+*@li output: A Tensor of type string.
+*/
+REG_OP(ReduceJoin)
+    .INPUT(input, TensorType({DT_STRING}))
+    .INPUT(reduction_indices, TensorType({DT_INT32}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(keep_dims, Bool, true)
+    .ATTR(separator, String, "")
+    .OP_END_FACTORY_REG(ReduceJoin)
+
+/**
+* @brief Calculates the standard deviation and average value of Tensors.
+
+* @par Inputs:
+* @li x: A Tensor. Must be one of the following types:
+*     float16, float32. \n
+
+* @par Attributes:
+* Three Attributes, including:
+* @li dim: An optional listint, Defaults to "None". \n
+
+* @li unbiased: An optional bool. Defaults to "True".
+*     If "True", Use Bessel Correction.
+*     If "False", Do not use Bessel Correction. \n
+
+* @li keepdim: An optional bool. Defaults to "False".
+*     If "True", Keep the original tensor dimension.
+*     If "False", Do not keep the original tensor dimension. \n
+
+* @par Outputs:
+* Two Outputs, including:
+* @li y1: A Tensor. Has the same type as "x".
+* @li y2: A Tensor. Has the same type as "x". \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator ReduceStd.
+*/
+REG_OP(ReduceStd)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y1, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y2, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(dim, ListInt, {})
+    .ATTR(unbiased, Bool, true)
+    .ATTR(keepdim, Bool, false)
+    .OP_END_FACTORY_REG(ReduceStd)
+
+/**
+* @brief Calculates the standard deviation of Tensors.
+
+* @par Inputs:
+* include:
+* @li x: A Tensor. Must be one of the following types: float16, float32. \n
+* @li mean: A Tensor. It's the mean of X. Must be one of the following types: float16, float32. \n
+
+
+* @par Attributes:
+* Three Attributes, including:
+* @li dim: An optional listint, Defaults to "None". \n
+* @li unbiased: An optional bool. Defaults to "True".
+*     If "True", Use Bessel Correction.
+*     If "False", Do not use Bessel Correction. \n
+* @li keepdim: An optional bool. Defaults to "False".
+*     If "True", Keep the original tensor dimension.
+*     If "False", Do not keep the original tensor dimension. \n
+
+* @par Outputs:
+* @li y: A Tensor. It's the std of X. Has the same type as "x".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator ReduceStdWithMean.
+*/
+REG_OP(ReduceStdWithMean)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(dim, ListInt, {})
+    .ATTR(unbiased, Bool, true)
+    .ATTR(keepdim, Bool, false)
+    .OP_END_FACTORY_REG(ReduceStdWithMean)
 } //namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h
index 1b60d42a..74ac83f8 100644
--- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h
+++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h
index 84723872..80546860 100644
--- a/third_party/fwkacllib/inc/ops/rnn.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -33,6 +33,7 @@ namespace ge {
 *@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z.
 *@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n
+*@li mask:A 1D Tensor. Must be one of the following types: uint8.
 
 *@par Attributes:
 *@li keep_prob:An integer identifying the keep prob in the op. Default to 1.
@@ -42,7 +43,6 @@ namespace ge {
 
 *@par Outputs:
 *seven outputs:
-*@li mask:A 1D Tensor. Must be one of the following types: uint8.
 *@li ct:A 4D Tensor. Must be one of the following types: float16, float32.
 *@li ht:A 4D Tensor. Must be one of the following types: float16.
 *@li it:A 4D Tensor. Must be one of the following types: float16, float32.
@@ -187,16 +187,16 @@ REG_OP(DynamicRNNGrad)
 *@brief: DynamicRNN calculation.
 *@par Inputs:
 *ten inputs:
-*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
-*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
-*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND.
-*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
-*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
-*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
-*@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n
+*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+*@li seq_length:A optional Tensor. Only Support float16 in FRACTAL_NZ and int32 in ND.
+*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n
 
 *@par Attributes:
 *@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
@@ -209,6 +209,7 @@ REG_OP(DynamicRNNGrad)
 *@li time_major:An bool identifying the time major in the op. Default to true.
 *@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
 *@li forget_bias:An float identifying the forget bias in the op. Default to 0.
+*@li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifjo". Default to "ijfo".
 *@li is_training:An bool identifying is training in the op. Default to true . \n
 
 *@par Outputs:
@@ -221,12 +222,14 @@ REG_OP(DynamicRNNGrad)
 *@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@par Third-party framework compatibility:
+* Compatible with the TF operator LSTM.
 */
 REG_OP(DynamicRNN)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16}))
     .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -251,9 +254,237 @@ REG_OP(DynamicRNN)
     .ATTR(time_major, Bool, true)
     .ATTR(activation, String, "tanh")
     .ATTR(forget_bias, Float, 0.0)
+    .ATTR(gate_order, String, "ijfo")
     .ATTR(is_training, Bool, true)
     .OP_END_FACTORY_REG(DynamicRNN)
 
+/**
+*@brief: DynamicRNNV2 calculation.
+*@par Inputs:
+*eleven inputs:
+*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li weight_input:A required 4D Tensor. Must be one of the following types: float16, float32.
+*The format must be FRACTAL_Z.
+*@li weight_hidden:A required 4D Tensor. Must be one of the following types: float16, float32.
+*The format must be FRACTAL_Z.
+*@li b:An optional 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
+*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n
+
+*@par Attributes:
+*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
+*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL".
+*Only UNIDIRECTIONAL is currently supported.
+*@li cell_depth:An integer identifying the cell depth in the op. Default to 1.
+*@li use_peephole:An bool identifying if use peephole in the op. Default to false.
+*@li keep_prob:An float identifying the keep prob in the op. Default to 1.
+*@li cell_clip:An float identifying the cell clip in the op. Default to -1.
+*@li num_proj:An integer identifying the num projection in the op. Default to 0.
+*@li time_major:An bool identifying the time major in the op. Default to true.
+*@li activation:An string identifying the type of activation function in the op. Default to "tanh".
+*Only tanh is currently supported.
+*@li recurrent_activation:An string identifying the type of activation function in the op. Default to "sigmoid".
+*Support "sigmoid" and "hard_sigmoid". In general, set "hard_sigmoid" for TF Keras LSTM.
+*@li forget_bias:An float identifying the forget bias in the op. Default to 0.
+*@li gate_order:An string identifying the type of gate order in the op. Support "ijfo" and "ifco". Default to "ijfo".
+*Set "ijfo" for TF operator LSTM, Set "ifco" for TF Keras LSTM.
+*@li stateful: An bool identifying the type of stateful in the op. Default to false. Only false is currently supported.
+*@li merge_mode: An string identifying the type of merge_mode in the op. Default to "concat".
+*Only "concat" is currently supported.
+*@li is_training:An bool identifying is training in the op. Default to true . \n
+
+*@par Outputs:
+*eight outputs:
+*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*Return the last output_h.
+*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*Return the last output_c.
+*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li tanhc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@par Third-party framework compatibility:
+* Compatible with the TF operator LSTM or TF keras operator LSTM.
+*/
+
+REG_OP(DynamicRNNV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(weight_input, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(cell_type, String, "LSTM")
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(cell_depth, Int, 1)
+    .ATTR(use_peephole, Bool, false)
+    .ATTR(keep_prob, Float, 1.0)
+    .ATTR(cell_clip, Float, -1.0)
+    .ATTR(num_proj, Int, 0)
+    .ATTR(time_major, Bool, true)
+    .ATTR(activation, String, "tanh")
+    .ATTR(recurrent_activation, String, "sigmoid")
+    .ATTR(forget_bias, Float, 0.0)
+    .ATTR(gate_order, String, "ijfo")
+    .ATTR(stateful, Bool, false)
+    .ATTR(merge_mode, String, "concat")
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(DynamicRNNV2)
+
+/**
+*@brief: DynamicRNNV3 calculation.
+*@par Inputs:
+*twelve inputs:
+*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND.
+*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n
+*@li real_mask:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li project:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+
+*@par Attributes:
+*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported.
+*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported.
+*@li cell_depth:An integer identifying the cell depth in the op. Default to 1.
+*@li use_peephole:An bool identifying if use peephole in the op. Default to false.
+*@li keep_prob:An float identifying the keep prob in the op. Default to 1.
+*@li cell_clip:An float identifying the cell clip in the op. Default to -1.
+*@li num_proj:An integer identifying the num projection in the op. Default to 0.
+*@li time_major:An bool identifying the time major in the op. Default to true.
+*@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported.
+*@li forget_bias:An float identifying the forget bias in the op. Default to 0.
+*@li is_training:An bool identifying is training in the op. Default to true . \n
+
+*@par Outputs:
+*eight outputs:
+*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li tanhc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@par Third-party framework compatibility:
+* Compatible with the TF operator LSTM.
+*/
+REG_OP(DynamicRNNV3)  /* Registers 3 required inputs, 9 optional inputs, 8 outputs and 11 optional attributes. */
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
+    .OPTIONAL_INPUT(real_mask, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(project, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(tanhc, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(cell_type, String, "LSTM")
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(cell_depth, Int, 1)
+    .ATTR(use_peephole, Bool, false)
+    .ATTR(keep_prob, Float, 1.0)
+    .ATTR(cell_clip, Float, -1.0)
+    .ATTR(num_proj, Int, 0)
+    .ATTR(time_major, Bool, true)
+    .ATTR(activation, String, "tanh")
+    .ATTR(forget_bias, Float, 0.0)
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(DynamicRNNV3)
+
+/**
+*@brief: DynamicLSTMV2 calculation.
+*@par Inputs:
+*eleven inputs:
+*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+*@li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+*@li w_xc_x_static:An optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+*@li h0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li c0:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li wci:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li wcf:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li wco:An optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li mask:An optional 1D Tensor. Must be one of the following types: uint8. The format must be ND.
+
+*@par Attributes:
+*@li num_output:An integer identifying the num_output in the op. Default to 0.
+*@li expose_hidden:A bool identifying the expose_hidden in the op. Default to false.
+*@li need_output_last:A bool identifying the need_output_last in the op. Default to false.
+*@li forget_bias:A float identifying the forget bias in the op. Default to 0.
+
+*@par Outputs:
+*five outputs:
+*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@par Third-party framework compatibility:
+* Compatible with the Caffe operator LSTM.
+*@par Restrictions:
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynamicLSTMV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(cont, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(w_xc_x_static, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(h0, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(c0, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(last_output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(last_output_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(num_output, Int, 0)
+    .ATTR(expose_hidden, Bool, false)
+    .ATTR(need_output_last, Bool, false)
+    .ATTR(forget_bias, Float, 0.0)
+    .OP_END_FACTORY_REG(DynamicLSTMV2)
+
 /**
 *@brief: LSTMInputGrad calculation.
 *@par Inputs:
@@ -297,6 +528,60 @@ REG_OP(LSTMInputGrad)
     .OP_END_FACTORY_REG(LSTMInputGrad)
 
 
+
+/**
+*@brief: Dynamic LSTM Cell grad calculation. Calculate the gradient of gates and cell state.
+*@par Inputs:
+*twelve inputs:
+*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li t_state:A Tensor. Must be one of the following types: int32. \n
+
+*@par Attributes:
+*@li forget_bias:A float identifying the forget bias in the op. Default to 1.
+*@li activation:A string identifying the type of activation function in the op. Default to "" (empty string). Only tanh is currently supported. \n
+*@li direction:A string that marks the calculation sequence of the operator. Default to "Forward".
+*@li gate_order:A string that marks the order of the 4 output gates. Default to "ijfo".
+
+*@par Outputs:
+*two outputs:
+*@li dgate:A 4D Tensor. Must be one of the following types: float16, float32.
+*@li dct_1:A 4D Tensor. Must be one of the following types: float16, float32.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynamicLSTMGradCell)
+    .INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dc, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(i, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(j, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(f, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(o, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(tanhct, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(mask, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(t_state, TensorType({DT_INT32}))  /* The only integer input; the original listed DT_INT32 twice. */
+    .OUTPUT(dgate, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(dct_1, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(forget_bias, Float, 1)
+    .ATTR(activation, String, "")
+    .ATTR(direction, String, "Forward")
+    .ATTR(gate_order, String, "ijfo")
+    .OP_END_FACTORY_REG(DynamicLSTMGradCell)
+
+
 /**
 *@brief: Basic LSTM Cell backward calculation.Calculate the gradient of input and hidden state.
 *@par Inputs:
@@ -475,9 +760,9 @@ REG_OP(BasicRNNCell)
     .OP_END_FACTORY_REG(BasicRNNCell)
 
 /**
-*@brief: DynamicGRU calculation.
+*@brief DynamicGRU calculation.
 *@par Inputs:
-*seven inputs: \n
+*seven inputs: 
 *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ.
 *@li w:Must be one of the following types: float16. The format must be FRACTAL_Z.
 *@li b:Must be one of the following types: float16, float32. The format must be ND.
@@ -497,7 +782,7 @@ REG_OP(BasicRNNCell)
 *@li is_training:An bool identifying is training in the op. Default to true.
 
 *@par Outputs:
-*five outputs: \n
+*five outputs: 
 *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
@@ -531,9 +816,9 @@ REG_OP(DynamicGRU)
     .OP_END_FACTORY_REG(DynamicGRU)
 
 /**
-*@brief: DynamicGRUV2 calculation.
+*@brief DynamicGRUV2 calculation.
 *@par Inputs:
-*seven inputs: \n
+*seven inputs: 
 *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ.
 *@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z.
 *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z.
@@ -555,16 +840,13 @@ REG_OP(DynamicGRU)
 *@li is_training:An bool identifying is training in the op. Default to true.
 
 *@par Outputs:
-*six outputs: \n
+*six outputs: 
 *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
-
-*@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(DynamicGRUV2)
     .INPUT(x, TensorType({DT_FLOAT16}))
@@ -592,6 +874,68 @@ REG_OP(DynamicGRUV2)
     .ATTR(is_training, Bool, true)
     .OP_END_FACTORY_REG(DynamicGRUV2)
 
+
+/**
+*@brief DynamicGRUV2Hidden calculation.
+*@par Inputs:
+*five inputs:
+*@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ.
+*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z.
+*@li bias_hidden:Optional. Must be one of the following types: float16, float32. The format must be ND.
+*@li seq_length:Optional. Must be one of the following types: int32. The format must be ND.
+*@li init_h:Optional. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+
+*@par Attributes:
+*@li direction:A string identifying the direction in the op. Default to "UNIDIRECTIONAL".
+* Only UNIDIRECTIONAL is currently supported.
+*@li cell_depth:An integer identifying the cell depth in the op. Default to 1.
+*@li keep_prob:A float identifying the keep prob in the op. Default to 1.
+*@li cell_clip:A float identifying the cell clip in the op. Default to -1.
+*@li num_proj:An integer identifying the num projection in the op. Default to 0.
+*@li time_major:A bool identifying the time major in the op. Default to true.
+*@li activation:A string identifying the type of activation function in the op. Default to "tanh".
+* Only tanh is currently supported.
+*@li gate_order:A string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option.
+*@li reset_after:A bool identifying whether to apply reset gate after matrix multiplication. Default to true.
+*@li is_training:A bool identifying whether the op is in training mode. Default to true.
+
+*@par Outputs:
+*six outputs:
+*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(DynamicGRUV2Hidden)
+    .INPUT(x_weight_input, TensorType({DT_FLOAT32}))
+    .INPUT(weight_hidden, TensorType({DT_FLOAT16}))
+    .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(direction, String, "UNIDIRECTIONAL")
+    .ATTR(cell_depth, Int, 1)
+    .ATTR(keep_prob, Float, 1.0)
+    .ATTR(cell_clip, Float, -1.0)
+    .ATTR(num_proj, Int, 0)
+    .ATTR(time_major, Bool, true)
+    .ATTR(activation, String, "tanh")
+    .ATTR(gate_order, String, "zrh")
+    .ATTR(reset_after, Bool, true)
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(DynamicGRUV2Hidden)
+
+
 /**
 *@brief: DynamicGRUV2Grad calculation.
 *@par Inputs:
@@ -618,7 +962,6 @@ REG_OP(DynamicGRUV2)
 *@li cell_clip:An float identifying the cell clip in the op. Default to -1.
 *@li num_proj:An integer identifying the num projection in the op. Default to 0.
 *@li time_major:An bool identifying the time major in the op. Default to true.
-*@li bias_type:An string identifying the type of bias_type function in the op. Default to "double_bias".
 *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option.
 *@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true.
 
@@ -630,6 +973,9 @@ REG_OP(DynamicGRUV2)
 *@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(DynamicGRUV2Grad)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -658,7 +1004,6 @@ REG_OP(DynamicGRUV2Grad)
     .ATTR(cell_clip, Float, -1.0)
     .ATTR(num_proj, Int, 0)
     .ATTR(time_major, Bool, true)
-    .ATTR(bias_type, String, "double_bias")
     .ATTR(gate_order, String, "zrh")
     .ATTR(reset_after, Bool, true)
     .OP_END_FACTORY_REG(DynamicGRUV2Grad)
@@ -667,7 +1012,7 @@ REG_OP(DynamicGRUV2Grad)
 *@brief: GRUV2HiddenGrad calculation.
 *@par Inputs:
 *nine inputs: \n
-*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
@@ -678,6 +1023,7 @@ REG_OP(DynamicGRUV2Grad)
 *@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 
 *@par Attributes:
+*@li t_state:An Int identifying the current t state. Default to 0; [0, 4] is presumably the valid range (TODO confirm).
 *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option.
 
 *@par Outputs:
@@ -685,10 +1031,12 @@ REG_OP(DynamicGRUV2Grad)
 *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
 *@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
-REG_OP(GRUV2HiddenGrad)
-    .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
-    .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+REG_OP(GRUV2HiddenGradCell)
+    .INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
     .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -699,8 +1047,197 @@ REG_OP(GRUV2HiddenGrad)
     .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT}))
     .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(t_state, Int, 0)
     .ATTR(gate_order, String, "zrh")
-    .OP_END_FACTORY_REG(GRUV2HiddenGrad)
+    .OP_END_FACTORY_REG(GRUV2HiddenGradCell)
+
+/**
+* @brief Calculates the reversed outputs of the function "embedding". \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li grad: A mutable Tensor of word grad. Must be one of the following types:
+*     float32.
+* @li indices: A mutable word index Tensor of the int32 type. \n
+
+* @par Attributes:
+* @li num_weights: A required int attr used to judge how many words are in the dict. \n
+
+* @li padding_idx: An optional int attr that judges which word to fill with zeros. Defaults to "-1". \n
+
+* @li scale_grad_by_freq: An optional bool. Defaults to "False".
+*     If "True", "y" will be scaled by word frequency.
+*     If "False", "y" will not be scaled by word frequency. \n
+
+* @par Outputs:
+* @li y: A mutable output Tensor of new word grad. Has the same type as "grad". \n
+
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator EmbeddingDenseGrad.
+*/
+REG_OP(EmbeddingDenseGrad)
+    .INPUT(grad, TensorType({ DT_FLOAT32 }))  /* Word gradients (float32). */
+    .INPUT(indices, TensorType({ DT_INT32 })) /* Word indices (int32). */
+    .OUTPUT(y, TensorType({ DT_FLOAT32 }))    /* Result; same element type as "grad". */
+    .REQUIRED_ATTR(num_weights, Int)
+    .ATTR(padding_idx, Int, -1)
+    .ATTR(scale_grad_by_freq, Bool, false)
+    .OP_END_FACTORY_REG(EmbeddingDenseGrad)
+
+/**
+*@brief CommonLSTM calculation.
+*@par Inputs:
+*eight inputs: \n
+*@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM.
+*@li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+*@li sequence_lens:An optional input. A 1D Tensor. Must be one of the following types: int32. The format must be ND.
+*@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li p:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND.
+
+*@par Attributes:
+*@li activation_alpha:Optional scaling values used by some activation functions. Empty is currently supported.
+*@li activation_beta:Optional scaling values used by some activation functions. Empty is currently supported.
+*@li activations:The list of activation functions. Empty is currently supported.
+*@li clip:A float identifying the cell clip in the op. Default to -1.
+*@li direction:Specify if the RNN is forward, reverse, or bidirectional. Must be one of forward(default), reverse, or bidirectional.
+*@li hidden_size:A required int. Number of neurons in the hidden layer. Reserved.
+*@li input_forget:Couple the input and forget gates if 1. Default to 0. Reserved.
+
+*@par Outputs:
+*three outputs: \n
+*@li y:First dimension is time step, second dimension is direction, others is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
+*/
+
+REG_OP(CommonLSTM)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(initial_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(p, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y_c, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(activation_alpha, ListFloat, {})
+    .ATTR(activation_beta, ListFloat, {})
+    .ATTR(activations, ListString, {})
+    .ATTR(clip, Float, -1.0)
+    .ATTR(direction, String, "forward")
+    .REQUIRED_ATTR(hidden_size, Int)
+    .ATTR(input_forget, Int, 0)
+    .OP_END_FACTORY_REG(CommonLSTM)
+
+/**
+ * @brief Calculate the mask. According to hidden_size and num_step, convert seq_length to mask.
+ *
+ * @par Inputs:
+ * @li seq_length: A 1D Tensor. Must be one of the following types: int32. Record the current length of each batch. [batch_size].
+ * @li b: A 1D Tensor. Must be one of the following types: fp16/fp32. Record the hidden_size. [4 * hidden_size].
+ * @li x: A 3D Tensor. Must be one of the following types: fp16/fp32. Record the num_step/batch_size/input_size. [num_step, batch_size, input_size].
+ *
+ * @par Outputs:
+ * @li seq_mask: A 3D Tensor. Must be one of the following types: fp16/fp32. With the shape of [num_step, batch_size, hidden_size]. Has the same type as "b". \n
+ *
+ * @par Restrictions:
+ * Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+ */
+REG_OP(RnnGenMaskV2)
+    .INPUT(seq_length, TensorType({DT_INT32}))
+    .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(RnnGenMaskV2)
+
+/**
+* @brief Common GRU calculation.
+
+* @par Inputs:
+* Six inputs, including:
+* @li x: The input sequences packed (and potentially padded) into one 3D Tensor(float16). The format must be FRACTAL_NZ
+* @li w: The weight tensor for the gates is 3D Tensor(float16). The format must be FRACTAL_Z
+* @li r: The recurrence weight tensor is 3D Tensor(float16). The format must be FRACTAL_Z
+* @li b: Optional bias tensor for the gates. The format must be ND
+* @li sequence_lens: Optional tensor specifying lengths of sequences(int32). The format must be ND
+* @li initial_h: Optional initial value of the hidden(float16,float32). The format must be FRACTAL_NZ
+
+* @par Attributes:
+* @li activation_alpha: Optional scaling values used by some activation functions. Defaults to an empty list. \n
+* @li activation_beta: Optional scaling values used by some activation functions. Defaults to an empty list. \n
+* @li activations: A list of 2 (or 4 if bidirectional) activation functions for update, reset, and hidden gates. Defaults to an empty list. \n
+* @li clip: Cell clip threshold. Defaults to -1. \n
+* @li direction: Specify if the RNN is forward, reverse, or bidirectional. Defaults to "forward". \n
+* @li hidden_size: A required int. Number of neurons in the hidden layer. \n
+* @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. Defaults to 0. \n
+
+* @par Outputs:
+* @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). The format must be FRACTAL_NZ
+* @li y_h: The last output value of the hidden(float16,float32). The format must be FRACTAL_NZ
+*/
+REG_OP(CommonGRU)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(r, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OPTIONAL_INPUT(sequence_lens, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(initial_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y_h, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(activation_alpha, ListFloat, {})
+    .ATTR(activation_beta , ListFloat, {})
+    .ATTR(activations , ListString, {})
+    .ATTR(clip, Float, -1.0)
+    .ATTR(direction, String, "forward")
+    .REQUIRED_ATTR(hidden_size, Int)
+    .ATTR(linear_before_reset , Int, 0)
+    .OP_END_FACTORY_REG(CommonGRU)
+/**
+* @brief Computes sums, means or maxes of "bags" of embeddings looked up from "weight". \n
+
+* @par Inputs:
+* Four inputs, including:
+* @li weight: A mutable Tensor of the embedding weights. Must be one of the following types:
+*     float32.
+* @li indices: A mutable word index Tensor of the int32 type. \n
+* @li offsets: An optional Tensor of the int32 type. Presumably the start index of each bag in "indices" (TODO confirm). \n
+* @li per_sample_weights: An optional Tensor of the float32 type. If not specified, all weights are taken to be 1.
+*     If specified, per_sample_weights must have exactly the same shape as input
+*     and is treated as having the same offsets, if those are not None.
+*     Only supported for mode='sum'. \n
+
+* @par Attributes:
+* @li mode: An optional string, one of "sum", "mean" or "max". Specifies the way to reduce the bag. Defaults to "mean". \n
+
+* @li scale_grad_by_freq: An optional bool. Defaults to "False".
+*     If "True", gradients will be scaled by word frequency.
+*     If "False", gradients will not be scaled by word frequency. \n
+* @li sparse: An optional bool. If True, gradient w.r.t. the "weight" matrix will be a sparse tensor. Defaults to "False". \n
+* @li include_last_offset: An optional bool. If True, "offsets" has one additional element, where the last element
+*     is equivalent to the size of indices. This matches the CSR format. Defaults to "False". \n
+
+* @par Outputs:
+* @li y: A mutable output Tensor. Has the same type as "weight". \n
+
+* @par Third-party framework compatibility
+* Compatible with the PyTorch operator EmbeddingBag.
+*/
+REG_OP(EmbeddingBag)
+    .INPUT(weight, TensorType({ DT_FLOAT32 }))
+    .INPUT(indices, TensorType({ DT_INT32 }))
+    .OPTIONAL_INPUT(offsets, TensorType({DT_INT32}))
+    .OPTIONAL_INPUT(per_sample_weights, TensorType({DT_FLOAT32}))
+    .OUTPUT(y, TensorType({ DT_FLOAT32 }))
+    .ATTR(mode, String, "mean")
+    .ATTR(scale_grad_by_freq, Bool, false)
+    .ATTR(sparse, Bool, false)
+    .ATTR(include_last_offset, Bool, false)
+    .OP_END_FACTORY_REG(EmbeddingBag)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_
diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h
index b7649a44..089af326 100644
--- a/third_party/fwkacllib/inc/ops/rpn_ops.h
+++ b/third_party/fwkacllib/inc/ops/rpn_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/save_ops.h b/third_party/fwkacllib/inc/ops/save_ops.h
index 0ce473b7..5ce6c2e0 100644
--- a/third_party/fwkacllib/inc/ops/save_ops.h
+++ b/third_party/fwkacllib/inc/ops/save_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h
index cbd9839d..34c6a268 100644
--- a/third_party/fwkacllib/inc/ops/sdca_ops.h
+++ b/third_party/fwkacllib/inc/ops/sdca_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h
index 2c99e82e..1c26e033 100644
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -239,6 +239,30 @@ REG_OP(GatherV2D)
     .REQUIRED_ATTR(axis, Int)
     .OP_END_FACTORY_REG(GatherV2D)
 
+/**
+*@brief Gathers values along an axis specified by "dim". \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64.
+*@li index: A Tensor. Must be one of the following types: int64. \n
+
+*@par Attributes:
+* dim: An optional int. The axis along which to index. Defaults to 0. \n
+
+*@par Outputs:
+* y: A Tensor. Has the same type as "x". \n
+
+*@par Third-party framework compatibility
+*Compatible with the PyTorch operator Gather.
+*/
+
+REG_OP(GatherElements)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
+    .INPUT(index, TensorType({DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
+    .ATTR(dim, Int, 0)
+    .OP_END_FACTORY_REG(GatherElements)
+
 /**
 *@brief Extracts a strided slice of a tensor. Roughly speaking, this op
     extracts a slice of size (end-begin)/stride from the given input tensor.
@@ -275,8 +299,6 @@ REG_OP(GatherV2D)
 *@par Outputs:
 *y: A Tensor. Has the same type as "x" . \n
 
-*@attention Constraints:
-
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator StridedSlice.
 */
@@ -327,8 +349,6 @@ REG_OP(StridedSlice)
 *@par Outputs:
 *y: A Tensor. Has the same type as "x" . \n
 
-*@attention Constraints:
-
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator StridedSlice.
 
@@ -385,8 +405,6 @@ REG_OP(StridedSliceD)
 *@par Outputs:
 *output: A Tensor. Has the same type as "dy" . \n
 
-*@attention Constraints:
-
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator StridedSliceGradD.
 
@@ -444,8 +462,6 @@ REG_OP(StridedSliceGradD)
 *@par Outputs:
 *output: A Tensor has the same type as "dy" . \n
 
-*@attention Constraints:
-
 *@par Third-party framework compatibility
 * Compatible with the TensorFlow operator StridedSliceGrad.
 */
@@ -486,6 +502,38 @@ REG_OP(UnsortedSegmentSum)
     .OUTPUT(y, TensorType::NumberType())
     .OP_END_FACTORY_REG(UnsortedSegmentSum)
 
+/**
+*@brief Creates a one-dimensional tensor of size "steps" whose values are evenly spaced from "start" to
+*       "end", inclusive, on a logarithmic scale with base "base". \n
+
+*@par Inputs:
+*One input, including:
+* @li assist: A tensor. Must be one of the following types:
+*     float16, float32. \n
+
+* @par Attributes:
+* @li start: A required float. Used to select the start. \n
+* @li end: A required float. Used to select the end. \n
+* @li steps: An optional int. Defaults to 100. \n
+* @li base: An optional float. Defaults to 10.0. \n
+* @li dtype: An optional int. Defaults to 1. \n
+
+*@par Outputs:
+*y: A Tensor with the same type and shape as "assist". \n
+
+*@par Third-party framework compatibility
+*Compatible with the PyTorch operator logspace. \n
+*/
+REG_OP(LogSpaceD)
+    .INPUT(assist, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR (start, Float)
+    .REQUIRED_ATTR (end, Float)
+    .ATTR(steps, Int, 100)
+    .ATTR(base, Float, 10.0)
+    .ATTR(dtype, Int, 1)
+    .OP_END_FACTORY_REG(LogSpaceD)
+
 /**
 *@brief Computes the sum along segments of a tensor . \n
 
@@ -796,6 +844,34 @@ REG_OP(SliceD)
     .REQUIRED_ATTR(size, ListInt)
     .OP_END_FACTORY_REG(SliceD)
 
+/**
+*@brief Extracts a slice from a tensor.
+*       This operation extracts a slice of size "size" from a tensor "x"
+*       starting at the location specified by "offsets" . \n
+
+*@par Inputs:
+*Two inputs, including:
+*@li x: A Tensor. Must be one of the following types:
+* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8,
+* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 .
+*@li offsets: A Tensor of type int32 or int64. The starting location
+* for the slice. \n
+
+*@par Attributes:
+*@li size: The tensor shape . \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x". The slice extracted from the tensor.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead.
+*/
+REG_OP(SliceDV2)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(offsets, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::BasicType())
+    .REQUIRED_ATTR(size, ListInt)
+    .OP_END_FACTORY_REG(SliceDV2)
+    
 /**
 * @brief Finds values and indices of the "k" largest elements for the last
 * dimension . \n
@@ -829,8 +905,8 @@ REG_OP(SliceD)
 * @li sorted = true
 * @li It's unstable sorted indices on the platform of Ascend310
 
-* @par Third-party framework compatibility
-* @li Compatible with the TensorFlow operator TopK.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use TopKV2 instead.
 */
 REG_OP(TopKD)
     .INPUT(x, TensorType::RealNumberType())
@@ -855,6 +931,44 @@ REG_OP(TopKD)
 * Number of top elements to look for along the last dimension (along each row
 * for matrices) . \n
 
+* @par Attributes:
+* @li sorted: An optional bool. Defaults to true.
+* If true, the resulting "k" elements will be sorted by the values in descending
+* order.
+* @li dim: An optional int. Defaults to -1. For reserved use.
+* @li largest: An optional bool. Defaults to true. For reserved use. \n
+
+* @par Outputs:
+* @li values: A Tensor, specifying the sorted data. Has the same type as
+* "input".
+* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n
+
+* @see TopK()
+* @par Third-party framework compatibility
+* @li Compatible with the TensorFlow operator TopKV2.
+*/
+REG_OP(TopKV2)
+    .INPUT(x, TensorType::RealNumberType())
+    .INPUT(k, TensorType({DT_INT32}))
+    .OUTPUT(values, TensorType::RealNumberType())
+    .OUTPUT(indices, TensorType({DT_INT32}))
+    .ATTR(sorted, Bool, true)
+    .ATTR(dim, Int, -1)
+    .ATTR(largest, Bool, true)
+    .OP_END_FACTORY_REG(TopKV2)
+
+/**
+* @brief Finds values and indices of the "k" largest elements for the last
+* dimension . \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A 1D or higher tensor of type BasicType, with the last dimension
+* at least "k".
+* @li k: A 0D Tensor of type int32.
+* Number of top elements to look for along the last dimension (along each row
+* for matrices) . \n
+
 * @par Attributes:
 * @li sorted: An optional bool. Defaults to true.
 * If true, the resulting "k" elements will be sorted by the values in descending
@@ -876,15 +990,17 @@ REG_OP(TopK)
     .OUTPUT(values, TensorType::RealNumberType())
     .OUTPUT(indices, TensorType({DT_INT32}))
     .ATTR(sorted, Bool, true)
+    .ATTR(largest, Bool, true)
+    .ATTR(dim, Int, -1)
     .OP_END_FACTORY_REG(TopK)
 /**
 *@brief Creates a new tensor by applying sparse "updates" to individual values or slices within a tensor (initially zero for numeric, empty for string) of the given "shape" according to "indices" . \n
 
 *@par Inputs:
 *Inputs including:
-* @li indices: A required index tensor. Must be one of the following types: float32, float16, int32, int8, uint8.
-* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8.
-* @li shape: A required list of int32, specifying the output shape.
+* @li indices: A required index tensor. Must be one of the following types: int32 or int64.
+* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8...
+* @li shape: A required list of int32 or int64, specifying the output shape.
 *@par Outputs:
 *y:A output Tensor with same datatype as "updates" . \n
 
@@ -895,7 +1011,7 @@ REG_OP(TopK)
 * Compatible with the TensorFlow operator ScatterNd.
 */
 REG_OP(ScatterNd)
-    .INPUT(indices, TensorType::BasicType())
+    .INPUT(indices, TensorType::IndexNumberType())
     .INPUT(x, TensorType::BasicType())
     .INPUT(shape, TensorType::IndexNumberType())
     .OUTPUT(y, TensorType::BasicType())
@@ -908,11 +1024,11 @@ REG_OP(ScatterNd)
 *@par Inputs:
 *Inputs including:
 * @li indices: A required index tensor. Must be one of the following types:
- * float, float16, int32, int16. format:ND.
+ * int32 or int64. format:ND.
 * @li x: A required slice tensor. Must be one of the following types:
- * float, float16, int32, int16. format:ND.
+ * float16, float, int32, int8, uint8. format:ND.
 *@par Attributes:
-* @li shape: A required list of int32, specifying the output shape.
+* @li shape: A required list of int32 or int64, specifying the output shape.
 *@par Outputs:
 *y: A Tensor. Has the same type as "x". format:ND . \n
 
@@ -927,8 +1043,8 @@ REG_OP(ScatterNd)
 */
 REG_OP(ScatterNdD)
     .INPUT(indices, TensorType::IndexNumberType())
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
     .REQUIRED_ATTR(shape, ListInt)
     .OP_END_FACTORY_REG(ScatterNdD)
 
@@ -1752,6 +1868,33 @@ REG_OP(Crop)
       .REQUIRED_ATTR(offsets, ListInt)
       .OP_END_FACTORY_REG(Crop)
 
+/**
+*@brief Returns a namedtuple (values, indices) where values is
+* the cumulative minimum of elements of input in the dimension dim.
+* And indices is the index location of each minimum value found in the dimension dim. \n
+
+*@par Inputs:
+*One input, including:
+* @li x: A tensor. Must be one of the following types:
+*     float16, float32, int32, uint32, int8, uint8. \n
+
+*@par Attributes:
+* @li axis: Axis along which to cummin. \n
+
+*@par Outputs:
+* y: A Tensor with the same type and shape of x's. \n
+* indices: A Tensor with the int32 type and the same shape of x's. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Cummin. \n
+*/
+REG_OP(Cummin)
+    .INPUT(x, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OUTPUT(indices, TensorType::BasicType())
+    .REQUIRED_ATTR(axis, Int)
+    .OP_END_FACTORY_REG(Cummin)
+
 /**
 *@brief Extends the input with copies of data along a specified dimension. For example:
 *(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2);
@@ -1921,6 +2064,249 @@ REG_OP(CumulativeLogsumexpD)
     .ATTR(exclusive, Bool, false)
     .ATTR(reverse, Bool, false)
     .OP_END_FACTORY_REG(CumulativeLogsumexpD)
+
+/**
+* @brief Add updates to var according to axis and indices.
+
+* @par Inputs:
+* Three inputs, including:
+* @li var: A Tensor. Must be one of the following types:
+*     float16, float32, int16, int32, int8, uint8.
+* @li indices: A Tensor of the indices, type should be int32.
+* @li updates: A Tensor of the same type as "var". \n
+
+* @par Attributes:
+* @li axis: A required int to specify the axis to perform indices add. \n
+
+* @par Outputs:
+* @li var: A Tensor. Same as input "var".
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch operator index_add_.
+*/
+REG_OP(InplaceIndexAdd)
+    .INPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8,
+                            DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .INPUT(updates, TensorType({DT_INT16, DT_INT32, DT_INT8,
+                                DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .OUTPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8,
+                            DT_UINT8, DT_FLOAT32, DT_FLOAT16}))
+    .REQUIRED_ATTR(axis, Int)
+    .OP_END_FACTORY_REG(InplaceIndexAdd)
+
+/**
+* @brief Replaces the values of "x" with "value" according to "mask".
+* @par Inputs:
+* Three inputs, including:
+*  @li x: A Tensor of dtype float16, float32, int64, int32 or int8.
+*  @li mask: A Tensor of dtype bool.
+*  @li value: A Tensor of dtype float16, float32, int64, int32 or int8.
+
+* @par Outputs:
+*  @li y: A tensor. Must be one of the following dtypes:
+*   float16, float32, int64, int32, int8.
+*/
+REG_OP(MaskedFill)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32, DT_INT64}))
+    .INPUT(mask, TensorType({DT_BOOL}))
+    .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32, DT_INT64}))
+    .OP_END_FACTORY_REG(MaskedFill)
+
+/**
+* @brief Chooses the values of "x" according to "mask".
+
+* @par Inputs:
+* Two inputs, including:
+*  @li x: A Tensor of dtype float16 or float32.
+*  @li mask: A Tensor of dtype bool. \n
+
+* @par Outputs:
+*  @li y: A tensor with the same type as x. \n
+
+* @par Third-party framework compatibility
+* Compatible with the NumPy operator select.
+* Replaces the PyTorch operator masked_select in some scenarios. \n
+*/
+REG_OP(MaskedSelectV2)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(mask, TensorType({DT_BOOL}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OP_END_FACTORY_REG(MaskedSelectV2)
+
+/**
+* @brief Slices a tensor along its last dim, e.g. a[..., start:end:stride]. \n
+
+* @par Inputs:
+* One input, including:
+* @li x: A Tensor. Must be one of the following types: float16, float32, double, int8, int16, int32, int64.
+
+* @par Attributes:
+* @li start: A required int. Start index of the last dim. \n
+* @li end: A required int. End index of the last dim. \n
+* @li stride: An optional int. Stride of the slice. Defaults to 1. \n
+
+* @par Outputs:
+* @li y: A Tensor. Has the same type as "x". \n
+
+* @par Third-party framework compatibility
+* No compatibility
+*/
+REG_OP(SliceLastDim)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64}))
+    .REQUIRED_ATTR(start, Int)
+    .REQUIRED_ATTR(end, Int)
+    .ATTR(stride, Int, 1)
+    .OP_END_FACTORY_REG(SliceLastDim)
+
+/**
+* @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n
+*   extracts a slice of size (end-begin)/stride from the given input tensor. \n
+*   Starting at the location specified by begin the slice continues by \n
+*   adding stride to the index until all dimensions are not less than end. \n
+*
+* @par Inputs:
+* Five inputs, including:
+* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n
+*     complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n
+*     complex128, float16, uint32, uint64. \n
+* @li begin: A Tensor of type int32 or int64, for the index of the first value to select.
+*
+* @li end: A Tensor of type int32 or int64, for the index of the last value to select.
+*
+* @li axes: An optional Tensor of type int32 or int64, indicating the axes to be selected.
+*
+* @li strides: An optional Tensor of type int32 or int64, for the increment.
+*
+* @par Attributes:
+* @li begin_mask: An optional int. Defaults to 0. \n
+*     A bitmask where a bit "i" being "1" means to ignore the begin \n
+*     value and instead use the largest interval possible.
+* @li end_mask: An optional int. Defaults to 0. \n
+*     Analogous to "begin_mask".
+* @li ellipsis_mask: An optional int. Defaults to 0. \n
+*     A bitmask where bit "i" being "1" means the "i"th position \n
+*     is actually an ellipsis.
+* @li new_axis_mask: An optional int. Defaults to 0. \n
+*     A bitmask where bit "i" being "1" means the "i"th \n
+*     specification creates a new shape 1 dimension.
+* @li shrink_axis_mask: An optional int. Defaults to 0. \n
+*     A bitmask where bit "i" implies that the "i"th \n
+*     specification should shrink the dimensionality.
+*
+* @par Outputs:
+* y: A Tensor. Has the same type as "x".
+*
+* @attention Constraints:
+*
+* @par Third-party framework compatibility
+* Compatible with the TensorFlow operator StridedSliceV2.
+*/
+REG_OP(StridedSliceV2)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(begin, TensorType::IndexNumberType())
+    .INPUT(end, TensorType::IndexNumberType())
+    .OPTIONAL_INPUT(axes, TensorType::IndexNumberType())
+    .OPTIONAL_INPUT(strides, TensorType::IndexNumberType())
+    .ATTR(begin_mask, Int, 0)
+    .ATTR(end_mask, Int, 0)
+    .ATTR(ellipsis_mask, Int, 0)
+    .ATTR(new_axis_mask, Int, 0)
+    .ATTR(shrink_axis_mask, Int, 0)
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(StridedSliceV2)
+
+/**
+*@brief Fills the elements of the input tensor with value val by selecting the indices in the order given in index. \n
+
+*@par Inputs:
+*Three inputs, including:
+* @li x: A tensor. Must be one of the following types:
+*     float16, float32, int32. \n
+*@li assist1: A tensor. Must be one of the following types:
+*     float16, float32, int32. \n
+*@li assist2: A tensor. Must be one of the following types:
+*     float16, float32, int32. \n
+
+* @par Attributes:
+* @li dim: A required int. Used to select the dimension of this tensor. \n
+
+*@par Outputs:
+*y: A Tensor with the same type and shape as "x". \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator IndexFill. \n
+*/
+REG_OP(IndexFillD)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(assist1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(assist2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .REQUIRED_ATTR(dim, Int)
+    .OP_END_FACTORY_REG(IndexFillD)
+
+/**
+* @brief For each row r of x and for each column c, do x(r, c) += src(j, c), \n
+*   where j ranges from indices[r].first through indices[r].second - 1. \n
+*   In general indices must be >= 0 and < src.NumRows(); \n
+*   but to represent an empty range you may use the pair (-1, -1) or any pair of numbers (i, j) such that i >= j. \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li x: A Tensor. Must be one of the following types:
+*     float16, float32.
+* @li indices: A Tensor of the indices, type should be int32.
+* @li src: A Tensor of the same type as "x". \n
+
+* @par Outputs:
+* @li x: A Tensor. Same as input "x".
+
+* @par Third-party framework compatibility
+* Compatible with the kaldi operator AddRowRanges.
+*/
+REG_OP(AddRowRanges)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(src, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(indices, TensorType({DT_INT32}))
+    .OUTPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OP_END_FACTORY_REG(AddRowRanges)
+
+/**
+*@brief Masked fill of a tensor along one axis by range.
+* It is a customized masked fill range operator . \n
+
+*@par Inputs:
+* Four inputs, including:
+*@li x: input tensor. A ND Tensor of float32/float16/int32/int8 with shapes
+* 1-D (D,), 2-D(N, D), 3-D(N, C, D)
+*@li start: masked fill start pos. A 2D Tensor of int32 with
+* shape (num, N). "num" indicates the number of loop masked fill, and the value N
+* indicates the batch of ND Tensor, if input x shape is 1-D, N = 1. \n
+*@li end: masked fill end pos. A 2D Tensor of int32 with
+* shape (num, N). "num" indicates the number of loop masked fill, and the value N
+* indicates the batch of ND Tensor. \n
+*@li value: masked fill value. A 1D Tensor of float32/float16/int32/int8 with
+* shape (num,). "num" indicates the number of loop masked fill
+
+*@par Attributes:
+*@li axis: A required int. The axis along which the masked fill is applied.
+
+*@par Outputs:
+*y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D)
+
+* @par Restrictions:
+* Warning: input shape's length must not be bigger than 1024 * 1024 * 1024.
+*/
+REG_OP(MaskedFillRange)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32}))
+    .INPUT(start, TensorType({DT_INT32}))
+    .INPUT(end, TensorType({DT_INT32}))
+    .INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32}))
+    .REQUIRED_ATTR(axis, Int)
+    .OP_END_FACTORY_REG(MaskedFillRange)
 } // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/set_ops.h b/third_party/fwkacllib/inc/ops/set_ops.h
index 1d02fa15..04e04f1b 100644
--- a/third_party/fwkacllib/inc/ops/set_ops.h
+++ b/third_party/fwkacllib/inc/ops/set_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h
index d7512790..a1fc9ee6 100644
--- a/third_party/fwkacllib/inc/ops/sparse_ops.h
+++ b/third_party/fwkacllib/inc/ops/sparse_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -383,11 +383,11 @@ REG_OP(SparseFillEmptyRowsGrad)
 REG_OP(SparseTensorDenseMatMul)
     .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64}))
     .INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \
-        DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16}))
+        DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16, DT_INT64}))
     .INPUT(x1_shape, TensorType({DT_INT64}))
-    .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \
+    .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \
         DT_COMPLEX128, DT_FLOAT16}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \
         DT_COMPLEX128, DT_FLOAT16}))
     .ATTR(adjoint_a, Bool, false)
     .ATTR(adjoint_b, Bool, false)
diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h
index 64fa7814..34ccb398 100644
--- a/third_party/fwkacllib/inc/ops/spectral_ops.h
+++ b/third_party/fwkacllib/inc/ops/spectral_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -26,6 +26,24 @@
 
 namespace ge {
 
+/**
+*@brief Computes the inverse 1-dimensional discrete Fourier transform over the
+inner-most dimension of `x`. \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be the following types: complex64, complex128. \n
+
+*@par Outputs:
+*@li y: A complex tensor of the same rank as `x`. \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow IFFT operator.
+*/
+REG_OP(IFFT)
+    .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(IFFT)
+
 /**
 *@brief Real-valued fast Fourier transform . \n
 
@@ -47,6 +65,84 @@ REG_OP(RFFT)
     .OUTPUT(y, TensorType({DT_COMPLEX64}))
     .OP_END_FACTORY_REG(RFFT)
 
+/**
+*@brief Inverse real-valued fast Fourier transform. \n
+
+*@par Inputs:
+*@li x: A complex64 tensor.
+*@li fft_length: An int32 tensor of shape [1]. The FFT length. \n
+
+*@par Outputs:
+*@li y: A float32 tensor of the same rank as `x`. The inner-most
+  dimension of `x` is replaced with the `fft_length` samples of its inverse
+  1D Fourier transform. \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow IRFFT operator.
+*/
+REG_OP(IRFFT)
+    .INPUT(x, TensorType({DT_COMPLEX64}))
+    .INPUT(fft_length, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(IRFFT)
+
+
+/**
+*@brief 2D fast Fourier transform. \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n
+
+*@par Outputs:
+*@li y: A complex tensor of the same shape as `x`. The inner-most 2
+  dimensions of `x` are replaced with their 2D Fourier transform. \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow FFT2D operator.
+*/
+REG_OP(FFT2D)
+    .INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(FFT2D)
+
+/**
+*@brief Calculate the one-dimensional discrete Fourier transform on the
+innermost dimension of the input. \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be the following types: complex64, complex128. \n
+
+*@par Outputs:
+*@li y: A complex tensor with the same shape as input. The innermost dimension
+of the input is replaced by its 1-dimensional Fourier transform. \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow FFT operator.
+*/
+REG_OP(FFT)
+    .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(FFT)
+
+/**
+*@brief Calculate the inverse 2-dimensional discrete Fourier transform on the
+innermost 2 dimensions of the input. \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be the following types: complex64, complex128. \n
+
+*@par Outputs:
+*@li y: A complex tensor with the same shape as input. The innermost 2 dimensions
+of the input are replaced by their inverse two-dimensional Fourier transform. \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow IFFT2D operator.
+*/
+REG_OP(IFFT2D)
+    .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(IFFT2D)
+
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_
\ No newline at end of file
diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h
index efe4715d..fe25a46f 100644
--- a/third_party/fwkacllib/inc/ops/split_combination_ops.h
+++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -62,8 +62,8 @@ REG_OP(Split)
 *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64
 
 *@par Attributes:
-*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value.
-*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
+*@li split_dim: A required int32. Specifies the dimension along which to split. No default value.
+*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n
 
 *@par Outputs:
 *y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n
@@ -94,12 +94,12 @@ REG_OP(SplitD)
 *@par Inputs:
 * Three inputs, including:
 *@li x: An ND Tensor.
-*Must be one of the following types:
-*@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension.
-*@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split . \n
+*Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
+*@li size_splits: Must be one of the following types: int32, int64. Specifies a list containing the sizes of each output tensor along the split dimension.
+*@li split_dim: Must be of type int32. Specifies the dimension along which to split . \n
 
 *@par Attributes:
-*num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
+*num_split: A required int32. Specifies the number of output tensors. No default value . \n
 
 *@par Outputs:
 *y:  Dynamic output.A list of output tensors. Has the same type and format as "x" . \n
@@ -129,9 +129,9 @@ REG_OP(SplitV)
 *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64
 
 *@par Attributes:
-*@li size_splits: A required list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension.
-*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value.
-*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
+*@li size_splits: A required list of int32. Specifies a list containing the sizes of each output tensor along the split dimension.
+*@li split_dim: A required int32. Specifies the dimension along which to split. No default value.
+*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n
 
 *@par Outputs:
 *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n
@@ -317,15 +317,15 @@ REG_OP(Concat)
 *     int64, uint8, uint16, uint32, uint64, float16, float32, bool . It's a dynamic input. \n
 
 *@par Attributes:
-*@li axis: A optional int, defaultvalue is 0.
+*@li axis: An optional int, default value is 0.
 *     Dimension along which to pack. The range is [-(R+1), R+1).
 *@li N: A required int. Number of tensors . \n
 
 *@par Outputs:
 *y: A Tensor. Has the same type as "x".
+
 *@par Third-party framework compatibility
-*Compatible with the TensorFlow operator Pack.
-It's a dynamic output.
+* Compatible with the TensorFlow operator Pack.
 */
 REG_OP(Pack)
     .DYNAMIC_INPUT(x, TensorType::BasicType())
diff --git a/third_party/fwkacllib/inc/ops/state_ops.h b/third_party/fwkacllib/inc/ops/state_ops.h
index db1f5353..3c8e32b6 100644
--- a/third_party/fwkacllib/inc/ops/state_ops.h
+++ b/third_party/fwkacllib/inc/ops/state_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/ops/stateful_random_ops.h
index 366112d6..c2f65c6a 100644
--- a/third_party/fwkacllib/inc/ops/stateful_random_ops.h
+++ b/third_party/fwkacllib/inc/ops/stateful_random_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/stateless_random_ops.h b/third_party/fwkacllib/inc/ops/stateless_random_ops.h
index dad3c379..ff9daaa3 100644
--- a/third_party/fwkacllib/inc/ops/stateless_random_ops.h
+++ b/third_party/fwkacllib/inc/ops/stateless_random_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h
index 4a88bc79..f9cc2549 100644
--- a/third_party/fwkacllib/inc/ops/string_ops.h
+++ b/third_party/fwkacllib/inc/ops/string_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,6 +25,235 @@
 #include "graph/operator_reg.h"
 
 namespace ge {
+/**
+*@brief Creates ngrams from ragged string data. \n
+
+*@par Inputs:
+include:
+*@li data:A 1-D Tensor of type string. The values tensor of the ragged string tensor to make ngrams out of.
+*@li data_splits:A Tensor of type int32 or int64. The splits tensor of the ragged string tensor to make ngrams out of. \n
+
+*@par Attributes:
+* separator:A required string. The string to append between elements of the token. Use "" for no separator.
+* ngram_widths:An optional list of ints. The sizes of the ngrams to create. Defaults to an empty list.
+* left_pad:A required string. The string to use to pad the left side of the ngram sequence. Only used if pad_width != 0.
+* right_pad:A required string. The string to use to pad the right side of the ngram sequence. Only used if pad_width != 0.
+* pad_width:A required int. The number of padding elements to add to each side of each sequence.
+* preserve_short_sequences:A required bool. Whether to preserve short sequences. \n
+
+*@par Outputs:
+*@li ngrams:A Tensor of type string. The values tensor of the output ngrams ragged tensor.
+*@li ngrams_splits:A Tensor of type int32 or int64. The splits tensor of the output ngrams ragged tensor. \n
+
+*@see StringNGrams()
+
+*@par Third-party framework compatibility
+*compatible with StringNGrams op of tensorflow
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(StringNGrams)
+    .INPUT(data, TensorType({DT_STRING}))
+    .INPUT(data_splits, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(ngrams, TensorType({DT_STRING}))
+    .OUTPUT(ngrams_splits, TensorType({DT_INT32, DT_INT64}))
+    .REQUIRED_ATTR(separator, String)
+    .ATTR(ngram_widths, ListInt, {})
+    .REQUIRED_ATTR(left_pad, String)
+    .REQUIRED_ATTR(right_pad, String)
+    .REQUIRED_ATTR(pad_width, Int)
+    .REQUIRED_ATTR(preserve_short_sequences, Bool)
+    .OP_END_FACTORY_REG(StringNGrams)
+
+/**
+*@brief Decodes each string in `input` into a sequence of Unicode code points . \n
+
+*@par Inputs:
+include:
+*@li input:A Tensor of type string. The text to be decoded. Can have any shape. Note that the output is flattened
+to a vector of char values. \n
+
+*@par Attributes:
+* input_encoding:A required string. Text encoding of the input strings. This is any of the encodings supported
+by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
+* errors:An optional string. Error handling policy when there is invalid formatting found in the input.
+The value of 'strict' will cause the operation to produce an InvalidArgument
+error on any invalid input formatting. A value of 'replace' (the default) will
+cause the operation to replace any invalid formatting in the input with the
+`replacement_char` codepoint. A value of 'ignore' will cause the operation to
+skip any invalid formatting in the input and produce no corresponding output
+character.
+* replacement_char:An optional int. The replacement character codepoint to be used in place of any invalid
+formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+be used. The default value is the default unicode replacement character,
+U+FFFD (0xFFFD, decimal 65533).
+* replace_control_characters:An optional bool. Whether to replace the C0 control characters (00-1F) with the `replacement_char`. Default is false.
+* Tsplits:An optional type. Defaults to DT_INT64. Presumably the data type of `row_splits` (TODO confirm). \n
+
+*@par Outputs:
+*@li row_splits:A 1D Tensor of type int64 containing the row splits.
+*@li char_values:A 1D Tensor of type int32 containing the decoded codepoints.
+*@li char_to_byte_starts:A 1D Tensor of type int64 containing the byte index in the input string where each
+character in `char_values` starts. \n
+
+*@see UnicodeDecodeWithOffsets()
+
+*@par Third-party framework compatibility
+*compatible with UnicodeDecodeWithOffsets op of tensorflow
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(UnicodeDecodeWithOffsets)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(row_splits, TensorType({DT_INT64}))
+    .OUTPUT(char_values, TensorType({DT_INT32}))
+    .OUTPUT(char_to_byte_starts, TensorType({DT_INT64}))
+    .REQUIRED_ATTR(input_encoding, String)
+    .ATTR(errors, String, "replace")
+    .ATTR(replacement_char, Int, 65533)
+    .ATTR(replace_control_characters, Bool, false)
+    .ATTR(Tsplits, Type, DT_INT64)
+    .OP_END_FACTORY_REG(UnicodeDecodeWithOffsets)
+
+/**
+*@brief Decodes each string in `input` into a sequence of Unicode code points. \n
+
+*@par Inputs:
+include:
+*@li input:A Tensor of type string. The text to be decoded. Can have any shape. Note that the output is flattened
+to a vector of char values. \n
+
+*@par Attributes:
+* input_encoding:A required string. Text encoding of the input strings. This is any of the encodings supported
+by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
+* errors:An optional string. Error handling policy when there is invalid formatting found in the input.
+The value of 'strict' will cause the operation to produce an InvalidArgument
+error on any invalid input formatting. A value of 'replace' (the default) will
+cause the operation to replace any invalid formatting in the input with the
+`replacement_char` codepoint. A value of 'ignore' will cause the operation to
+skip any invalid formatting in the input and produce no corresponding output
+character.
+* replacement_char:An optional int. The replacement character codepoint to be used in place of any invalid
+formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+be used. The default value is the default unicode replacement character,
+U+FFFD (0xFFFD, decimal 65533).
+* replace_control_characters:An optional bool. Whether to replace the C0 control characters (00-1F) with the `replacement_char`. Default is false.
+* Tsplits:An optional type. Defaults to DT_INT64. Presumably the data type of `row_splits` (TODO confirm). \n
+
+*@par Outputs:
+*@li row_splits:A 1D Tensor of type int64 containing the row splits.
+*@li char_values:A 1D Tensor of type int32 containing the decoded codepoints. \n
+
+*@see UnicodeDecode()
+
+*@par Third-party framework compatibility
+*compatible with UnicodeDecode op of tensorflow
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(UnicodeDecode)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(row_splits, TensorType({DT_INT64}))
+    .OUTPUT(char_values, TensorType({DT_INT32}))
+    .REQUIRED_ATTR(input_encoding, String)
+    .ATTR(errors, String, "replace")
+    .ATTR(replacement_char, Int, 65533)
+    .ATTR(replace_control_characters, Bool, false)
+    .ATTR(Tsplits, Type, DT_INT64)
+    .OP_END_FACTORY_REG(UnicodeDecode)
+
+/**
+*@brief Transcodes the input text from a source encoding to a destination encoding. \n
+
+*@par Inputs:
+include:
+*@li input:A Tensor of type string. The text to be processed. Can have any shape. \n
+
+*@par Attributes:
+* input_encoding:A required string. Text encoding of the input strings. This is any of the encodings supported
+by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
+* output_encoding:An optional string, defaults to "UTF-8". The unicode encoding to use in the output. Must be one of `"UTF-8", "UTF-16-BE", "UTF-32-BE"`.
+Multi-byte encodings will be big-endian.
+* errors:An optional string. Error handling policy when there is invalid formatting found in the input.
+The value of 'strict' will cause the operation to produce an InvalidArgument
+error on any invalid input formatting. A value of 'replace' (the default) will
+cause the operation to replace any invalid formatting in the input with the
+`replacement_char` codepoint. A value of 'ignore' will cause the operation to
+skip any invalid formatting in the input and produce no corresponding output
+character.
+* replacement_char:An optional int. The replacement character codepoint to be used in place of any invalid
+formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+be used. The default value is the default unicode replacement character,
+U+FFFD (0xFFFD, decimal 65533).
+* replace_control_characters:An optional bool. Whether to replace the C0 control characters (00-1F) with the
+`replacement_char`. Default is false. \n
+
+*@par Outputs:
+*@li output:A string tensor containing unicode text encoded using `output_encoding`. \n
+
+*@see UnicodeTranscode()
+
+*@par Third-party framework compatibility
+*compatible with UnicodeTranscode op of tensorflow
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(UnicodeTranscode)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .REQUIRED_ATTR(input_encoding, String)
+    .ATTR(output_encoding, String, "UTF-8")
+    .ATTR(errors, String, "replace")
+    .ATTR(replacement_char, Int, 65533)
+    .ATTR(replace_control_characters, Bool, false)
+    .OP_END_FACTORY_REG(UnicodeTranscode)
+
+/**
+*@brief Encodes a tensor of ints into unicode strings. \n
+
+*@par Inputs:
+include:
+*@li input_values:A 1D Tensor of type int32 containing the unicode codepoints that should be encoded.
+*@li input_splits:A 1D Tensor of type int32 or int64 specifying how the unicode codepoints should be split into strings. \n
+
+*@par Attributes:
+* output_encoding:An optional string, defaults to "UTF-8". The unicode encoding to use in the output. Must be one of `"UTF-8", "UTF-16-BE", "UTF-32-BE"`.
+Multi-byte encodings will be big-endian.
+* errors:An optional string. Error handling policy when there is invalid formatting found in the input.
+The value of 'strict' will cause the operation to produce an InvalidArgument
+error on any invalid input formatting. A value of 'replace' (the default) will
+cause the operation to replace any invalid formatting in the input with the
+`replacement_char` codepoint. A value of 'ignore' will cause the operation to
+skip any invalid formatting in the input and produce no corresponding output
+character.
+* replacement_char:An optional int. The replacement character codepoint to be used in place of any invalid
+formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+be used. The default value is the default unicode replacement character,
+U+FFFD (0xFFFD, decimal 65533). \n
+
+*@par Outputs:
+*@li output:The 1-D Tensor of strings encoded from the provided unicode codepoints. \n
+
+*@see UnicodeEncode()
+
+*@par Third-party framework compatibility
+*compatible with UnicodeEncode op of tensorflow
+
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(UnicodeEncode)
+    .INPUT(input_values, TensorType({DT_INT32}))
+    .INPUT(input_splits, TensorType({DT_INT32, DT_INT64}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(errors, String, "replace")
+    .ATTR(output_encoding, String, "UTF-8")
+    .ATTR(replacement_char, Int, 65533)
+    .OP_END_FACTORY_REG(UnicodeEncode)
 
 /**
 *@brief Split elements of input based on delimiter into a SparseTensor . \n
@@ -61,6 +290,116 @@ REG_OP(StringSplit)
     .ATTR(skip_empty, Bool, true)
     .OP_END_FACTORY_REG(StringSplit)
 
+/**
+*@brief Replaces the match of pattern in input with rewrite. \n
+
+*@par Inputs:
+include:
+*@li input:A Tensor of type string. The text to be processed. \n
+
+*@par Attributes:
+*@li pattern:An optional string. Defaults to "". The regular expression to match the input.
+*@li rewrite:An optional string. Defaults to "". The rewrite to be applied to the matched expression.
+*@li replace_global:An optional bool. Defaults to True. If True, the replacement is global,
+otherwise the replacement is done only on the first match.
+
+*@par Outputs:
+*@li output:A Tensor of type string.
+*/
+REG_OP(StaticRegexReplace)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(pattern, String, "")
+    .ATTR(rewrite, String, "")
+    .ATTR(replace_global, Bool, true)
+    .OP_END_FACTORY_REG(StaticRegexReplace)
+
+/**
+*@brief The input is a string tensor of any shape. The pattern is the
+*regular expression to be matched with every element of the input tensor.
+*The boolean values (True or False) of the output tensor indicate
+*if the input matches the regex pattern provided.
+
+*@par Inputs:
+include:
+*@li input:A Tensor of type string. The text to be processed. \n
+
+*@par Attributes:
+*@li pattern:An optional string. Defaults to "". The regular expression to match the input.
+
+*@par Outputs:
+*@li output:A bool tensor with the same shape as `input`.
+*/
+REG_OP(StaticRegexFullMatch)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_BOOL}))
+    .ATTR(pattern, String, "")
+    .OP_END_FACTORY_REG(StaticRegexFullMatch)
+
+/**
+*@brief Joins the elements of "input" into strings according to "segment_ids", using "separator". \n
+
+*@par Inputs:
+include:
+*@li input:A Tensor of type string. The input to be joined.
+*@li segment_ids:A Tensor. Must be one of the following types: int32, int64.
+*A tensor whose shape is a prefix of input.shape. Negative segment ids are not supported.
+*@li num_segments:A Tensor. Must be one of the following types: int32, int64. A scalar.
+
+*@par Attributes:
+*@li separator:An optional string. Defaults to "". The separator to use when joining.
+
+*@par Outputs:
+*@li output:A Tensor of type string.
+*/
+REG_OP(UnsortedSegmentJoin)
+    .INPUT(input, TensorType({DT_STRING}))
+    .INPUT(segment_ids, TensorType({DT_INT32,DT_INT64}))
+    .INPUT(num_segments, TensorType({DT_INT32,DT_INT64}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(separator, String, "")
+    .OP_END_FACTORY_REG(UnsortedSegmentJoin)
+
+/**
+*@brief Converts the strings in "input" to lowercase.
+*NOTE(review): behaviour inferred from the op name; confirm against the kernel implementation.
+
+*@par Inputs:
+include:
+*@li input:A Tensor of type string. The text to be processed.
+
+*@par Attributes:
+*@li encoding:An optional string. Defaults to "".
+
+*@par Outputs:
+*@li output:A Tensor of type string.
+*/
+REG_OP(StringLower)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(encoding, String, "")
+    .OP_END_FACTORY_REG(StringLower)
+
+/**
+*@brief Converts the strings in "input" to uppercase.
+*NOTE(review): behaviour inferred from the op name; confirm against the kernel implementation.
+
+*@par Inputs:
+include:
+*@li input:A Tensor of type string. The text to be processed.
+
+*@par Attributes:
+*@li encoding:An optional string. Defaults to "".
+
+*@par Outputs:
+*@li output:A Tensor of type string.
+*/
+REG_OP(StringUpper)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(encoding, String, "")
+    .OP_END_FACTORY_REG(StringUpper)
+
 /**
 *@brief Split elements of source based on sep into a SparseTensor . \n
 
@@ -488,7 +827,7 @@ include:
 */
 REG_OP(AsString)
     .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \
-        DT_DOUBLE, DT_BOOL}))
+        DT_DOUBLE, DT_BOOL, DT_COMPLEX64, DT_COMPLEX128}))
     .OUTPUT(y, TensorType({DT_STRING}))
     .ATTR(precision, Int, -1)
     .ATTR(scientific, Bool, false)
@@ -557,6 +896,45 @@ REG_OP(DecodeBase64)
     .INPUT(x, TensorType({DT_STRING}))
     .OUTPUT(y, TensorType({DT_STRING}))
     .OP_END_FACTORY_REG(DecodeBase64)
+
+/**
+*@brief StringNormalizer performs string operations for basic cleaning . \n
+
+*@par Inputs:
+*@li input: only accepts [C] or [1, C] UTF-8 strings tensor . \n
+
+*@par Outputs:
+*@li output: UTF-8 strings tensor after cleaning . \n
+
+*@par Attributes:
+*@li stopwords : list of strings (default is empty).
+*List of stop words. If not set, no word would be removed from input strings
+tensor.
+
+*@li is_case_sensitive : bool (default is false).
+*Boolean. Whether the identification of stop words in input strings tensor is
+case-sensitive. Default is false.
+
+*@li case_change_action : string (default is "NONE").
+*string enum that causes output to be lowercased/uppercased/unchanged. Valid
+values are "LOWER", "UPPER", "NONE". Default is "NONE".
+
+*@li local : string (default is "en_US"). NOTE(review): registered as "local"; presumably "locale" was meant - confirm.
+*Environment dependent string that denotes the locale according to which output
+strings need to be upper/lowercased. Default en_US or platform specific equivalent
+as decided by the implementation . \n
+
+*@attention Constraints:
+*@li input can be either a 1-D or 2-D tensor, the shape of 2-D tensor must be [1, C].
+*/
+REG_OP(StringNormalizer)
+    .INPUT(input, TensorType({DT_STRING}))
+    .OUTPUT(output, TensorType({DT_STRING}))
+    .ATTR(stopwords, ListString, {})
+    .ATTR(is_case_sensitive, Bool, false)
+    .ATTR(case_change_action, String, "NONE")
+    .ATTR(local, String, "en_US")
+    .OP_END_FACTORY_REG(StringNormalizer)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/swap_co_ops.h b/third_party/fwkacllib/inc/ops/swap_co_ops.h
index a1bf4f8b..6e8eaac3 100644
--- a/third_party/fwkacllib/inc/ops/swap_co_ops.h
+++ b/third_party/fwkacllib/inc/ops/swap_co_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/target_crop_and_resize.h b/third_party/fwkacllib/inc/ops/target_crop_and_resize.h
index 9c61f2c9..9bef1d7b 100644
--- a/third_party/fwkacllib/inc/ops/target_crop_and_resize.h
+++ b/third_party/fwkacllib/inc/ops/target_crop_and_resize.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h
index 64e18fc7..4a46e35f 100644
--- a/third_party/fwkacllib/inc/ops/transformation_ops.h
+++ b/third_party/fwkacllib/inc/ops/transformation_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -130,28 +130,27 @@ REG_OP(Transpose)
     .OP_END_FACTORY_REG(Transpose)
 
 /**
-*@brief Doing format_transfer for various data format only
-support "NHWC/NCHW" to "NC1HWC0" and "NC1HWC0" to "NHWC/NCHW"
-"NCHW" to "FRACTAL_Zn" or "FRACTAL_Zn" to "NCHW".
-"HWCN" to "FRACTAL_Zn" or "FRACTAL_Zn" to "HWCN" . \n
+*@brief Do format transfer for various data format.
+* In general, the framework will insert it automatically . \n
 
 *@par Inputs:
-*src: A Tensor dtype of all types . \n
+*src: A Tensor. For all branches can be types: float16, float32, int32, int8, bool.
+* For branches without padding also can be types: int16, int64, uint8, uint16, uint32, uint64 . \n
 
 *@par Attributes:
-*@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc.
-*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc.
-*@li group: A required int32, default value is 1. \n
+*@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Z" etc.
+*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Z" etc.
+*@li groups: An optional int32, default value is 1. \n
 
 *@par Outputs:
-*dst: A Tensor dtype of all types.
+*dst: A Tensor. Has the same type as "src".
 */
 REG_OP(TransData)
     .INPUT(src, TensorType::BasicType())
     .OUTPUT(dst, TensorType::BasicType())
     .REQUIRED_ATTR(src_format, String)
     .REQUIRED_ATTR(dst_format, String)
-    .ATTR(group, Int, 1)
+    .ATTR(groups, Int, 1)
     .OP_END_FACTORY_REG(TransData)
 
 /**
@@ -174,21 +173,27 @@ REG_OP(Permute)
     .OP_END_FACTORY_REG(Permute)
 
 /**
-*@brief Flattens the inputs. Reserves axis 0 and flattens the input tensors
-* along axis 1 . \n
+*@brief Flattens the input tensor into a 2D matrix. If the input tensor has shape (d_0, d_1,..., d_n),
+* then the output will have shape (d_0 X d_1 ... d_(axis-1), d_axis X d_(axis + 1)...X d_n)\n
 
 *@par Inputs:
-*One input:
-*x: A multi-dimensional Tensor. Must be one of the following types:
-* int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32 . \n
+* One input:
+* x: A multi-dimensional Tensor. Must be one of the following types:
+* int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32.
 
 *@par Outputs:
-*y: A 2D flattened Tensor (Reserves axis 0 and flattens the input tensors
-* along axis 1). Must be one of the following data types: int8, uint8, int16,
-* uint16, int32, uint32, int64,uint64, float16, float32 . \n
+* y: A 2D flattened Tensor with the contents of the input tensor, with input dimensions up to axis flattened 
+* to the outer dimension of the output and remaining input dimensions flattened into the inner dimension of the output.
+* Must be one of the following data types: int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32 .
+
+*@par Attributes:
+* axis: An optional int32, default value is 1. Indicates up to which input dimensions (exclusive) should be flattened
+* to the outer dimension of the output. The value for axis must be in the range [-r, r], where r is the rank of
+* the input tensor. A negative value means counting dimensions from the back. When axis = 0, the shape of
+* the output tensor is (1, d_0 X d_1 ... d_n), where the shape of the input tensor is (d_0, d_1, ... d_n).
 
 *@par Third-party framework compatibility
-* Compatible with TensorFlow operator Flatten.
+* Compatible with TensorFlow / ONNX operator Flatten.
 */
 REG_OP(Flatten)
     .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64,
@@ -197,6 +202,7 @@ REG_OP(Flatten)
     .OUTPUT(y, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64,
                            DT_UINT8, DT_UINT16, DT_UINT32, DT_UINT64,
                            DT_FLOAT, DT_FLOAT16}))
+    .ATTR(axis, Int, 1)
     .OP_END_FACTORY_REG(Flatten)
 
 /**
@@ -357,7 +363,7 @@ REG_OP(DepthToSpace)
 *@brief Permutes data into spatial data blocks and then prunes them . \n
 
 *@par Inputs:
-*@li x: A 4D Tensor with format NHWC.
+*@li x: A 4D Tensor. The format must be set; supported formats: "NCHW", "NHWC".
 *@li crops: A 1D list or tuple of int32 or int64 . \n
 
 *Must be one of the following types: float16, float32
@@ -418,12 +424,8 @@ REG_OP(BatchToSpace)
 * Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead.
 */
 REG_OP(BatchToSpaceD)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8,
-                        DT_UINT16, DT_UINT32, DT_UINT64, DT_INT8, DT_INT16, DT_COMPLEX64,
-                        DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8,
-                        DT_UINT16, DT_UINT32, DT_UINT64, DT_INT8, DT_INT16, DT_COMPLEX64,
-                        DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32}))
+    .INPUT(x, TensorType::BasicType())
+    .OUTPUT(y, TensorType::BasicType())
     .REQUIRED_ATTR(block_size, Int)
     .REQUIRED_ATTR(crops, ListInt)
     .OP_END_FACTORY_REG(BatchToSpaceD)
@@ -434,9 +436,10 @@ REG_OP(BatchToSpaceD)
 
 *@par Inputs:
 * Two inputs, including:
-*@li x: An NHWC Tensor. Must be one of the following types:
+*@li x: A 4D Tensor. Must be one of the following types:
 * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8,
 * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
+* The format must be set; supported formats: "NCHW", "NHWC".
 *@li paddings: A 2D tensor of type int, specifying the input . \n
 
 *@par Attributes:
@@ -518,7 +521,8 @@ REG_OP(Unpack)
 * @par Inputs:
 * x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the
 *    following types:float32, double, int32, uint8, int16, int8, int64, uint16,
-*    float16, uint32, uint64
+*    float16, uint32, uint64. The input format must be one of the following:
+*    NHWC, NCHW.
 
 * @par Attributes:
 * @li ksizes: A required list or tuple. The size of the sliding window for each
@@ -533,7 +537,6 @@ REG_OP(Unpack)
 * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions.
 * @li padding: A required string. The type of padding algorithm to use,
   support "SAME" or "VALID". \n
-* @li data_format: A required string. The format of input, only supported NHWC. \n
 
 * @par Outputs:
 * y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows *
@@ -554,7 +557,6 @@ REG_OP(ExtractImagePatches)
     .REQUIRED_ATTR(strides, ListInt)
     .REQUIRED_ATTR(rates, ListInt)
     .REQUIRED_ATTR(padding, String)
-    .ATTR(data_format, String, "NHWC")
     .OP_END_FACTORY_REG(ExtractImagePatches)
 
 /**
@@ -563,6 +565,7 @@ REG_OP(ExtractImagePatches)
 
 * @par Inputs:
 * x: A 5D Tensor with shape [batch, in_planes, in_rows, in_cols, depth] . \n
+*    The input format must be one of the following: NDHWC, NCDHW. \n
 
 * @par Attributes:
 * @li ksizes: A required list or tuple. The size of the sliding window for each
@@ -571,7 +574,6 @@ REG_OP(ExtractImagePatches)
 * patches are in "x". Must be: [1, stride_planes, stride_rows, stride_cols, 1].
 * @li padding: A required string. The type of padding algorithm to use ,
 * support "SAME" or "VALID" . \n
-* @li data_format: An optional string. The format of input, only supported NDHWC. \n
 
 * @par Outputs:
 * Output: A 5D Tensor with shape [batch, out_planes, out_rows, out_cols, ksize_planes *
@@ -590,7 +592,6 @@ REG_OP(ExtractVolumePatches)
     .REQUIRED_ATTR(ksizes, ListInt)
     .REQUIRED_ATTR(strides, ListInt)
     .REQUIRED_ATTR(padding, String)
-    .ATTR(data_format, String, "NDHWC")
     .OP_END_FACTORY_REG(ExtractVolumePatches)
 
 /**
@@ -717,6 +718,210 @@ REG_OP(CompressFcOp)
 .OUTPUT(compress_index, TensorType({DT_INT8}))
 .REQUIRED_ATTR(compress_parameters, ListInt)
 .OP_END_FACTORY_REG(CompressFcOp)
+
+/**
+*@brief Performs Col2im for each batch entry. \n
+
+*@par Inputs:
+*@li x: The Col Tensor of type float16 or float32. 5-D, shape: `(n, c1, kernel_h*kernel_w, ho*wo, c0)`, where ho and wo are each do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//stride_d + 1.
+*@li output_size: An int32 Tensor, presumably the `(output_h, output_w)` of the img Tensor. NOTE(review): the type list repeats DT_INT32 - confirm whether DT_INT64 was intended. \n
+
+*@par Outputs:
+*@li y: The img Tensor of type float16 or float32. 5-D, shape: `(n, c1, output_h, output_w, c0)`. \n
+
+*@par Attributes:
+*@li kernel_size: ListInt, value: `(kernel_h, kernel_w)`, the shape of kernel in convolution.
+*@li dilation: ListInt, value: `(dilation_h, dilation_w)`, the dilation in convolution.
+*@li padding: ListInt, value: `(padding_h, padding_w)`, the padding in convolution.
+*@li stride:  ListInt, value: `(stride_h, stride_w)`, the stride in convolution.  \n
+
+*@par Third-party framework compatibility
+* Compatible with Pytorch col2im/im2col_backward operator.
+*/
+REG_OP(Col2im)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(output_size, TensorType({DT_INT32, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(kernel_size, ListInt)
+    .REQUIRED_ATTR(dilation, ListInt)
+    .REQUIRED_ATTR(padding, ListInt)
+    .REQUIRED_ATTR(stride, ListInt)
+    .OP_END_FACTORY_REG(Col2im)
+
+/**
+* @brief Performs Im2col for each batch entry. \n
+
+* @par Inputs:
+* x: A 4D Tensor with shape [batch, in_rows, in_cols, depth]. Must be one of the
+*    following types: float32, int8, float16. The input format must be one of
+*    the following: NHWC, NCHW.
+
+* @par Attributes:
+* @li ksizes: A required list or tuple. The size of the sliding window for each
+* dimension of images.
+* @li strides: An optional list or tuple. How far the centers of two consecutive
+* patches are in the images. Defaults to "{1}".
+* @li dilations: An optional list or tuple. Defaults to "{1}".
+* This is the input stride, specifying how far two consecutive patch
+* samples are in the input. Equivalent to extracting patches
+* with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *
+* (dilations - 1), followed by subsampling them spatially by a factor of dilations.
+* This is equivalent to rate in dilated (a.k.a. Atrous) convolutions.
+* @li padding_mode: An optional String. The type of padding algorithm to use,
+* support "SAME", "VALID", "CALCULATED". Among the three modes, only "CALCULATED"
+* uses the "pads" attribute below. Defaults to "CALCULATED".
+* @li pads: An optional list or tuple. The pad distance. Defaults to "{0}". \n
+
+* @par Outputs:
+* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows *
+* ksize_cols * depth] containing image patches with size ksize_rows x ksize_cols
+* x depth vectorized in the "depth" dimension. Note "out_rows" and "out_cols"
+* are the dimensions of the output patches . \n
+
+* @attention Constraints:
+* "ksizes", "strides", "dilations" and "pads" are lists of integers . \n
+
+* @par Third-party framework compatibility
+* Compatible with Pytorch Im2col operator.
+*/
+REG_OP(Im2col)
+    .INPUT(x, TensorType::RealNumberType())
+    .OUTPUT(y, TensorType::RealNumberType())
+    .REQUIRED_ATTR(ksizes, ListInt)
+    .ATTR(strides, ListInt, {1})
+    .ATTR(dilations, ListInt, {1})
+    .ATTR(padding_mode, String, "CALCULATED")
+    .ATTR(pads, ListInt, {0})
+    .OP_END_FACTORY_REG(Im2col)
+
+/**
+*@brief Generates a 2D or 3D flow field (sampling grid), given a batch of affine
+matrices theta. \n
+
+*@par Inputs:
+*Input theta must be float16 or float, output_size must be int32 type. Inputs
+include:
+*@li theta: input batch of affine matrices with shape (N,2,3) for 2D or (N,3,4)
+for 3D
+*@li output_size: the target output image size. (N×C×H×W for 2D or N×C×D×H×W for
+3D) Example: torch.Size((32, 3, 24, 24)) . \n
+
+
+*@par Attributes:
+*align_corners: if True, consider -1 and 1 to refer to the centers of the corner
+pixels rather than the image corners. Refer to grid_sample() for a more complete
+description. A grid generated by affine_grid() should be passed to grid_sample()
+with the same setting for this option. Default: False \n
+
+*@par Outputs:
+*@li y: A Tensor of type float16 or float. The generated flow field (sampling grid);
+presumably shaped (N,H,W,2) for 2D or (N,D,H,W,3) for 3D as in Pytorch affine_grid - TODO confirm. \n
+
+*@attention Constraints:
+*Input theta must be float16 or float, output_size must be int32 type . \n
+
+*@par Third-party framework compatibility
+*Compatible with Pytorch affine_grid operator.
+*/
+
+REG_OP(AffineGrid)
+    .INPUT(theta, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(output_size, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(AffineGrid)
+
+/**
+*@brief Makes the memory of a view contiguous. \n
+
+*@par Inputs:
+*Four inputs, including:
+*@li x: The input tensor.
+*@li size: The shape of the output tensor. An index-number-type tensor.
+*@li stride: The stride of the output tensor. An index-number-type tensor.
+*@li storage_offset: The offset in the underlying storage of the output tensor. An index-number-type tensor. \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x" . \n
+
+*@par Third-party framework compatibility
+*Compatible with the pytorch operator as_strided.
+*/
+REG_OP(AsStrided)
+    .INPUT(x, TensorType::BasicType())
+    .INPUT(size, TensorType::IndexNumberType())
+    .INPUT(stride, TensorType::IndexNumberType())
+    .INPUT(storage_offset, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(AsStrided)
+
+/**
+*@brief This transform extracts n-grams from the input sequence and saves them as a
+vector. \n
+
+*@par Inputs:
+*@li input: can be either a 1-D or 2-D tensor for n-gram extraction. It is either string UTF-8 or int32/int64 . \n
+
+*@par Attributes:
+*@li max_gram_length : int (required)
+*Maximum n-gram length. If this value is 3, 3-grams will be used to generate the output .
+*@li max_skip_count : int (required)
+*Maximum number of items (integers/strings) to be skipped when constructing an n-gram from the input.
+If max_skip_count=1, min_gram_length=2, max_gram_length=3, this operator may generate 2-grams
+with skip_count=0 and skip_count=1, and 3-grams with skip_count=0 and skip_count=1.
+*@li min_gram_length : int (required)
+*Minimum n-gram length. If this value is 2 and max_gram_length is 3, output may contain counts of
+2-grams and 3-grams.
+*@li mode : string (required)
+*The weighting criteria. It can be one of "TF" (term frequency), "IDF" (inverse document frequency),
+and "TFIDF" (the combination of TF and IDF).
+*@li ngram_counts : list of ints (required)
+*The starting indexes of 1-grams, 2-grams, and so on in pool. It is useful when determining the boundary
+between two consecutive collections of n-grams. For example, if ngram_counts is [0, 17, 36],
+the first index (zero-based) of 1-gram/2-gram/3-gram in pool are 0/17/36. This format is essentially identical
+to CSR (or CSC) sparse matrix format, and we choose to use this due to its popularity.
+*@li ngram_indexes : list of ints (required)
+*list of int64s (type: AttributeProto::INTS). This list is parallel to the specified 'pool_*' attribute. The i-th element
+in ngram_indexes indicates the coordinate of the i-th n-gram in the output tensor.
+*@li pool_int64s : list of ints
+*List of int64 n-grams learned from the training set. Either this or pool_strings attributes must be present but not both.
+It's a 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams. The i-th element
+in pool stores the n-gram that should be mapped to coordinate ngram_indexes[i] in the output vector.
+*@li pool_strings : list of strings
+*List of strings n-grams learned from the training set. Either this or pool_int64s attributes must be present but not both.
+It's a 1-D tensor starting with the collections of all 1-grams and ending with the collections of n-grams. The i-th element
+in pool stores the n-gram that should be mapped to coordinate ngram_indexes[i] in the output vector.
+*@li weights : list of floats
+*list of floats. This attribute stores the weight of each n-gram in pool. The i-th element in weights is the weight of
+the i-th n-gram in pool. Its length equals the size of ngram_indexes. By default, weights is an all-one tensor. This attribute
+is used when mode is "IDF" or "TFIDF" to scale the associated word counts. \n
+
+*@par Outputs:
+*@li output: tensor(float)
+*For 1-D input, output is the n-gram representation of that input. For 2-D input, the output is also a 2-D tensor
+whose i-th row is the n-gram representation of the i-th input row. More specifically, if input shape is [C], the corresponding
+output shape would be [max(ngram_indexes) + 1]. If input shape is [N, C], this operator produces a [N, max(ngram_indexes) + 1]-tensor. \n
+
+*@attention Constraints:
+*@li input can be either a 1-D or 2-D tensor, shape is [C] or [N, C].
+*@li max(ngram_indexes) + 1 == len(weights), len(output) == len(weights).
+*@li ngram_counts and pool(pool_int64s or pool_strings) must match.
+*@li either pool_strings or pool_int64s attributes must be present but not both.
+*/
+
+REG_OP(TfidVectorizer)
+    .INPUT(input, TensorType({DT_INT32, DT_INT64, DT_STRING}))
+    .OUTPUT(output, TensorType({DT_FLOAT}))
+    .REQUIRED_ATTR(max_gram_length, Int)
+    .REQUIRED_ATTR(max_skip_count, Int)
+    .REQUIRED_ATTR(min_gram_length, Int)
+    .REQUIRED_ATTR(mode, String)
+    .REQUIRED_ATTR(ngram_counts, ListInt)
+    .REQUIRED_ATTR(ngram_indexes, ListInt)
+    .ATTR(pool_int64s, ListInt, {})
+    .ATTR(pool_strings, ListString, {})
+    .ATTR(weights, ListFloat, {})
+    .OP_END_FACTORY_REG(TfidVectorizer)
 }  // namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h
index e19cbd7c..8ef69d8b 100644
--- a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h
+++ b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ * Copyright 2019 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h
index 57948c47..01f63705 100644
--- a/third_party/fwkacllib/inc/runtime/event.h
+++ b/third_party/fwkacllib/inc/runtime/event.h
@@ -41,6 +41,11 @@ typedef enum rtEventWaitStatus {
 #define RT_EVENT_DDSYNC       0x04U
 #define RT_EVENT_TIME_LINE    0x08U
 
+#define RT_EVENT_DDSYNC_NS    0x01U
+#define RT_EVENT_STREAM_MARK  0x02U
+#define RT_EVENT_DDSYNC       0x04U
+#define RT_EVENT_TIME_LINE    0x08U
+/* NOTE(review): RT_EVENT_DDSYNC and RT_EVENT_TIME_LINE are already defined with the same values just above this block (the other two presumably as well) - confirm and drop the duplicate defines. */
 /**
  * @ingroup dvrt_event
  * @brief create event instance
diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h
index aa394eea..10f884f2 100644
--- a/third_party/fwkacllib/inc/runtime/rt.h
+++ b/third_party/fwkacllib/inc/runtime/rt.h
@@ -27,6 +27,7 @@
 #include "mem.h"
 #include "rt_model.h"
 #include "stream.h"
+#include "rt_stars.h"
 #include "rt_ffts.h"
 
 #endif  // __CCE_RUNTIME_RT_H__
diff --git a/third_party/fwkacllib/inc/runtime/rt_stars.h b/third_party/fwkacllib/inc/runtime/rt_stars.h
new file mode 100644
index 00000000..188656b1
--- /dev/null
+++ b/third_party/fwkacllib/inc/runtime/rt_stars.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
+ * Description: runtime interfaces for launching stars tasks and operating cdq queues.
+ */
+
+#ifndef __CCE_RUNTIME_STARS_H
+#define __CCE_RUNTIME_STARS_H
+
+#include "base.h"
+
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
+extern "C" {
+#endif
+
+/**
+ * @ingroup rt_stars
+ * @brief launch stars task.
+ * used to send a stars sqe directly.
+ * @param [in] taskSqe     stars task sqe
+ * @param [in] sqeLen      stars task sqe length
+ * @param [in] stream      associated stream
+ * @return RT_ERROR_NONE for ok, others failed
+ */
+RTS_API rtError_t rtStarsTaskLaunch(const void *taskSqe, uint32_t sqeLen, rtStream_t stream);
+
+/**
+ * @ingroup rt_stars
+ * @brief create cdq instance.
+ * @param [in] batchNum     batch number
+ * @param [in] batchSize    batch size
+ * @param [in] queName      cdq name
+ * @return RT_ERROR_NONE for ok, ACL_ERROR_RT_NO_CDQ_RESOURCE for no cdq resources
+ */
+RTS_API rtError_t rtCdqCreate(uint32_t batchNum, uint32_t batchSize, const char *queName);
+
+/**
+ * @ingroup rt_stars
+ * @brief destroy cdq instance.
+ * @param [in] queName      cdq name
+ * @return RT_ERROR_NONE for ok, others failed
+ */
+RTS_API rtError_t rtCdqDestroy(const char *queName);
+
+/**
+ * @ingroup rt_stars
+ * @brief get free batch in the queue.
+ * @param [in] queName      cdq name
+ * @param [in] timeout      timeout for waiting for a free batch (NOTE(review): unit is not stated here - confirm)
+ * @param [out] batchId     batch index
+ * @return RT_ERROR_NONE for ok, ACL_ERROR_RT_WAIT_TIMEOUT for timeout
+ */
+RTS_API rtError_t rtCdqAllocBatch(const char *queName, int32_t timeout, uint32_t *batchId);
+
+/**
+ * @ingroup rt_stars
+ * @brief launch a write_cdqm task on the stream.
+ * When the task is executed, the data information will be inserted into the cdqe index position of the queue.
+ * @param [in] queName      cdq name
+ * @param [in] cdqeIndex    cdqe index
+ * @param [in] data         cdqe information
+ * @param [in] dataSize     data size
+ * @param [in] stream       launch task on the stream
+ * @return RT_ERROR_NONE for ok, others failed
+ */
+RTS_API rtError_t rtCdqEnQueue(const char *queName, uint32_t cdqeIndex, void *data, uint32_t dataSize,
+    rtStream_t stream);
+
+/**
+ * @ingroup rt_stars
+ * @brief launch a write_cdqm task on the stream (pointer mode).
+ * When the task is executed, the data information will be inserted into the cdqe index position of the queue.
+ * @param [in] queName      cdq name
+ * @param [in] cdqeIndex    cdqe index
+ * @param [in] prtAddr      presumably the address of the cdqe information (no data size is passed) - TODO confirm
+ *                          NOTE(review): the name looks like a typo of "ptrAddr"; left unchanged here
+ * @param [in] stream       launch task on the stream
+ * @return RT_ERROR_NONE for ok, others failed
+ */
+RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, const void *prtAddr,
+    rtStream_t stream);
+
+#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
+}
+#endif
+#endif // __CCE_RUNTIME_STARS_H
diff --git a/third_party/fwkacllib/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/tdt/tsd_client.h
index 665c8b82..36fc500e 100644
--- a/third_party/fwkacllib/inc/tdt/tsd_client.h
+++ b/third_party/fwkacllib/inc/tdt/tsd_client.h
@@ -107,88 +107,6 @@ TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, con
 */
 TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback);
 
-/**
-* @ingroup CreateCmdParameterObj
-* @brief creat tsdclient func parameter obj.
-*
-* @par Function
-* creat tsdclient func parameter obj.
-*
-* @param type [IN] type tdt::TsdCmdType, tsd func type.
-* @param cmdParameterObj [IN] type void *, func parameter obj.
-* @retval TDT_OK Success
-* @retval TDT_INTERFACE_NOT_SUPPORT
-*
-* @par Dependency
-* @li libtsdclient.so: Library to which the interface belongs.
-* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined.
-* @li status.h: Header file where 'TDT_StatusT' defined
-*/
-TDT_StatusT CreateCmdParameterObj(tdt::TsdCmdType type, void **cmdParameterObj);
-
-/**
-* @ingroup SetCmdParameterObjAttribute
-* @brief set cmdParameterObj input value.
-*
-* @par Function
-* set cmdParameterObj input value.
-*
-* @param type [IN] type tdt::TsdCmdType, tsd func type.
-* @param cmdParameterObj [IN] type void *, func parameter obj.
-* @param itemType [IN] type tdt::InputItem, func input type.
-* @param valuePtr [IN] type const void *, input value.
-* @param valueLength [IN] type int, input value length.
-* @retval TDT_OK Success
-* @retval TDT_INTERFACE_NOT_SUPPORT
-*
-* @par Dependency
-* @li libtsdclient.so: Library to which the interface belongs.
-* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined.
-* @li status.h: Header file where 'TDT_StatusT' defined
-*/
-TDT_StatusT SetCmdParameterObjAttribute(tdt::TsdCmdType type, void *cmdParameterObj, tdt::InputItem itemType, const void *valuePtr, int valueLength);
-
-/**
-* @ingroup GetCmdParameterObjAttribute
-* @brief set cmdParameterObj input value.
-*
-* @par Function
-* set cmdParameterObj input value.
-*
-* @param type [IN] type tdt::TsdCmdType, tsd func type.
-* @param cmdParameterObj [IN] type void *, func parameter obj.
-* @param itemType [IN] type tdt::InputItem, func input type.
-* @param valuePtr [IN] type const void *, input value.
-* @param valueLength [IN] type int, input value length.
-* @retval TDT_OK Success
-* @retval TDT_INTERFACE_NOT_SUPPORT
-*
-* @par Dependency
-* @li libtsdclient.so: Library to which the interface belongs.
-* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined.
-* @li status.h: Header file where 'TDT_StatusT' defined
-*/
-TDT_StatusT GetCmdParameterObjAttribute(tdt::TsdCmdType type, void *cmdParameterObj, tdt::InputItem itemType, void *valuePtr, int &valueLength);
-
-/**
-* @ingroup TsdClientCmd
-* @brief creat tsdclient func parameter obj.
-*
-* @par Function
-* creat tsdclient func parameter obj.
-*
-* @param type [IN] type tdt::TsdCmdType, tsd func type.
-* @param cmdParameterObj [IN] type void *, func parameter obj.
-* @retval TDT_OK Success
-* @retval TDT_INTERFACE_NOT_SUPPORT
-*
-* @par Dependency
-* @li libtsdclient.so: Library to which the interface belongs.
-* @li data_common.h: Header file where tdt::TsdCmdType and tdt::InputItem defined.
-* @li status.h: Header file where 'TDT_StatusT' defined
-*/
-TDT_StatusT TsdClientCmd(tdt::TsdCmdType cmd, void *cmdParameterObj);
-
 #ifdef __cplusplus
 }
 #endif  // __cplusplus
diff --git a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h
index a1c39a51..67adecd9 100644
--- a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h
+++ b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h
@@ -1,12 +1,18 @@
 /**
-* @file adx_datadump_server.h
-*
-* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-*/
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #ifndef ADX_DATADUMP_SERVER_H
 #define ADX_DATADUMP_SERVER_H
diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h
index c8715041..07b32149 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h
@@ -14,151 +14,99 @@
  * limitations under the License.
  */
 
-#ifndef MSPROF_ENGINE_PROF_ACL_API_H_
-#define MSPROF_ENGINE_PROF_ACL_API_H_
-
-#define MSVP_MAX_DEV_NUM 64
-#define MSVP_PROF_API __attribute__((visibility("default")))
+#ifndef MSPROFILER_API_PROF_ACL_API_H_
+#define MSPROFILER_API_PROF_ACL_API_H_
 
 // DataTypeConfig
-#define PROF_ACL_API                0x0001
-#define PROF_TASK_TIME              0x0002
-#define PROF_AICORE_METRICS         0x0004
-#define PROF_AICPU_TRACE            0x0008
-#define PROF_MODEL_EXECUTE          0x0010
-#define PROF_RUNTIME_API            0x0020
-#define PROF_RUNTIME_TRACE          0x0040
-#define PROF_SCHEDULE_TIMELINE      0x0080
-#define PROF_SCHEDULE_TRACE         0x0100
-#define PROF_AIVECTORCORE_METRICS   0x0200
-#define PROF_SUBTASK_TIME           0x0400
-
-#define PROF_TRAINING_TRACE         0x0800
-#define PROF_HCCL_TRACE             0x1000
-#define PROF_DATA_PROCESS           0x2000
-#define PROF_TASK_TRACE             0x3842
+#define PROF_ACL_API                0x00000001
+#define PROF_TASK_TIME              0x00000002
+#define PROF_AICORE_METRICS         0x00000004
+#define PROF_AICPU_TRACE            0x00000008
+#define PROF_MODEL_EXECUTE          0x00000010
+#define PROF_RUNTIME_API            0x00000020
+#define PROF_RUNTIME_TRACE          0x00000040
+#define PROF_SCHEDULE_TIMELINE      0x00000080
+#define PROF_SCHEDULE_TRACE         0x00000100
+#define PROF_AIVECTORCORE_METRICS   0x00000200
+#define PROF_SUBTASK_TIME           0x00000400
+
+#define PROF_TRAINING_TRACE         0x00000800
+#define PROF_HCCL_TRACE             0x00001000
+
+#define PROF_TASK_TRACE             0x00001852
+
+// system profiling switch
+#define PROF_CPU                    0x00010000
+#define PROF_HARDWARE_MEMORY        0x00020000
+#define PROF_IO                     0x00040000
+#define PROF_INTER_CONNECTION       0x00080000
+#define PROF_DVPP                   0x00100000
+#define PROF_SYS_AICORE_SAMPLE      0x00200000
+#define PROF_AIVECTORCORE_SAMPLE    0x00400000
 
 #define PROF_MODEL_LOAD             0x8000000000000000
 
 // DataTypeConfig MASK
-#define PROF_ACL_API_MASK                0x0001
-#define PROF_TASK_TIME_MASK              0x0002
-#define PROF_AICORE_METRICS_MASK         0x0004
-#define PROF_AICPU_TRACE_MASK            0x0008
-#define PROF_MODEL_EXECUTE_MASK          0x0010
-#define PROF_RUNTIME_API_MASK            0x0020
-#define PROF_RUNTIME_TRACE_MASK          0x0040
-#define PROF_SCHEDULE_TIMELINE_MASK      0x0080
-#define PROF_SCHEDULE_TRACE_MASK         0x0100
-#define PROF_AIVECTORCORE_METRICS_MASK   0x0200
-#define PROF_SUBTASK_TIME_MASK           0x0400
-
-#define PROF_TRAINING_TRACE_MASK         0x0800
-#define PROF_HCCL_TRACE_MASK             0x1000
-#define PROF_DATA_PROCESS_MASK           0x2000
+#define PROF_ACL_API_MASK                0x00000001
+#define PROF_TASK_TIME_MASK              0x00000002
+#define PROF_AICORE_METRICS_MASK         0x00000004
+#define PROF_AICPU_TRACE_MASK            0x00000008
+#define PROF_MODEL_EXECUTE_MASK          0x00000010
+#define PROF_RUNTIME_API_MASK            0x00000020
+#define PROF_RUNTIME_TRACE_MASK          0x00000040
+#define PROF_SCHEDULE_TIMELINE_MASK      0x00000080
+#define PROF_SCHEDULE_TRACE_MASK         0x00000100
+#define PROF_AIVECTORCORE_METRICS_MASK   0x00000200
+#define PROF_SUBTASK_TIME_MASK           0x00000400
+
+#define PROF_TRAINING_TRACE_MASK         0x00000800
+#define PROF_HCCL_TRACE_MASK             0x00001000
+
+// system profiling mask
+#define PROF_CPU_MASK                    0x00010000
+#define PROF_HARDWARE_MEMORY_MASK        0x00020000
+#define PROF_IO_MASK                     0x00040000
+#define PROF_INTER_CONNECTION_MASK       0x00080000
+#define PROF_DVPP_MASK                   0x00100000
+#define PROF_SYS_AICORE_SAMPLE_MASK      0x00200000
+#define PROF_AIVECTORCORE_SAMPLE_MASK    0x00400000
 
 #define PROF_MODEL_LOAD_MASK             0x8000000000000000
 
-#include <cstdint>
-#include <string>
-
-/**
- * @name  ProrErrorCode
- * @brief error code enum of prof_acl_apis
- */
-enum ProfErrorCode {
-    PROF_ERROR_NONE = 0,            // ok
-    PROF_ERROR_PARAM_INVALID,       // param invalid, for example nullptr
-    PROF_ERROR_REPEAT_INIT,         // profiling has already been inited
-    PROF_ERROR_CONFIG_INVALID,      // config invalid, for example invalid json string
-    PROF_ERROR_DIR_NO_ACCESS,       // dir is not accessable
-    PROF_ERROR_FAILURE,             // failed to init or start profiling
-    PROF_ERROR_NOT_INITED,          // profiling has not been inited
-    PROF_ERROR_DEVICE_INVALID,      // device id invalid
-    PROF_ERROR_UNSUPPORTED,         // unsupported data type or ai core metrics
-    PROF_ERROR_REPEAT_START,        // profiilng has already been started
-    PROF_ERROR_NOT_STARTED,         // profiling has not been started
-};
-
-/**
- * @brief transfer profiling config in acl.json to sample config
- * @param aclCfg       [IN]  profiling json string from acl.json as {"switch":"on", "result_path":"/home",...}
- * @param sampleCfg    [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]}
- * @return ProfErrorCode
- */
-MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg);
+#ifndef OS_TYPE
+#define OS_TYPE 0
+#endif // OS_TYPE
 
-/**
- * @name  ProfInit
- * @brief init profiling
- * @param profInitCfg [IN] config of init profiling of json format
- * @return ProfErrorCode
- */
-MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg);
-
-/**
- * @name  ProfAicoreMetrics
- * @brief aicore metrics enum
- */
-enum ProfAicoreMetrics {
-    PROF_AICORE_ARITHMATIC_THROUGHPUT = 0,
-    PROF_AICORE_PIPELINE = 1,
-    PROF_AICORE_SYNCHRONIZATION = 2,
-    PROF_AICORE_MEMORY = 3,
-    PROF_AICORE_INTERNAL_MEMORY = 4,
-    PROF_AICORE_STALL = 5,
-    PROF_AICORE_EVENT = 255
-};
+#if (OS_TYPE != LINUX)
+#define MSVP_PROF_API __declspec(dllexport)
+#else
+#define MSVP_PROF_API __attribute__((visibility("default")))
+#endif
 
-/**
- * @name  ProfConfig
- * @brief struct of ProfStart
- */
-struct ProfConfig {
-    uint32_t devNums;                     // length of device id list
-    uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list
-    ProfAicoreMetrics aicoreMetrics;      // aicore metric
-    uint64_t dataTypeConfig;              // data type to start profiling
-};
+#include <cstdint>
 
+namespace Msprofiler {
+namespace Api {
 /**
- * @name  ProfStartProfiling
- * @brief start profiling
- * @param profStartCfg [IN] config to start profiling
- * @return ProfErrorCode
+ * @name  ProfGetOpExecutionTime
+ * @brief get op execution time of specific part of data
+ * @param data  [IN] data read from pipe
+ * @param len   [IN] data length
+ * @param index [IN] index of part(op)
+ * @return op execution time (us)
  */
-MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg);
+MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index);
+}
+}
 
-/**
- * @name  ProfStopConfig
- * @brief struct of ProfStop
- */
-struct ProfStopConfig {
-    uint64_t padding;
-};
+#ifdef __cplusplus
+extern "C" {
+#endif
 
-/**
- * @name  ProfStopProfiling
- * @brief stop profiling
- * @param profStopCfg [IN] config to stop profiling
- * @return ProfErrorCode
- */
-MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg);
-
-/**
- * @name  ProfFinalize
- * @brief finalize profiling task
- * @return ProfErrorCode
- */
-MSVP_PROF_API int32_t ProfFinalize();
+MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index);
 
-/**
- * @name  ProfGetDataTypeConfig
- * @brief get dataTypeConfig started with of one device
- * @param deviceId          [IN] deviceId to get dataTypeConfig
- * @param dataTypeConfig    [OUT] result get
- * @return ProfErrorCode
- */
-MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig);
+#ifdef __cplusplus
+}
+#endif
 
-#endif  // MSPROF_ENGINE_PROF_ACL_API_H_
+#endif  // MSPROFILER_API_PROF_ACL_API_H_
diff --git a/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h b/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h
index 4f013eef..f8cb1b22 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h
@@ -16,7 +16,16 @@
 
 #ifndef MSPROF_ENGINE_PROF_MGR_CORE_H_
 #define MSPROF_ENGINE_PROF_MGR_CORE_H_
+#ifndef OS_TYPE
+#define OS_TYPE 0
+#endif // OS_TYPE
+
+#if (OS_TYPE != LINUX)
+#define MSVP_PROF_API __declspec(dllexport)
+#else
 #define MSVP_PROF_API __attribute__((visibility("default")))
+#endif
+
 
 #include <string>
 #include <vector>
diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h
index ff91351b..d5ed7569 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h
@@ -41,42 +41,44 @@ namespace Engine {
  *  the Reporter class .used to send data to profiling
  */
 class MSVP_PROF_API Reporter {
- public:
-  virtual ~Reporter() {}
+public:
+    virtual ~Reporter() {}
 
- public:
-  /**
-   * @ingroup reporter
-   * @name  : Report
-   * @brief : API of libmsprof, report data to libmsprof, it's a non-blocking function \n
-              The data will be firstly appended to cache, if the cache is full, data will be ignored
-   * @param data [IN] const ReporterData * the data send to libmsporf
-   * @retval PROFILING_SUCCESS 0 (success)
-   * @retval PROFILING_FAILED -1 (failed)
-   *
-   * @par depend:
-   * @li libmsprof
-   * @li prof_reporter.h
-   * @since c60
-   * @see Flush
-   */
-  virtual int Report(const ReporterData *data) = 0;
+public:
+    /**
+     * @ingroup reporter
+     * @name  : Report
+     * @brief : API of libmsprof, report data to libmsprof, it's a non-blocking function \n
+     *          The data will be firstly appended to cache, if the cache is full, data will be ignored
+     * @param data [IN] const ReporterData * the data sent to libmsprof
+     * @retval PROFILING_SUCCESS 0 (success)
+     * @retval PROFILING_FAILED -1 (failed)
+     *
+     * @par depend:
+     * @li libmsprof
+     * @li prof_reporter.h
+     * @since c60
+     * @see Flush
+     */
+    virtual int Report(const ReporterData *data) = 0;
 
-  /**
-   * @ingroup reporter
-   * @name  : Flush
-   * @brief : API of libmsprof, notify libmsprof send data over, it's a blocking function \n
-              The all datas of cache will be write to file or send to host
-   * @retval PROFILING_SUCCESS 0 (success)
-   * @retval PROFILING_FAILED -1 (failed)
-   *
-   * @par depend:
-   * @li libmsprof
-   * @li prof_reporter.h
-   * @since c60
-   * @see ProfMgrStop
-   */
-  virtual int Flush() = 0;
+    /**
+     * @ingroup reporter
+     * @name  : Flush
+     * @brief : API of libmsprof, notify libmsprof that sending data is over, it's a blocking function \n
+     *          All data in the cache will be written to file or sent to host
+     * @retval PROFILING_SUCCESS 0 (success)
+     * @retval PROFILING_FAILED -1 (failed)
+     *
+     * @par depend:
+     * @li libmsprof
+     * @li prof_reporter.h
+     * @since c60
+     * @see ProfMgrStop
+     */
+    virtual int Flush() = 0;
+
+    virtual uint32_t GetReportDataMaxLen() = 0;
 };
 
 }  // namespace Engine
diff --git a/third_party/prebuild/aarch64/libalog.so b/third_party/prebuild/aarch64/libalog.so
index e041ad7e..65aefa59 100755
Binary files a/third_party/prebuild/aarch64/libalog.so and b/third_party/prebuild/aarch64/libalog.so differ
diff --git a/third_party/prebuild/aarch64/liberror_manager.so b/third_party/prebuild/aarch64/liberror_manager.so
index 759d8e30..6358365b 100755
Binary files a/third_party/prebuild/aarch64/liberror_manager.so and b/third_party/prebuild/aarch64/liberror_manager.so differ
diff --git a/third_party/prebuild/aarch64/libmmpa.a b/third_party/prebuild/aarch64/libmmpa.a
index d7c29e2b..7d042c4c 100755
Binary files a/third_party/prebuild/aarch64/libmmpa.a and b/third_party/prebuild/aarch64/libmmpa.a differ
diff --git a/third_party/prebuild/x86_64/libalog.so b/third_party/prebuild/x86_64/libalog.so
index 051f85d9..4c8a45a4 100755
Binary files a/third_party/prebuild/x86_64/libalog.so and b/third_party/prebuild/x86_64/libalog.so differ
diff --git a/third_party/prebuild/x86_64/liberror_manager.so b/third_party/prebuild/x86_64/liberror_manager.so
index cd9ad8bc..d97e6ef1 100755
Binary files a/third_party/prebuild/x86_64/liberror_manager.so and b/third_party/prebuild/x86_64/liberror_manager.so differ
diff --git a/third_party/prebuild/x86_64/libmmpa.a b/third_party/prebuild/x86_64/libmmpa.a
index bec195ad..13ca68db 100755
Binary files a/third_party/prebuild/x86_64/libmmpa.a and b/third_party/prebuild/x86_64/libmmpa.a differ