!513 decouple cce and resolve new changes to support windows

From: @taoxiangdong Reviewed-by: @youui,@xchu42,@ji_chen Signed-off-by:
4 years ago · 16ee0ea368
--- a/ge/ge_local_engine/engine/host_cpu_engine.cc
+++ b/ge/ge_local_engine/engine/host_cpu_engine.cc
@@ -14,7 +14,6 @@
 * limitations under the License.
 */
 #include "host_cpu_engine.h"
 #include <dlfcn.h>
 #include "graph/common/omg_util.h"
 #include "graph/utils/op_desc_utils.h"
 #include "graph/utils/tensor_adapter.h"
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -16,7 +16,6 @@

 #include "graph/load/new_model_manager/davinci_model.h"

 #include <cce/dnn.h>
 #include <graph/utils/node_utils.h>
 #include <algorithm>
 #include <map>
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
@@ -66,7 +66,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
  // get opcontext stored in model
  const domi::KernelContext &context = kernel_def.context();
  // get kernel_type
  kernel_type_ = static_cast<cce::ccKernelType>(context.kernel_type());
  kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
  // get opdesc
  op_desc_ = davinci_model_->GetOpByIndex(context.op_index());
  GE_CHECK_NOTNULL(op_desc_);
@@ -88,13 +88,13 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
  // get bin_file_key
  const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
  // new aicpu kernel(rtCpuKernelLaunch) no need to check function
  if (kernel_type_ == cce::ccKernelType::CCE_AI_CORE) {
  if (kernel_type_ == ccKernelType::CCE_AI_CORE) {
    rtError_t rt_ret;
    rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
    GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s",
                                                    kernel_def.stub_func().c_str());
                    return RT_ERROR_TO_GE_STATUS(rt_ret););
  } else if (kernel_type_ == cce::ccKernelType::TE) {
  } else if (kernel_type_ == ccKernelType::TE) {
    rtError_t rt_ret;
    rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
    GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
@@ -111,7 +111,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
    ctx_.opIndex2[i] = context.origin_op_index(i);
  }
  ctx_.opCount = context.origin_op_index_size();
  if (kernel_type_ == cce::ccKernelType::TE) {
  if (kernel_type_ == ccKernelType::TE) {
    ctx_.opIndex = context.op_index();
    uint16_t *args_offset_tmp = reinterpret_cast<uint16_t *>(const_cast<char *>(context.args_offset().data()));
    if (context.args_offset().size() / sizeof(uint16_t) < 1) {
@@ -120,9 +120,9 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
    }

    ret = InitTVMTask(args_offset_tmp[0], kernel_def);
  } else if (kernel_type_ == cce::ccKernelType::CUSTOMIZED) {
  } else if (kernel_type_ == ccKernelType::CUSTOMIZED) {
    ret = InitAICPUCustomTask(context.op_index(), kernel_def);
  } else if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) {
  } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
    ret = InitAicpuTask(context.op_index(), kernel_def);
  } else {
    if (kernel_def.args().empty() || args_size_ == 0) {
@@ -373,7 +373,7 @@ Status KernelTaskInfo::Distribute() {
  INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
  int64_t env_flag = (res == EN_OK) ? strtol(skt_enable_env, nullptr, 10) : 0;
  bool call_skt = ((env_flag != 0) || is_l1_fusion_enable_);
  if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) {
  if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
    GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_);
    // blockDim is reserved parameter, set to 1
    rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name_.c_str()),
@@ -874,7 +874,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
    return INTERNAL_ERROR;
  }

  if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) {
  if (kernel_type_ == ccKernelType::CUST_AI_CPU) {
    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed");
  }

@@ -946,7 +946,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
    GELOGI("Op debug is open in aicpu task info");
    dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead);
  }
  if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) {
  if (kernel_type_ == ccKernelType::CUST_AI_CPU) {
    dump_flag_ |= RT_KERNEL_CUSTOM_AICPU;
  }

@@ -1076,7 +1076,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d

 Status KernelTaskInfo::SetContext(const domi::KernelDef &kernel_def) {
  const domi::KernelContext &context = kernel_def.context();
  ctx_.kernelType = static_cast<cce::ccKernelType>(context.kernel_type());
  ctx_.kernelType = static_cast<ccKernelType>(context.kernel_type());
  ctx_.opId = context.op_id();
  ctx_.kernelFuncId = context.kernel_func_id();
  ctx_.isFlowtable = context.is_flowtable();
@@ -1161,9 +1161,9 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u
    GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", error);
    return FAILED;
  }
  cce::ccStatus_t cc_ret;
  ccStatus_t cc_ret;
  std::string update_kernel_args = "ccUpdateKernelArgs";
  auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(cce::ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t,
  auto cceUpdateKernelArgs = (ccStatus_t(*)(ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t,
                                                 void *))mmDlsym(handle, const_cast<char *>(update_kernel_args.c_str()));
  if (cceUpdateKernelArgs == nullptr) {
    GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs");
@@ -1189,7 +1189,7 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u
    GELOGW("Failed to close handle %s", error);
    return FAILED;
  }
  if (cc_ret != cce::CC_STATUS_SUCCESS) {
  if (cc_ret != CC_STATUS_SUCCESS) {
    GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret);
    return CCE_FAILED;
  }
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
@@ -43,7 +43,7 @@ class KernelTaskInfo : public TaskInfo {
        stream_id_(0),
        so_name_(""),
        kernel_name_(""),
        kernel_type_(cce::ccKernelType::CCE_AI_CORE),
        kernel_type_(ccKernelType::CCE_AI_CORE),
        dump_flag_(RT_KERNEL_DEFAULT),
        dump_args_(nullptr),
        op_desc_(nullptr),
@@ -75,7 +75,7 @@ class KernelTaskInfo : public TaskInfo {

  Status Release() override;

  cce::ccOpContext *GetCtx() override { return &ctx_; }
  ccOpContext *GetCtx() override { return &ctx_; }

  FusionOpInfo *GetFusionOpInfo() override { return &fusion_op_info_; }

@@ -92,7 +92,7 @@ class KernelTaskInfo : public TaskInfo {

  bool CallSaveDumpInfo() override  { return call_save_dump_; };

  cce::ccOpContext ctx_;
  ccOpContext ctx_;
  FusionOpInfo fusion_op_info_;

 private:
@@ -153,7 +153,7 @@ class KernelTaskInfo : public TaskInfo {
  uint32_t stream_id_;
  std::string so_name_;
  std::string kernel_name_;
  cce::ccKernelType kernel_type_;
  ccKernelType kernel_type_;
  uint32_t dump_flag_;
  void *dump_args_;
  OpDescPtr op_desc_;
--- a/ge/graph/load/new_model_manager/task_info/task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/task_info.h
@@ -20,7 +20,7 @@
 #include <vector>

 #include "cce/customize.h"
 #include "cce/taskdown_common.hpp"
 #include "framework/common/taskdown_common.h"
 #include "framework/common/ge_inner_error_codes.h"
 #include "graph/load/new_model_manager/ts_mem_mall.h"
 #include "graph/load/new_model_manager/task_info/task_info_factory.h"
@@ -87,7 +87,7 @@ class TaskInfo {

  virtual Status Release() { return SUCCESS; }

  virtual cce::ccOpContext *GetCtx() { return nullptr; }
  virtual ccOpContext *GetCtx() { return nullptr; }

  virtual uint32_t GetTaskID() { return 0xFFFFFFFF; }

--- a/ge/graph/manager/host_mem_manager.cc
+++ b/ge/graph/manager/host_mem_manager.cc
@@ -106,7 +106,7 @@ Status HostMemManager::QueryVarMemInfo(const string &op_name, uint64_t &base_add
    GELOGE(INTERNAL_ERROR, "Find host base base_addr failed,node name:%s!", op_name.c_str());
    return INTERNAL_ERROR;
  }
  base_addr = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(var_memory_base_map_[op_name].device_address));
  base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(var_memory_base_map_[op_name].device_address));
  data_size = var_memory_base_map_[op_name].mem_size;
  return SUCCESS;
 }
--- a/ge/host_kernels/ssd_prior_box_kernel.cc
+++ b/ge/host_kernels/ssd_prior_box_kernel.cc
@@ -180,7 +180,7 @@ Status SsdPriorboxKernel::SetVariance(const vector<float> &variance, const int d
  return SUCCESS;
 }

 Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint aspect_ratios_size, uint min_sizes_size, uint max_sizes_size,
 Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uint32_t min_sizes_size, uint32_t max_sizes_size,
                                                int layer_width, int layer_height, int &num_priors,
                                                int &dim_size) const {
  if (ge::CheckUint32MulOverflow(min_sizes_size, aspect_ratios_size) != SUCCESS) {
--- a/ge/host_kernels/ssd_prior_box_kernel.h
+++ b/ge/host_kernels/ssd_prior_box_kernel.h
@@ -100,7 +100,7 @@ class SsdPriorboxKernel : public Kernel {
   * @return OTHERS:  Execution failed
   * @author
   */
  Status GetNumPriorAndDimSize(uint aspect_ratios_size, uint min_sizes_size, uint max_sizes_size, int layer_width,
  Status GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uint32_t min_sizes_size, uint32_t max_sizes_size, int layer_width,
                               int layer_height, int &num_priors, int &dim_size) const;
  void DataCalulate(float x, float y, float box_x, float box_y, int img_x, int img_y, vector<float> &result);
  std::unique_ptr<float[]> BoundaryCalulate(int dim_size, int layer_width, int layer_height, float step_width,
--- a/ge/hybrid/executor/hybrid_profiler.h
+++ b/ge/hybrid/executor/hybrid_profiler.h
@@ -33,7 +33,7 @@ class HybridProfiler {
    SHAPE_INFERENCE,
    COMPILE,
    EXECUTION,
    CALLBACK,
    CALLBACK
  };

  struct Event {
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -767,7 +767,7 @@ Status HybridModelBuilder::HandleDtString(const GeTensor &tensor, void *var_addr
                           "Shape size is invalid");
    auto offset = static_cast<uint64_t>(elem_num * kBytes);
    auto hbm_raw_data_base_addr =
        reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(var_addr) + offset);
        static_cast<uint64_t>(reinterpret_cast<uintptr_t>(var_addr) + offset);
    for (int64_t i = elem_num - 1; i >= 0; --i) {
      buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]);
    }
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -15,7 +15,7 @@
 */

 #include "aicore_node_executor.h"
 #include "cce/taskdown_common.hpp"
 #include "framework/common/taskdown_common.h"
 #include "hybrid/executor/hybrid_execution_context.h"

 namespace ge {
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -15,7 +15,7 @@
 */

 #include "hybrid/node_executor/aicore/aicore_op_task.h"
 #include "cce/taskdown_common.hpp"
 #include "framework/common/taskdown_common.h"
 #include "framework/common/debug/log.h"
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/node_executor/aicore/aicore_task_builder.h"
@@ -95,8 +95,8 @@ Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) {

  const domi::KernelDef &kernel_def = task_def.kernel();
  const domi::KernelContext &context = kernel_def.context();
  auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
  if (kernel_type != cce::ccKernelType::TE) {
  auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
  if (kernel_type != ccKernelType::TE) {
    GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
    return INTERNAL_ERROR;
  }
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
@@ -15,7 +15,7 @@
 */

 #include "hybrid/node_executor/aicpu/aicpu_node_executor.h"
 #include "cce/taskdown_common.hpp"
 #include "framework/common/taskdown_common.h"
 #include "common/formats/formats.h"
 #include "aicpu/common/aicpu_task_struct.h"
 #include "graph/load/new_model_manager/model_manager.h"
@@ -642,8 +642,8 @@ Status AicpuNodeTask::Init(const HybridModel &model) {
  const std::string &so_name = kernel_def.so_name();
  const OpDescPtr op_desc = node_item_->GetOpDesc();
  const auto &context = kernel_def.context();
  auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
  if (kernel_type == cce::ccKernelType::CUST_AI_CPU) {
  auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
  if (kernel_type == ccKernelType::CUST_AI_CPU) {
    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name), "load cust aicpu so failed.");
    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
  }
@@ -723,9 +723,9 @@ Status AicpuNodeTask::UpdateIoAddr(TaskContext &context) {

  auto io_addr = args_.get() + sizeof(aicpu::AicpuParamHead);
  // if has input and output, need copy to ioaddr
  error_t cpy_ret = memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead),
  int cpy_ret = memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead),
                             &io_addrs[0], sizeof(uint64_t) * io_addrs.size());
  GE_CHK_BOOL_RET_STATUS(cpy_ret == EOK, INTERNAL_ERROR,
  GE_CHK_BOOL_RET_STATUS(cpy_ret == 0, INTERNAL_ERROR,
                         "Node[%s] memcpy io addr to AicpuParamHead failed, ret=%d, args_size=%u, io nums=%zu.",
                         node_name_.c_str(), cpy_ret, args_size_, io_addrs.size());
  return SUCCESS;
@@ -736,9 +736,9 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) {
  const auto &so_name = task_def_.kernel().so_name();
  const auto &kernel_name = task_def_.kernel().kernel_name();
  const auto &kcontext = task_def_.kernel().context();
  auto kernel_type = static_cast<cce::ccKernelType>(kcontext.kernel_type());
  auto kernel_type = static_cast<ccKernelType>(kcontext.kernel_type());
  uint32_t flag = RT_KERNEL_DEFAULT;
  if (kernel_type == cce::ccKernelType::CUST_AI_CPU) {
  if (kernel_type == ccKernelType::CUST_AI_CPU) {
    flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU);
  }
  auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()),
--- a/ge/single_op/single_op_model.cc
+++ b/ge/single_op/single_op_model.cc
@@ -237,8 +237,8 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
    if (task_type == RT_MODEL_TASK_KERNEL) {
      const domi::KernelDef &kernel_def = task_def.kernel();
      const auto &context = kernel_def.context();
      auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
      if (kernel_type == cce::ccKernelType::TE) {
      auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
      if (kernel_type == ccKernelType::TE) {
        GELOGD("Building TBE task");
        TbeOpTask *tbe_task = nullptr;
        auto ret = BuildKernelTask(task_def.kernel(), &tbe_task);
@@ -249,7 +249,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
        single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size());
        ParseArgTable(tbe_task, single_op);
        single_op.tasks_.emplace_back(tbe_task);
      } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
      } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
        GELOGD("Building AICPU_CC task");
        OpTask *task = nullptr;
        uint64_t singleop_kernel_id = aicpu_kernel_id++;
@@ -388,13 +388,13 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
 Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) {
  const domi::KernelDef &kernel_def = task_def.kernel();
  const auto &context = kernel_def.context();
  auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
  if (kernel_type == cce::ccKernelType::TE) {
  auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
  if (kernel_type == ccKernelType::TE) {
    GELOGD("Building TBE task");
    TbeOpTask *tbe_task = nullptr;
    GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task));
    single_op.op_task_.reset(tbe_task);
  } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
  } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
    GELOGD("Building AICPU_CC task");
    OpTask *task = nullptr;
    uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++;
--- a/ge/single_op/task/aicpu_kernel_task_builder.cc
+++ b/ge/single_op/task/aicpu_kernel_task_builder.cc
@@ -15,7 +15,7 @@
 */

 #include "single_op/task/aicpu_kernel_task_builder.h"
 #include "cce/taskdown_common.hpp"
 #include "framework/common/taskdown_common.h"
 #include "graph/load/new_model_manager/model_manager.h"

 namespace ge {
@@ -58,8 +58,8 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) {
  task.op_desc_ = op_desc_;

  const auto &context = kernel_def_.context();
  auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
  if (kernel_type == cce::ccKernelType::CUST_AI_CPU) {
  auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
  if (kernel_type == ccKernelType::CUST_AI_CPU) {
    task.is_custom_ = true;
    task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU;
    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed");
--- a/inc/external/ge/ge_api_types.h
+++ b/inc/external/ge/ge_api_types.h
@@ -369,6 +369,7 @@ static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str();
 static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str();

 // for interface: aclgrphBuildModel
 #ifdef __GNUC__
 const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT,
                                                             INPUT_SHAPE,
                                                             OP_NAME_MAP,
@@ -424,6 +425,7 @@ const std::set<std::string> global_options = {CORE_TYPE,
                                              DEBUG_DIR,
                                              OP_COMPILER_CACHE_DIR,
                                              OP_COMPILER_CACHE_MODE};
 #endif
 }  // namespace ir_option
 }  // namespace ge

--- a/inc/framework/common/op/ge_op_utils.h
+++ b/inc/framework/common/op/ge_op_utils.h
@@ -17,7 +17,6 @@
 #ifndef INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_
 #define INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_

 #include <cce/dnn.h>
 #include <memory>
 #include <vector>

@@ -32,7 +31,6 @@
 #include "proto/insert_op.pb.h"

 namespace ge {
 using namespace cce;
 using domi::Status;

 // Add Sub Mul
@@ -76,18 +74,7 @@ class OpUtils {
  static inline bool CheckEnumValid(int32_t check_value, int32_t min_enum_value, int32_t max_enum_value) {
    return check_value < min_enum_value ? false : (check_value >= max_enum_value ? false : true);
  }
  ///
  /// @ingroup domi_omg
  /// @brief Convert the dimension of array according to different format
  /// @param [in] src_format src_shape format
  /// @param [in] src Dimension array to be converted
  /// @param [in] dst_format Target format after conversion
  /// @param [out] dst Dimension array after conversion
  /// @return SUCCESS success
  /// @return FAILED fail
  ///
  static bool ConvertDim(ccTensorFormat_t src_format, const std::vector<int64_t> &src, ccTensorFormat_t dst_format,
                         std::vector<int64_t> &dst);

  ///
  /// @ingroup domi_omg
  /// @brief Determine whether to manually calculate the tensor size based on the values of format and dim
@@ -97,73 +84,6 @@ class OpUtils {
  /// @return false skip
  ///
  static bool IsComputDimsSize(const int32_t format, const uint32_t real_dim_cnt);
  ///
  /// @ingroup domi_ome
  /// @brief Initialize the tensor description, which is used for input and output.
  /// @param [in] model_tensor Tensor information defined by the offline model
  /// @param [out] cc_tensor Tensor definition used by CC
  /// @return SUCCESS success
  /// @return FAILED fail
  ///
  static Status InitTensorDescriptor(const ge::GeTensorDesc &model_tensor, ccTensorDescriptor_t &cc_tensor);
  ///
  /// @ingroup domi_ome
  /// @brief Initialize the tensor description, which is used for input and output.
  /// @param [in] model_tensor Tensor information defined by the offline model
  /// @param [in] dst_data_type data_type of the target cc_tensor
  /// @param [out] cc_tensor Tensor definition used by CC
  /// @return SUCCESS success
  /// @return FAILED fail
  ///
  static Status InitTensorDescriptor(const ge::GeTensorDesc &model_tensor, int32_t dst_data_type,
                                     ccTensorDescriptor_t &cc_tensor);
  ///
  /// @ingroup domi_ome
  /// @brief Initialize the tensor description for bias.
  /// @param [in] model_tensor Tensor information defined by the offline model
  /// @param [out]  cc_tensor Tensor definition used by CC
  /// @return SUCCESS success
  /// @return FAILED fail
  ///
  ///
  static Status InitTensorDescriptor(const ge::GeTensor &model_tensor, ccTensorDescriptor_t &cc_tensor);
  ///
  /// @ingroup domi_ome
  /// @brief Initialize the tensor description for bias.
  /// @param [in] model_tensor Tensor information defined by the offline model
  /// @param [in] dst_data_type data_type of the target cc_tensor
  /// @param [out] cc_tensor Tensor definition used by CC
  /// @return SUCCESS success
  /// @return FAILED fail
  ///
  static Status InitTensorDescriptor(const ge::GeTensor &model_tensor, int32_t dst_data_type,
                                     ccTensorDescriptor_t &cc_tensor);

  static Status InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector<int64_t> &dim,
                                     ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt = 4);
  ///
  /// @ingroup domi_ome
  /// @brief Destroys a tensor
  /// @param [inout] cc_tensor Tensor definition used by CC
  ///
  static void DestroyTensorDescriptor(ccTensorDescriptor_t &cc_tensor) noexcept;

  ///
  /// @ingroup domi_ome
  /// @brief Destroys a tensor
  /// @param [inout] cc_filter cc_filter Definition of the filter used by CC
  ///
  static void DestroyFilterDescriptor(ccFilterDescriptor_t &cc_filter);

  ///
  /// @ingroup domi_ome
  /// @brief Initializing Filter Description
  /// @param [in] model_filter Filter information defined in the offline model
  /// @param [out] cc_filter Definition of the filter used by CC
  /// @return SUCCESS success
  /// @return FAILED fail
  ///
  static Status InitFilterDescriptor(const ge::GeTensor &model_filter, ccFilterDescriptor_t &cc_filter);

  ///
  /// @brief Extract AIPP parameters from AttrDefMap and splice them
@@ -209,16 +129,7 @@ class OpUtils {
  /// @param [out] output Data pointer after conversion. The format is HWCK
  ///
  static void TransDataKCHW2HWCK(const void *input, int64_t K, int64_t C, int64_t H, int64_t W, void *output);
  ///
  /// @ingroup domi_omg
  /// @brief Initialize the input and output description of the data node which is applied to filter weight in the
  /// training network
  /// @param [in] model_tensor input and output tensor information
  /// @param [out] cc_tensor Tensor in CCE format after conversion
  ///
  static Status InitFilterTensorDescriptor(const ge::GeTensorDesc &model_tensor, ccFilterDescriptor_t &cc_tensor);

  static void SetTensorDescriptorAllOffsetQuantizeInfo(const GeTensorDesc &tensor, ccTensorDescriptor_t cc_tensor);
  
  static vector<ConstGeTensorPtr> GetWeights(const ge::Node &node);
  static vector<ConstGeTensorPtr> GetWeights(ge::ConstNodePtr node);
  static vector<GeTensorPtr> MutableWeights(const ge::Node &node);
@@ -228,69 +139,7 @@ class OpUtils {
  static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, DataType type, std::vector<int64_t> &dims);

 private:
  friend class CceTensorDescriptor;
  static uint32_t GetRealDimCnt(const GeTensorDesc &tensor_desc);
 };

 class CceTensorDescriptor;

 using CceTensorDescriptorPtr = std::shared_ptr<CceTensorDescriptor>;

 class CceTensorDescriptor {
 public:
  explicit CceTensorDescriptor(ccTensorDescriptor_t cc_tensor);
  CceTensorDescriptor(const CceTensorDescriptor &) = delete;
  CceTensorDescriptor &operator=(const CceTensorDescriptor &) = delete;

  ~CceTensorDescriptor();

  ccTensorDescriptor_t GetPtr() { return cc_tensor_; }

  ///
  /// @brief      Initializes the tensor based on shape information.
  /// @param[in]  format  data permutation format
  /// @param[in]  data_type Data Type
  /// @param[in]  dim dim information
  /// @return     return code
  ///
  Status InitTensor(int32_t format, int32_t data_type, const std::vector<int64_t> &dims);

  Status InitTensor(int32_t format, int32_t data_type, const ge::GeShape &shape);

  ///
  /// @brief      get format of tensor
  /// @param[out] format format of the tensor
  /// @return     return code
  ///
  Status GetFormat(ccTensorFormat_t *format);

  ///
  /// @brief      Obtains the size of the tensor.
  /// @param[out] size size of Tensor
  /// @return     return code
  ///
  Status GetTensorSizeInBytes(uint32_t *size);

  ///
  /// @brief transform tensor between 4d(NCHW) and 5d(NC1HWC0)
  /// @param [in] xDesc   descriptor of input tensor
  /// @param [in] x   point to input data in host memory
  /// @param [in] dataTypeTransmode   mode of data type transform
  /// @param [in] yDesc   descriptor of output tensor
  /// @param [in|out] y   point to output data in host memory
  /// @param [in] ySizeInBytes   size of outputData
  /// @return return code
  ///
  static Status TransTensor(const ccTensorDescriptor_t xDesc, const void *x, const CceTensorDescriptorPtr &yDesc,
                            void *y, uint32_t ySizeInBytes);

  ///
  /// @brief      CceTensorDescriptor Static Constructor
  /// @return     CceTensorDescriptor smart pointer
  ///
  static CceTensorDescriptorPtr Create();

  ccTensorDescriptor_t cc_tensor_ = nullptr;
 };
 }  // namespace ge
 #endif  // INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_
--- a/inc/framework/common/op/op_parser_util.h
+++ b/inc/framework/common/op/op_parser_util.h
@@ -17,7 +17,6 @@
 #ifndef INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_
 #define INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_

 #include <cce/dnn.h>
 #include <limits.h>
 #include <math.h>
 #include <stdint.h>
@@ -31,10 +30,7 @@ const uint32_t NORMAL_OUTPUT_NUM = 1;
 const uint32_t NORMAL_WORKSPACE_NUM = 0;
 const int32_t NORMAL_1D_DIM_NUM = 1;
 const int32_t NORMAL_SCALE_DIM_NUM = 0;
 const int NORMAL_TENSOR_FORMAT = static_cast<const int>(cce::CC_TENSOR_NC1HWC0);
 const int NORMAL_TENSOR_SIZE = 4;
 const int NORMAL_DEVICE_DATA_TYPE = static_cast<const int>(cce::CC_DATA_HALF);
 const int DEFAULT_POOLING_MODE = static_cast<const int>(cce::CC_POOLING_MAX);
 const uint32_t DEFAULT_REAL_DIM_CNT = 4;

 // const
@@ -183,7 +179,6 @@ const int32_t SSD_DETECTIONOUTPUT_BACKGROUND_LABEL_ID_DEFAULT_VALUE = 0;
 const float SSD_DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3;
 const int32_t SSD_DETECTIONOUTPUT_TOP_K_DEFAULT_VALUE = 200;
 const float SSD_DETECTIONOUTPUT_ETA_DEFAULT_VALUE = 1.0;
 const int SSD_DETECTIONOUTPUT_CODE_TYPE_DEFAULT_VALUE = static_cast<const int>(cce::CC_BOX_CENTER_SIZE);
 const int32_t SSD_DETECTIONOUTPUT_KEEP_TOP_K_DEFAULT_VALUE = 200;
 const bool SSD_DETECTIONOUTPUT_VARIANCE_ENCODED_IN_TARGET_DEFAULT_VALUE = false;
 const float SSD_DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.1;
@@ -200,7 +195,6 @@ const float REFINEDET_DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3;
 const int32_t REFINEDET_DETECTIONOUTPUT_TOP_K_DEFAULT_VALUE = 200;
 const float REFINEDET_DETECTIONOUTPUT_ETA_DEFAULT_VALUE = 1.0;
 const bool REFINEDET_DETECTIONOUTPUT_VARIANCE_ENCODED_IN_TARGET_DEFAULT_VALUE = false;
 const int REFINEDET_DETECTIONOUTPUT_CODE_TYPE_DEFAULT_VALUE = static_cast<const int>(cce::CC_BOX_CENTER_SIZE);
 const int32_t REFINEDET_DETECTIONOUTPUT_KEEP_TOP_K_DEFAULT_VALUE = 200;
 const float REFINEDET_DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.1;
 const float REFINEDET_DETECTIONOUTPUT_OBJECTNESS_SCORE_DEFAULT_VALUE = 0;
--- a/inc/framework/common/taskdown_common.h
+++ b/inc/framework/common/taskdown_common.h
@@ -0,0 +1,73 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #ifndef INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_
 #define INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_

 #include "runtime/rt.h"

 using namespace std;

 namespace ge {

 #define CC_FUSION_OP_MAX 32

 /// Status codes used for CCE-style calls (for example, the result of the
 /// dynamically resolved ccUpdateKernelArgs is compared against CC_STATUS_SUCCESS).
 /// Every value is written out explicitly; keep the numbering stable.
 enum tagCcStatus {
  CC_STATUS_SUCCESS = 0,          ///< success
  CC_STATUS_NOT_INITIALIZED = 1,  ///< not initialized
  CC_STATUS_ALLOC_FAILED = 2,     ///< memory allocation failed
  CC_STATUS_BAD_PARAM = 3,        ///< parameter check failed
  CC_STATUS_INTERNAL_ERROR = 4,   ///< internal error
  CC_STATUS_KERNEL_ERROR = 5,     ///< kernel error
  CC_STATUS_RUNTIME_ERROR = 6,    ///< runtime error
  CC_STATUS_NOT_SUPPORTED = 7,    ///< unsupported operation
  CC_STATUS_INVALID_VALUE = 7,    ///< invalid value (BLAS); shares the value 7 with CC_STATUS_NOT_SUPPORTED
  CC_STATUS_RESERVED = 8          ///< reserved, used only for range checks
 };
 using ccStatus_t = tagCcStatus;

 /// Kind of kernel a task runs. Callers obtain it by casting the integer
 /// kernel_type stored in the kernel context, so the numeric values below
 /// must stay exactly as they are.
 enum tagccKernelType {
  CCE_AI_CORE = 0,  ///< cce aicore
  CCE_AI_CPU = 1,   ///< cce aicpu
  TE = 2,           ///< te operator
  CUSTOMIZED = 3,   ///< customized operator
  TE_AI_CORE = 4,   ///< te aicore operator
  TE_AI_CPU = 5,    ///< te aicpu operator
  AI_CPU = 6,       ///< aicpu
  CUST_AI_CPU = 7,  ///< custom aicpu
  INVALID = 8       ///< unknown kernel type
 };
 using ccKernelType = tagccKernelType;

 /// Per-kernel operator context. KernelTaskInfo fills it from the kernel
 /// context of a task definition and hands it by reference to the dynamically
 /// resolved ccUpdateKernelArgs function.
 /// NOTE(review): because it crosses that library boundary, member order and
 /// types presumably have to match the layout the library expects - confirm
 /// before reordering or resizing any field.
 typedef struct tagOpContext {
  ccKernelType kernelType;              // kernel type, cast from the context's kernel_type
  uint32_t opId;                        // set from the context's op_id
  uint32_t kernelFuncId;                // set from the context's kernel_func_id
  uint32_t opIndex;                     // set from the context's op_index (TE kernels)
  uint32_t opCount;                     // set from the context's origin_op_index_size
  uint32_t opIndex2[CC_FUSION_OP_MAX];  // origin op indices, copied from origin_op_index(i)
  bool isFlowtable;                     // set from the context's is_flowtable
  uint16_t *argsOffset;                 // presumably offsets into the kernel args - TODO confirm
  uint32_t argsCount;                   // presumably number of entries in argsOffset - TODO confirm
  // The base/size pairs below are not populated in the visible code; the names
  // suggest data, weight and variable memory regions - verify against the caller.
  uint64_t genDataBaseAddr;
  uint64_t genDataBaseSize;
  uint64_t genWeightBaseAddr;
  uint64_t genWeightBaseSize;
  uint64_t genVariableBaseAddr;
  uint64_t genVariableBaseSize;
  uint64_t l2ctrlSize;
 } ccOpContext;
 }  // namespace ge

 #endif  // INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_
--- a/+ 1
+++ b/+ 1
@@ -1 +1 @@
 Subproject commit 9e392045c26a57913b512d0686e1285650b62abe
 Subproject commit 47c1c18b4b8e5ab38ae1e380c9f1671cbafc4aee