
decouple cce in graphengine

tags/v1.2.0
taoxiangdong 3 years ago
commit 55fd4c4c14
13 changed files with 112 additions and 195 deletions
1. ge/graph/load/new_model_manager/davinci_model.cc (+0, -1)
2. ge/graph/load/new_model_manager/task_info/kernel_task_info.cc (+11, -11)
3. ge/graph/load/new_model_manager/task_info/kernel_task_info.h (+4, -4)
4. ge/graph/load/new_model_manager/task_info/task_info.h (+2, -2)
5. ge/hybrid/node_executor/aicore/aicore_node_executor.cc (+1, -1)
6. ge/hybrid/node_executor/aicore/aicore_op_task.cc (+3, -3)
7. ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc (+5, -5)
8. ge/single_op/single_op_model.cc (+6, -6)
9. ge/single_op/task/aicpu_kernel_task_builder.cc (+3, -3)
10. inc/external/ge/ge_api_types.h (+2, -0)
11. inc/framework/common/op/ge_op_utils.h (+2, -153)
12. inc/framework/common/op/op_parser_util.h (+0, -6)
13. inc/framework/common/taskdown_common.h (+73, -0)

ge/graph/load/new_model_manager/davinci_model.cc (+0, -1)

@@ -16,7 +16,6 @@
#include "graph/load/new_model_manager/davinci_model.h"

-#include <cce/dnn.h>
#include <graph/utils/node_utils.h>
#include <algorithm>
#include <map>


ge/graph/load/new_model_manager/task_info/kernel_task_info.cc (+11, -11)

@@ -66,7 +66,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
// get opcontext stored in model
const domi::KernelContext &context = kernel_def.context();
// get kernel_type
-kernel_type_ = static_cast<cce::ccKernelType>(context.kernel_type());
+kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
// get opdesc
op_desc_ = davinci_model_->GetOpByIndex(context.op_index());
GE_CHECK_NOTNULL(op_desc_);
@@ -88,13 +88,13 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
// get bin_file_key
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
// new aicpu kernel(rtCpuKernelLaunch) no need to check function
-if (kernel_type_ == cce::ccKernelType::CCE_AI_CORE) {
+if (kernel_type_ == ccKernelType::CCE_AI_CORE) {
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s",
kernel_def.stub_func().c_str());
return RT_ERROR_TO_GE_STATUS(rt_ret););
-} else if (kernel_type_ == cce::ccKernelType::TE) {
+} else if (kernel_type_ == ccKernelType::TE) {
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
@@ -111,7 +111,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
ctx_.opIndex2[i] = context.origin_op_index(i);
}
ctx_.opCount = context.origin_op_index_size();
-if (kernel_type_ == cce::ccKernelType::TE) {
+if (kernel_type_ == ccKernelType::TE) {
ctx_.opIndex = context.op_index();
uint16_t *args_offset_tmp = reinterpret_cast<uint16_t *>(const_cast<char *>(context.args_offset().data()));
if (context.args_offset().size() / sizeof(uint16_t) < 1) {
@@ -120,9 +120,9 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
}

ret = InitTVMTask(args_offset_tmp[0], kernel_def);
-} else if (kernel_type_ == cce::ccKernelType::CUSTOMIZED) {
+} else if (kernel_type_ == ccKernelType::CUSTOMIZED) {
ret = InitAICPUCustomTask(context.op_index(), kernel_def);
-} else if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) {
+} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
ret = InitAicpuTask(context.op_index(), kernel_def);
} else {
if (kernel_def.args().empty() || args_size_ == 0) {
@@ -373,7 +373,7 @@ Status KernelTaskInfo::Distribute() {
INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
int64_t env_flag = (res == EN_OK) ? strtol(skt_enable_env, nullptr, 10) : 0;
bool call_skt = ((env_flag != 0) || is_l1_fusion_enable_);
-if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) {
+if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_);
// blockDim is reserved parameter, set to 1
rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name_.c_str()),
@@ -874,7 +874,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
return INTERNAL_ERROR;
}

-if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) {
+if (kernel_type_ == ccKernelType::CUST_AI_CPU) {
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed");
}

@@ -946,7 +946,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
GELOGI("Op debug is open in aicpu task info");
dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead);
}
-if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) {
+if (kernel_type_ == ccKernelType::CUST_AI_CPU) {
dump_flag_ |= RT_KERNEL_CUSTOM_AICPU;
}

@@ -1076,7 +1076,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d

Status KernelTaskInfo::SetContext(const domi::KernelDef &kernel_def) {
const domi::KernelContext &context = kernel_def.context();
-ctx_.kernelType = static_cast<cce::ccKernelType>(context.kernel_type());
+ctx_.kernelType = static_cast<ccKernelType>(context.kernel_type());
ctx_.opId = context.op_id();
ctx_.kernelFuncId = context.kernel_func_id();
ctx_.isFlowtable = context.is_flowtable();
@@ -1163,7 +1163,7 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u
}
cce::ccStatus_t cc_ret;
std::string update_kernel_args = "ccUpdateKernelArgs";
-auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(cce::ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t,
+auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t,
void *))mmDlsym(handle, const_cast<char *>(update_kernel_args.c_str()));
if (cceUpdateKernelArgs == nullptr) {
GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs");
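
Most of this commit is the mechanical substitution visible above: the kernel type still arrives as a raw integer in the serialized domi::KernelContext, but it is now cast to the GE-local ccKernelType from the new inc/framework/common/taskdown_common.h rather than to cce::ccKernelType. A minimal standalone sketch of that cast-and-dispatch pattern follows; the enumerators are copied from the new header, while the proto accessor is a stub for illustration, not the real GE API.

#include <cstdint>
#include <iostream>

// Enumerators copied from the new inc/framework/common/taskdown_common.h.
typedef enum tagccKernelType {
  CCE_AI_CORE = 0, CCE_AI_CPU = 1, TE = 2, CUSTOMIZED = 3,
  TE_AI_CORE = 4, TE_AI_CPU = 5, AI_CPU = 6, CUST_AI_CPU = 7, INVALID = 8
} ccKernelType;

// Stand-in for domi::KernelContext::kernel_type(), which returns a plain int.
int32_t KernelTypeFromProto() { return 2; }  // pretend the task is a TE (TBE) kernel

const char *Describe(ccKernelType type) {
  // Same branch order as KernelTaskInfo::Init in the hunk above.
  if (type == ccKernelType::CCE_AI_CORE) return "CCE AI Core kernel";
  if (type == ccKernelType::TE) return "TE (TBE) kernel";
  if (type == ccKernelType::CUSTOMIZED) return "customized AICPU kernel";
  if (type == ccKernelType::AI_CPU || type == ccKernelType::CUST_AI_CPU) return "AICPU kernel";
  return "unknown kernel type";
}

int main() {
  auto kernel_type = static_cast<ccKernelType>(KernelTypeFromProto());
  std::cout << Describe(kernel_type) << std::endl;
  return 0;
}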

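The CceUpdateKernelArgs hunk is the one spot that still reaches into the old cce library, and it does so through a runtime lookup: mmDlsym returns a void pointer that is cast to a function-pointer type which now takes the GE-local ccOpContext. For readers unfamiliar with that idiom, here is a minimal sketch using plain POSIX dlopen/dlsym against libm; GE itself goes through its mmpa wrapper mmDlsym, and the library and symbol below are illustrative only.

#include <dlfcn.h>
#include <cstdio>

// The function-pointer type we expect the symbol to have; in the GE code above
// it is ccUpdateKernelArgs taking a ccOpContext& plus address/size metadata.
using cosine_fn = double (*)(double);

int main() {
  // Open libm and resolve "cos" purely to demonstrate the pattern.
  void *handle = dlopen("libm.so.6", RTLD_LAZY);
  if (handle == nullptr) {
    std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }
  // dlsym returns void*; the caller must cast it to the real function type,
  // exactly as KernelTaskInfo::CceUpdateKernelArgs casts the mmDlsym result.
  auto cosine = reinterpret_cast<cosine_fn>(dlsym(handle, "cos"));
  if (cosine == nullptr) {
    std::fprintf(stderr, "dlsym failed: %s\n", dlerror());
    dlclose(handle);
    return 1;
  }
  std::fprintf(stdout, "cos(0) = %f\n", cosine(0.0));
  dlclose(handle);
  return 0;
}

On Linux this builds with something like g++ dlsym_sketch.cc -ldl; the -ldl is a no-op on newer glibc, where the dl* functions live in libc itself.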

ge/graph/load/new_model_manager/task_info/kernel_task_info.h (+4, -4)

@@ -43,7 +43,7 @@ class KernelTaskInfo : public TaskInfo {
stream_id_(0),
so_name_(""),
kernel_name_(""),
-kernel_type_(cce::ccKernelType::CCE_AI_CORE),
+kernel_type_(ccKernelType::CCE_AI_CORE),
dump_flag_(RT_KERNEL_DEFAULT),
dump_args_(nullptr),
op_desc_(nullptr),
@@ -75,7 +75,7 @@ class KernelTaskInfo : public TaskInfo {

Status Release() override;

-cce::ccOpContext *GetCtx() override { return &ctx_; }
+ccOpContext *GetCtx() override { return &ctx_; }

FusionOpInfo *GetFusionOpInfo() override { return &fusion_op_info_; }

@@ -92,7 +92,7 @@ class KernelTaskInfo : public TaskInfo {

bool CallSaveDumpInfo() override { return call_save_dump_; };

-cce::ccOpContext ctx_;
+ccOpContext ctx_;
FusionOpInfo fusion_op_info_;

private:
@@ -153,7 +153,7 @@ class KernelTaskInfo : public TaskInfo {
uint32_t stream_id_;
std::string so_name_;
std::string kernel_name_;
-cce::ccKernelType kernel_type_;
+ccKernelType kernel_type_;
uint32_t dump_flag_;
void *dump_args_;
OpDescPtr op_desc_;


ge/graph/load/new_model_manager/task_info/task_info.h (+2, -2)

@@ -20,7 +20,7 @@
#include <vector>

#include "cce/customize.h"
-#include "cce/taskdown_common.hpp"
+#include "framework/common/taskdown_common.h"
#include "framework/common/ge_inner_error_codes.h"
#include "graph/load/new_model_manager/ts_mem_mall.h"
#include "graph/load/new_model_manager/task_info/task_info_factory.h"
@@ -87,7 +87,7 @@ class TaskInfo {

virtual Status Release() { return SUCCESS; }

-virtual cce::ccOpContext *GetCtx() { return nullptr; }
+virtual ccOpContext *GetCtx() { return nullptr; }

virtual uint32_t GetTaskID() { return 0xFFFFFFFF; }




ge/hybrid/node_executor/aicore/aicore_node_executor.cc (+1, -1)

@@ -15,7 +15,7 @@
*/

#include "aicore_node_executor.h"
-#include "cce/taskdown_common.hpp"
+#include "framework/common/taskdown_common.h"
#include "hybrid/executor/hybrid_execution_context.h"

namespace ge {


ge/hybrid/node_executor/aicore/aicore_op_task.cc (+3, -3)

@@ -15,7 +15,7 @@
*/

#include "hybrid/node_executor/aicore/aicore_op_task.h"
-#include "cce/taskdown_common.hpp"
+#include "framework/common/taskdown_common.h"
#include "framework/common/debug/log.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "hybrid/node_executor/aicore/aicore_task_builder.h"
@@ -151,8 +151,8 @@ Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) {

const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
-auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
-if (kernel_type != cce::ccKernelType::TE) {
+auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
+if (kernel_type != ccKernelType::TE) {
GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
return INTERNAL_ERROR;
}


ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc (+5, -5)

@@ -15,7 +15,7 @@
*/

#include "hybrid/node_executor/aicpu/aicpu_node_executor.h"
-#include "cce/taskdown_common.hpp"
+#include "framework/common/taskdown_common.h"
#include "common/formats/formats.h"
#include "aicpu/common/aicpu_task_struct.h"
#include "graph/load/new_model_manager/model_manager.h"
@@ -642,8 +642,8 @@ Status AicpuNodeTask::Init(const HybridModel &model) {
const std::string &so_name = kernel_def.so_name();
const OpDescPtr op_desc = node_item_->GetOpDesc();
const auto &context = kernel_def.context();
-auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
-if (kernel_type == cce::ccKernelType::CUST_AI_CPU) {
+auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
+if (kernel_type == ccKernelType::CUST_AI_CPU) {
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name), "load cust aicpu so failed.");
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
}
@@ -736,9 +736,9 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) {
const auto &so_name = task_def_.kernel().so_name();
const auto &kernel_name = task_def_.kernel().kernel_name();
const auto &kcontext = task_def_.kernel().context();
-auto kernel_type = static_cast<cce::ccKernelType>(kcontext.kernel_type());
+auto kernel_type = static_cast<ccKernelType>(kcontext.kernel_type());
uint32_t flag = RT_KERNEL_DEFAULT;
-if (kernel_type == cce::ccKernelType::CUST_AI_CPU) {
+if (kernel_type == ccKernelType::CUST_AI_CPU) {
flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU);
}
auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()),


ge/single_op/single_op_model.cc (+6, -6)

@@ -237,8 +237,8 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
if (task_type == RT_MODEL_TASK_KERNEL) {
const domi::KernelDef &kernel_def = task_def.kernel();
const auto &context = kernel_def.context();
-auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
-if (kernel_type == cce::ccKernelType::TE) {
+auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
+if (kernel_type == ccKernelType::TE) {
GELOGD("Building TBE task");
TbeOpTask *tbe_task = nullptr;
auto ret = BuildKernelTask(task_def.kernel(), &tbe_task);
@@ -249,7 +249,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size());
ParseArgTable(tbe_task, single_op);
single_op.tasks_.emplace_back(tbe_task);
-} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
+} else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
GELOGD("Building AICPU_CC task");
OpTask *task = nullptr;
uint64_t singleop_kernel_id = aicpu_kernel_id++;
@@ -388,13 +388,13 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) {
const domi::KernelDef &kernel_def = task_def.kernel();
const auto &context = kernel_def.context();
-auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
-if (kernel_type == cce::ccKernelType::TE) {
+auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
+if (kernel_type == ccKernelType::TE) {
GELOGD("Building TBE task");
TbeOpTask *tbe_task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task));
single_op.op_task_.reset(tbe_task);
-} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
+} else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
GELOGD("Building AICPU_CC task");
OpTask *task = nullptr;
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++;


ge/single_op/task/aicpu_kernel_task_builder.cc (+3, -3)

@@ -15,7 +15,7 @@
*/

#include "single_op/task/aicpu_kernel_task_builder.h"
-#include "cce/taskdown_common.hpp"
+#include "framework/common/taskdown_common.h"
#include "graph/load/new_model_manager/model_manager.h"

namespace ge {
@@ -58,8 +58,8 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) {
task.op_desc_ = op_desc_;

const auto &context = kernel_def_.context();
-auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
-if (kernel_type == cce::ccKernelType::CUST_AI_CPU) {
+auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
+if (kernel_type == ccKernelType::CUST_AI_CPU) {
task.is_custom_ = true;
task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU;
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed");


inc/external/ge/ge_api_types.h (+2, -0)

@@ -369,6 +369,7 @@ static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str();
static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str();

// for interface: aclgrphBuildModel
+#ifdef __GNUC__
const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT,
INPUT_SHAPE,
OP_NAME_MAP,
@@ -424,6 +425,7 @@ const std::set<std::string> global_options = {CORE_TYPE,
DEBUG_DIR,
OP_COMPILER_CACHE_DIR,
OP_COMPILER_CACHE_MODE};
+#endif
} // namespace ir_option
} // namespace ge




inc/framework/common/op/ge_op_utils.h (+2, -153)

@@ -17,7 +17,6 @@
#ifndef INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_
#define INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_

-#include <cce/dnn.h>
#include <memory>
#include <vector>
@@ -32,7 +31,6 @@
#include "proto/insert_op.pb.h"

namespace ge {
-using namespace cce;
using domi::Status;

// Add Sub Mul
@@ -76,18 +74,7 @@ class OpUtils {
static inline bool CheckEnumValid(int32_t check_value, int32_t min_enum_value, int32_t max_enum_value) {
return check_value < min_enum_value ? false : (check_value >= max_enum_value ? false : true);
}
-///
-/// @ingroup domi_omg
-/// @brief Convert the dimension of array according to different format
-/// @param [in] src_format src_shape format
-/// @param [in] src Dimension array to be converted
-/// @param [in] dst_format Target format after conversion
-/// @param [out] dst Dimension array after conversion
-/// @return SUCCESS success
-/// @return FAILED fail
-///
-static bool ConvertDim(ccTensorFormat_t src_format, const std::vector<int64_t> &src, ccTensorFormat_t dst_format,
-std::vector<int64_t> &dst);

///
/// @ingroup domi_omg
/// @brief Determine whether to manually calculate the tensor size based on the values of format and dim
@@ -97,73 +84,6 @@ class OpUtils {
/// @return false skip
///
static bool IsComputDimsSize(const int32_t format, const uint32_t real_dim_cnt);
-///
-/// @ingroup domi_ome
-/// @brief Initialize the tensor description, which is used for input and output.
-/// @param [in] model_tensor Tensor information defined by the offline model
-/// @param [out] cc_tensor Tensor definition used by CC
-/// @return SUCCESS success
-/// @return FAILED fail
-///
-static Status InitTensorDescriptor(const ge::GeTensorDesc &model_tensor, ccTensorDescriptor_t &cc_tensor);
-///
-/// @ingroup domi_ome
-/// @brief Initialize the tensor description, which is used for input and output.
-/// @param [in] model_tensor Tensor information defined by the offline model
-/// @param [in] dst_data_type data_type of the target cc_tensor
-/// @param [out] cc_tensor Tensor definition used by CC
-/// @return SUCCESS success
-/// @return FAILED fail
-///
-static Status InitTensorDescriptor(const ge::GeTensorDesc &model_tensor, int32_t dst_data_type,
-ccTensorDescriptor_t &cc_tensor);
-///
-/// @ingroup domi_ome
-/// @brief Initialize the tensor description for bias.
-/// @param [in] model_tensor Tensor information defined by the offline model
-/// @param [out] cc_tensor Tensor definition used by CC
-/// @return SUCCESS success
-/// @return FAILED fail
-///
-///
-static Status InitTensorDescriptor(const ge::GeTensor &model_tensor, ccTensorDescriptor_t &cc_tensor);
-///
-/// @ingroup domi_ome
-/// @brief Initialize the tensor description for bias.
-/// @param [in] model_tensor Tensor information defined by the offline model
-/// @param [in] dst_data_type data_type of the target cc_tensor
-/// @param [out] cc_tensor Tensor definition used by CC
-/// @return SUCCESS success
-/// @return FAILED fail
-///
-static Status InitTensorDescriptor(const ge::GeTensor &model_tensor, int32_t dst_data_type,
-ccTensorDescriptor_t &cc_tensor);
-
-static Status InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector<int64_t> &dim,
-ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt = 4);
-///
-/// @ingroup domi_ome
-/// @brief Destroys a tensor
-/// @param [inout] cc_tensor Tensor definition used by CC
-///
-static void DestroyTensorDescriptor(ccTensorDescriptor_t &cc_tensor) noexcept;
-
-///
-/// @ingroup domi_ome
-/// @brief Destroys a tensor
-/// @param [inout] cc_filter cc_filter Definition of the filter used by CC
-///
-static void DestroyFilterDescriptor(ccFilterDescriptor_t &cc_filter);
-
-///
-/// @ingroup domi_ome
-/// @brief Initializing Filter Description
-/// @param [in] model_filter Filter information defined in the offline model
-/// @param [out] cc_filter Definition of the filter used by CC
-/// @return SUCCESS success
-/// @return FAILED fail
-///
-static Status InitFilterDescriptor(const ge::GeTensor &model_filter, ccFilterDescriptor_t &cc_filter);

///
/// @brief Extract AIPP parameters from AttrDefMap and splice them
@@ -209,16 +129,7 @@ class OpUtils {
/// @param [out] output Data pointer after conversion. The format is HWCK
///
static void TransDataKCHW2HWCK(const void *input, int64_t K, int64_t C, int64_t H, int64_t W, void *output);
-///
-/// @ingroup domi_omg
-/// @brief Initialize the input and output description of the data node which is applied to filter weight in the
-/// training network
-/// @param [in] model_tensor input and output tensor information
-/// @param [out] cc_tensor Tensor in CCE format after conversion
-///
-static Status InitFilterTensorDescriptor(const ge::GeTensorDesc &model_tensor, ccFilterDescriptor_t &cc_tensor);
-
-static void SetTensorDescriptorAllOffsetQuantizeInfo(const GeTensorDesc &tensor, ccTensorDescriptor_t cc_tensor);
static vector<ConstGeTensorPtr> GetWeights(const ge::Node &node);
static vector<ConstGeTensorPtr> GetWeights(ge::ConstNodePtr node);
static vector<GeTensorPtr> MutableWeights(const ge::Node &node);
@@ -228,69 +139,7 @@ class OpUtils {
static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, DataType type, std::vector<int64_t> &dims);

private:
-friend class CceTensorDescriptor;
static uint32_t GetRealDimCnt(const GeTensorDesc &tensor_desc);
};
-
-class CceTensorDescriptor;
-
-using CceTensorDescriptorPtr = std::shared_ptr<CceTensorDescriptor>;
-
-class CceTensorDescriptor {
-public:
-explicit CceTensorDescriptor(ccTensorDescriptor_t cc_tensor);
-CceTensorDescriptor(const CceTensorDescriptor &) = delete;
-CceTensorDescriptor &operator=(const CceTensorDescriptor &) = delete;
-
-~CceTensorDescriptor();
-
-ccTensorDescriptor_t GetPtr() { return cc_tensor_; }
-
-///
-/// @brief Initializes the tensor based on shape information.
-/// @param[in] format data permutation format
-/// @param[in] data_type Data Type
-/// @param[in] dim dim information
-/// @return return code
-///
-Status InitTensor(int32_t format, int32_t data_type, const std::vector<int64_t> &dims);
-
-Status InitTensor(int32_t format, int32_t data_type, const ge::GeShape &shape);
-
-///
-/// @brief get format of tensor
-/// @param[out] format format of the tensor
-/// @return return code
-///
-Status GetFormat(ccTensorFormat_t *format);
-
-///
-/// @brief Obtains the size of the tensor.
-/// @param[out] size size of Tensor
-/// @return return code
-///
-Status GetTensorSizeInBytes(uint32_t *size);
-
-///
-/// @brief transform tensor between 4d(NCHW) and 5d(NC1HWC0)
-/// @param [in] xDesc descriptor of input tensor
-/// @param [in] x point to input data in host memory
-/// @param [in] dataTypeTransmode mode of data type transform
-/// @param [in] yDesc descriptor of output tensor
-/// @param [in|out] y point to output data in host memory
-/// @param [in] ySizeInBytes size of outputData
-/// @return return code
-///
-static Status TransTensor(const ccTensorDescriptor_t xDesc, const void *x, const CceTensorDescriptorPtr &yDesc,
-void *y, uint32_t ySizeInBytes);
-
-///
-/// @brief CceTensorDescriptor Static Constructor
-/// @return CceTensorDescriptor smart pointer
-///
-static CceTensorDescriptorPtr Create();
-
-ccTensorDescriptor_t cc_tensor_ = nullptr;
-};
} // namespace ge
#endif // INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_

inc/framework/common/op/op_parser_util.h (+0, -6)

@@ -17,7 +17,6 @@
#ifndef INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_
#define INC_FRAMEWORK_COMMON_OP_OP_PARSER_UTIL_H_

-#include <cce/dnn.h>
#include <limits.h>
#include <math.h>
#include <stdint.h>
@@ -31,10 +30,7 @@ const uint32_t NORMAL_OUTPUT_NUM = 1;
const uint32_t NORMAL_WORKSPACE_NUM = 0;
const int32_t NORMAL_1D_DIM_NUM = 1;
const int32_t NORMAL_SCALE_DIM_NUM = 0;
-const int NORMAL_TENSOR_FORMAT = static_cast<const int>(cce::CC_TENSOR_NC1HWC0);
const int NORMAL_TENSOR_SIZE = 4;
-const int NORMAL_DEVICE_DATA_TYPE = static_cast<const int>(cce::CC_DATA_HALF);
-const int DEFAULT_POOLING_MODE = static_cast<const int>(cce::CC_POOLING_MAX);
const uint32_t DEFAULT_REAL_DIM_CNT = 4;

// const
@@ -183,7 +179,6 @@ const int32_t SSD_DETECTIONOUTPUT_BACKGROUND_LABEL_ID_DEFAULT_VALUE = 0;
const float SSD_DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3;
const int32_t SSD_DETECTIONOUTPUT_TOP_K_DEFAULT_VALUE = 200;
const float SSD_DETECTIONOUTPUT_ETA_DEFAULT_VALUE = 1.0;
-const int SSD_DETECTIONOUTPUT_CODE_TYPE_DEFAULT_VALUE = static_cast<const int>(cce::CC_BOX_CENTER_SIZE);
const int32_t SSD_DETECTIONOUTPUT_KEEP_TOP_K_DEFAULT_VALUE = 200;
const bool SSD_DETECTIONOUTPUT_VARIANCE_ENCODED_IN_TARGET_DEFAULT_VALUE = false;
const float SSD_DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.1;
@@ -200,7 +195,6 @@ const float REFINEDET_DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3;
const int32_t REFINEDET_DETECTIONOUTPUT_TOP_K_DEFAULT_VALUE = 200;
const float REFINEDET_DETECTIONOUTPUT_ETA_DEFAULT_VALUE = 1.0;
const bool REFINEDET_DETECTIONOUTPUT_VARIANCE_ENCODED_IN_TARGET_DEFAULT_VALUE = false;
-const int REFINEDET_DETECTIONOUTPUT_CODE_TYPE_DEFAULT_VALUE = static_cast<const int>(cce::CC_BOX_CENTER_SIZE);
const int32_t REFINEDET_DETECTIONOUTPUT_KEEP_TOP_K_DEFAULT_VALUE = 200;
const float REFINEDET_DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.1;
const float REFINEDET_DETECTIONOUTPUT_OBJECTNESS_SCORE_DEFAULT_VALUE = 0;


inc/framework/common/taskdown_common.h (+73, -0)

@@ -0,0 +1,73 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_
#define INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_

#include "runtime/rt.h"

using namespace std;

namespace ge {

#define CC_FUSION_OP_MAX 32

typedef enum tagCcStatus {
CC_STATUS_SUCCESS = 0, /**< succ */
CC_STATUS_NOT_INITIALIZED = 1, /**< not init */
CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */
CC_STATUS_BAD_PARAM = 3, /**< para check failed */
CC_STATUS_INTERNAL_ERROR = 4, /**< internal error */
CC_STATUS_KERNEL_ERROR = 5, /**< kernel error */
CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */
CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */
CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/
CC_STATUS_RESERVED /**< just for check */
} ccStatus_t;

typedef enum tagccKernelType {
CCE_AI_CORE = 0, /* cce aicore */
CCE_AI_CPU = 1, /* cce aicpu */
TE = 2, /* te operator*/
CUSTOMIZED = 3, /* customized operator */
TE_AI_CORE = 4, /* te aicore operator*/
TE_AI_CPU = 5, /* te aicpu operator */
AI_CPU = 6, /* aicpu */
CUST_AI_CPU = 7, /* custom aicpu*/
INVALID = 8, /* unknown kernel type */
} ccKernelType;

typedef struct tagOpContext {
ccKernelType kernelType;
uint32_t opId;
uint32_t kernelFuncId;
uint32_t opIndex;
uint32_t opCount;
uint32_t opIndex2[CC_FUSION_OP_MAX];
bool isFlowtable;
uint16_t *argsOffset;
uint32_t argsCount;
uint64_t genDataBaseAddr;
uint64_t genDataBaseSize;
uint64_t genWeightBaseAddr;
uint64_t genWeightBaseSize;
uint64_t genVariableBaseAddr;
uint64_t genVariableBaseSize;
uint64_t l2ctrlSize;
} ccOpContext;
} // namespace ge

#endif // INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_
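
The header above is what the rest of the commit switches to: consumers drop cce/taskdown_common.hpp, include framework/common/taskdown_common.h, and refer to ccKernelType and ccOpContext from the ge namespace. A small hypothetical consumer sketch, not part of the commit, assuming the GE include directories are on the compiler's search path:

#include <cstdint>
#include "framework/common/taskdown_common.h"  // header added by this commit

// Populate a ccOpContext roughly the way KernelTaskInfo::SetContext does,
// from values that would normally come from the serialized domi::KernelContext.
ge::ccOpContext MakeContext(int32_t raw_kernel_type, uint32_t op_id) {
  ge::ccOpContext ctx{};
  ctx.kernelType = static_cast<ge::ccKernelType>(raw_kernel_type);
  ctx.opId = op_id;
  ctx.isFlowtable = false;
  return ctx;
}

// The AI_CPU / CUST_AI_CPU test that several call sites in this commit perform.
bool IsAicpuKernel(const ge::ccOpContext &ctx) {
  return ctx.kernelType == ge::ccKernelType::AI_CPU ||
         ctx.kernelType == ge::ccKernelType::CUST_AI_CPU;
}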
