@@ -46,7 +46,12 @@ namespace { | |||||
const size_t kDataOutputNum = 1; | const size_t kDataOutputNum = 1; | ||||
const uint32_t kInputIndexOfData = 0; | const uint32_t kInputIndexOfData = 0; | ||||
const uint32_t kOutputIndexOfData = 0; | const uint32_t kOutputIndexOfData = 0; | ||||
const size_t kNumTaskWithAtomicAddrCleanTask = 2; | |||||
const size_t kNumTaskWithMemCpyTask = 2; | |||||
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | ||||
const char *const kEngineNameAiCore = "AIcoreEngine"; | |||||
const char *const kEngineNameAiCpu = "aicpu_ascend_kernel"; | |||||
const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel"; | |||||
Status CheckHostMem(const std::vector<string> &dependencies, const NodePtr &node, bool &is_host_mem) { | Status CheckHostMem(const std::vector<string> &dependencies, const NodePtr &node, bool &is_host_mem) { | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
@@ -395,7 +400,7 @@ void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) { | |||||
} | } | ||||
} | } | ||||
} | } | ||||
Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) { | Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) { | ||||
GE_CHECK_NOTNULL(task); | GE_CHECK_NOTNULL(task); | ||||
auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
@@ -408,7 +413,7 @@ Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask * | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | return ACL_ERROR_GE_INTERNAL_ERROR; | ||||
} | } | ||||
auto *tbe_task = new (std::nothrow) TbeOpTask(); | |||||
std::unique_ptr<TbeOpTask> tbe_task(new (std::nothrow) TbeOpTask()); | |||||
if (tbe_task == nullptr) { | if (tbe_task == nullptr) { | ||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][TbeOpTask]failed."); | GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][TbeOpTask]failed."); | ||||
REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new TbeOpTask."); | REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new TbeOpTask."); | ||||
@@ -418,12 +423,41 @@ Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask * | |||||
auto builder = TbeTaskBuilder(model_name_, iter->second, task_def); | auto builder = TbeTaskBuilder(model_name_, iter->second, task_def); | ||||
auto ret = builder.BuildTask(*tbe_task, model_params_); | auto ret = builder.BuildTask(*tbe_task, model_params_); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
delete tbe_task; | |||||
tbe_task = nullptr; | |||||
GELOGE(ret, "[Build][TbeOpTask]failed."); | |||||
REPORT_INNER_ERROR("E19999", "[Build][TbeOpTask]failed."); | |||||
return ret; | |||||
} | |||||
*task = tbe_task.release(); | |||||
return SUCCESS; | |||||
} | |||||
Status SingleOpModel::BuildAtomicTask(const domi::TaskDef &task_def, AtomicAddrCleanOpTask **task) { | |||||
GE_CHECK_NOTNULL(task); | |||||
const auto &context = task_def.kernel().context(); | |||||
auto iter = op_list_.find(context.op_index()); | |||||
if (iter == op_list_.end()) { | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Param:TaskDef]op desc not found. op index = %u", context.op_index()); | |||||
REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for op desc not found. op index = %u", context.op_index()); | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | |||||
std::unique_ptr<AtomicAddrCleanOpTask> atomic_task(new (std::nothrow) AtomicAddrCleanOpTask()); | |||||
if (atomic_task == nullptr) { | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AtomicAddrCleanOpTask]failed."); | |||||
REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new AtomicAddrCleanOpTask."); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} | |||||
auto builder = AtomicAddrCleanTaskBuilder(model_name_, iter->second, task_def); | |||||
auto ret = builder.BuildTask(*atomic_task, model_params_); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "[Build][AtomicAddrCleanOpTask]failed."); | |||||
REPORT_INNER_ERROR("E19999", "[Build][AtomicAddrCleanOpTask]failed."); | |||||
return ret; | return ret; | ||||
} | } | ||||
*task = tbe_task; | |||||
*task = atomic_task.release(); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -536,9 +570,29 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, | |||||
auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | ||||
GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
single_op.compute_graph_ = compute_graph; | single_op.compute_graph_ = compute_graph; | ||||
if (tbe_tasks_.size() > 0) { | |||||
const auto &task_def = tbe_tasks_[0]; | |||||
if (node_tasks_.size() != 1) { | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size()); | |||||
REPORT_INNER_ERROR("E19999", "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size()); | |||||
return ACL_ERROR_GE_PARAM_INVALID; | |||||
} | |||||
auto iter = node_tasks_.begin(); | |||||
auto node = iter->first; | |||||
const auto &task_defs = iter->second; | |||||
if (task_defs.size() <= 0 || task_defs.size() > kNumTaskWithAtomicAddrCleanTask) { | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size()); | |||||
REPORT_INNER_ERROR("E19999", "[Check][Size]task_defs size must be 1 or 2, but get %zu.", task_defs.size()); | |||||
return ACL_ERROR_GE_PARAM_INVALID; | |||||
} | |||||
GE_CHECK_NOTNULL(node); | |||||
auto op_desc = node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
const auto &lib_name = op_desc->GetOpKernelLibName(); | |||||
if (lib_name == kEngineNameAiCore) { | |||||
GELOGD("Building TBE task."); | GELOGD("Building TBE task."); | ||||
const auto &task_def = task_defs.back(); | |||||
TbeOpTask *tbe_task = nullptr; | TbeOpTask *tbe_task = nullptr; | ||||
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | ||||
tbe_task->SetModelArgs(model_name_, model_id_); | tbe_task->SetModelArgs(model_name_, model_id_); | ||||
@@ -546,37 +600,43 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, | |||||
GELOGD("tiling buffer is not nullptr."); | GELOGD("tiling buffer is not nullptr."); | ||||
tbe_task->stream_resource_ = stream_resource; | tbe_task->stream_resource_ = stream_resource; | ||||
} | } | ||||
if (task_defs.size() == kNumTaskWithAtomicAddrCleanTask) { | |||||
const auto &atomic_task_def = task_defs.front(); | |||||
AtomicAddrCleanOpTask *atomic_task = nullptr; | |||||
GE_CHK_STATUS_RET_NOLOG(BuildAtomicTask(atomic_task_def, &atomic_task)); | |||||
GE_CHK_STATUS_RET_NOLOG(atomic_task->InitAtomicAddrCleanIndices()); | |||||
tbe_task->SetAtomicAddrCleanTask(atomic_task); | |||||
} | |||||
single_op.op_task_.reset(tbe_task); | single_op.op_task_.reset(tbe_task); | ||||
} else if (aicpu_tasks_.size() > 0) { | |||||
const auto &task_def = aicpu_tasks_[0]; | |||||
auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
if (task_type == RT_MODEL_TASK_KERNEL) { | |||||
GELOGD("Building AICPU_CC task"); | |||||
OpTask *task = nullptr; | |||||
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | |||||
GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); | |||||
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); | |||||
task->SetModelArgs(model_name_, model_id_); | |||||
single_op.op_task_.reset(task); | |||||
} else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | |||||
GELOGD("Building AICPU_TF task"); | |||||
AiCpuTask *aicpu_task = nullptr; | |||||
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | |||||
GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id); | |||||
GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, dynamic_singleop_kernel_id)); | |||||
if (aicpu_task->GetUnknownType() == DEPEND_COMPUTE) { | |||||
if (aicpu_tasks_.size() < 2) { | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found."); | |||||
REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found."); | |||||
return ACL_ERROR_GE_PARAM_INVALID; | |||||
} | |||||
const TaskDef ©_task_def = aicpu_tasks_[1]; | |||||
GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); | |||||
} else if (lib_name == kEngineNameAiCpu) { | |||||
const auto &task_def = task_defs[0]; | |||||
GELOGD("Building AICPU_CC task"); | |||||
OpTask *task = nullptr; | |||||
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | |||||
GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); | |||||
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); | |||||
task->SetModelArgs(model_name_, model_id_); | |||||
single_op.op_task_.reset(task); | |||||
} else if (lib_name == kEngineNameAiCpuTf) { | |||||
const auto &task_def = task_defs[0]; | |||||
GELOGD("Building AICPU_TF task"); | |||||
AiCpuTask *aicpu_task = nullptr; | |||||
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | |||||
GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id); | |||||
GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, dynamic_singleop_kernel_id)); | |||||
if (aicpu_task->GetUnknownType() == DEPEND_COMPUTE) { | |||||
if (task_defs.size() < kNumTaskWithMemCpyTask) { | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found."); | |||||
REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found."); | |||||
return ACL_ERROR_GE_PARAM_INVALID; | |||||
} | } | ||||
aicpu_task->SetModelArgs(model_name_, model_id_); | |||||
single_op.op_task_.reset(aicpu_task); | |||||
const TaskDef ©_task_def = task_defs[1]; | |||||
GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); | |||||
} | } | ||||
aicpu_task->SetModelArgs(model_name_, model_id_); | |||||
single_op.op_task_.reset(aicpu_task); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -585,9 +645,7 @@ Status SingleOpModel::NeedHybridModel(GeModelPtr &ge_model, bool &need_hybrid_mo | |||||
bool is_host_mem = false; | bool is_host_mem = false; | ||||
GE_CHK_STATUS_RET(CheckInferDepend(ge_model, is_infer_depend, is_host_mem), "[Check][InferDepend] failed."); | GE_CHK_STATUS_RET(CheckInferDepend(ge_model, is_infer_depend, is_host_mem), "[Check][InferDepend] failed."); | ||||
bool need_d2h_cpy = is_infer_depend && !is_host_mem; | bool need_d2h_cpy = is_infer_depend && !is_host_mem; | ||||
bool aicpu_multi_task = tbe_tasks_.size() >= 1 && aicpu_tasks_.size() >= 1; | |||||
bool aicore_multi_task = tbe_tasks_.size() > 1; | |||||
need_hybrid_model = need_d2h_cpy || aicore_multi_task || aicpu_multi_task; | |||||
need_hybrid_model = need_d2h_cpy || node_tasks_.size() > 1; | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -601,31 +659,27 @@ Status SingleOpModel::ParseTasks() { | |||||
GELOGI("[%s] Task[%d], type = [%u], DebugString = [%s]", model_name_.c_str(), i, task_def.type(), | GELOGI("[%s] Task[%d], type = [%u], DebugString = [%s]", model_name_.c_str(), i, task_def.type(), | ||||
task_def.DebugString().c_str()); | task_def.DebugString().c_str()); | ||||
auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
uint32_t op_index = 0; | |||||
if (task_type == RT_MODEL_TASK_KERNEL) { | if (task_type == RT_MODEL_TASK_KERNEL) { | ||||
const auto &kernel_def = task_def.kernel(); | |||||
const auto &context = kernel_def.context(); | |||||
auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||||
if (kernel_type == ccKernelType::TE) { | |||||
tbe_tasks_.emplace_back(task_def); | |||||
} else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | |||||
aicpu_tasks_.emplace_back(task_def); | |||||
} else { | |||||
GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, | |||||
"[Check][Param:TaskDef]Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", | |||||
context.kernel_type()); | |||||
REPORT_INNER_ERROR("E19999", | |||||
"BuildModelTaskKernel fail for got:%u not supported, Only TBE, AI_CPU, CUST_AI_CPU kernel are supported.", | |||||
context.kernel_type()); | |||||
return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID; | |||||
} | |||||
} else if (task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
tbe_tasks_.emplace_back(task_def); | |||||
op_index = task_def.kernel().context().op_index(); | |||||
} else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | ||||
aicpu_tasks_.emplace_back(task_def); | |||||
op_index = task_def.kernel_ex().op_index(); | |||||
} else if (task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
op_index = task_def.kernel_with_handle().context().op_index(); | |||||
} else { | } else { | ||||
// skip | |||||
GELOGD("Skip task type: %d", static_cast<int>(task_type)); | GELOGD("Skip task type: %d", static_cast<int>(task_type)); | ||||
continue; | |||||
} | |||||
GELOGD("op_index = %u, task_type = %d", op_index, task_type); | |||||
auto iter = op_list_.find(op_index); | |||||
if (iter == op_list_.end()) { | |||||
GELOGE(INTERNAL_ERROR, "[Find][Node]Failed to get node by op_index = %u", op_index); | |||||
REPORT_INNER_ERROR("E19999", "Failed to get node by op_index = %u.", op_index); | |||||
return INTERNAL_ERROR; | |||||
} | } | ||||
auto &node = iter->second; | |||||
node_tasks_[node].emplace_back(task_def); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -69,6 +69,7 @@ class SingleOpModel { | |||||
Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | ||||
Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op); | Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op); | ||||
Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); | Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); | ||||
Status BuildAtomicTask(const domi::TaskDef &task_def, AtomicAddrCleanOpTask **task); | |||||
Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, uint64_t kernel_id); | Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, uint64_t kernel_id); | ||||
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | ||||
@@ -79,9 +80,7 @@ class SingleOpModel { | |||||
Status NeedHybridModel(GeModelPtr &ge_model, bool &flag); | Status NeedHybridModel(GeModelPtr &ge_model, bool &flag); | ||||
Status ParseTasks(); | Status ParseTasks(); | ||||
std::vector<domi::TaskDef> tbe_tasks_; | |||||
std::vector<domi::TaskDef> aicpu_tasks_; | |||||
std::map<NodePtr, std::vector<domi::TaskDef>> node_tasks_; | |||||
std::string model_name_; | std::string model_name_; | ||||
uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
const void *ori_model_data_; | const void *ori_model_data_; | ||||
@@ -27,7 +27,6 @@ | |||||
#include "common/formats/formats.h" | #include "common/formats/formats.h" | ||||
#include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
#include "register/op_tiling.h" | |||||
#include "runtime/rt.h" | #include "runtime/rt.h" | ||||
#include "single_op/task/build_task_utils.h" | #include "single_op/task/build_task_utils.h" | ||||
@@ -222,19 +221,26 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status TbeOpTask::UpdateRunInfo() { | |||||
// invoke OpParaCalculate | |||||
GELOGD("Start to invoke OpParaCalculate."); | |||||
optiling::utils::OpRunInfo run_info(0, true, 0); | |||||
Status TbeOpTask::CalcTilingInfo(optiling::utils::OpRunInfo &run_info) { | |||||
auto ret = optiling::OpParaCalculateV2(*node_, run_info); | auto ret = optiling::OpParaCalculateV2(*node_, run_info); | ||||
if (ret != GRAPH_SUCCESS) { | if (ret != GRAPH_SUCCESS) { | ||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Invoke][OpParaCalculate] failed, ret = %u.", ret); | GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Invoke][OpParaCalculate] failed, ret = %u.", ret); | ||||
REPORT_INNER_ERROR("E19999", "invoke OpParaCalculate failed, ret = %u.", ret); | REPORT_INNER_ERROR("E19999", "invoke OpParaCalculate failed, ret = %u.", ret); | ||||
return ACL_ERROR_GE_INTERNAL_ERROR; | return ACL_ERROR_GE_INTERNAL_ERROR; | ||||
} | } | ||||
return SUCCESS; | |||||
} | |||||
Status TbeOpTask::UpdateRunInfo() { | |||||
// invoke OpParaCalculate | |||||
GELOGD("Start to invoke OpParaCalculate."); | |||||
optiling::utils::OpRunInfo run_info(0, true, 0); | |||||
GE_CHK_STATUS_RET(CalcTilingInfo(run_info), "[Calc][TilingInfo]failed."); | |||||
block_dim_ = run_info.GetBlockDim(); | block_dim_ = run_info.GetBlockDim(); | ||||
tiling_data_ = run_info.GetAllTilingData().str(); | tiling_data_ = run_info.GetAllTilingData().str(); | ||||
tiling_key_ = run_info.GetTilingKey(); | tiling_key_ = run_info.GetTilingKey(); | ||||
clear_atomic_ = run_info.GetClearAtomic(); | |||||
run_info.GetAllWorkspaces(run_info_workspaces_); | run_info.GetAllWorkspaces(run_info_workspaces_); | ||||
GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, | GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, | ||||
tiling_data_.size(), tiling_key_); | tiling_data_.size(), tiling_key_); | ||||
@@ -262,7 +268,6 @@ Status TbeOpTask::UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc | |||||
dst_tensor.SetShape(GeShape(std::move(storage_shape))); | dst_tensor.SetShape(GeShape(std::move(storage_shape))); | ||||
dst_tensor.SetOriginShape(src_tensor.GetShape()); | dst_tensor.SetOriginShape(src_tensor.GetShape()); | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -346,6 +351,17 @@ Status TbeOpTask::AllocateWorkspaces(const vector<int64_t> &workspace_sizes) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status TbeOpTask::CheckAndExecuteAtomic(const vector<GeTensorDesc> &input_desc, | |||||
const vector<DataBuffer> &input_buffers, | |||||
vector<GeTensorDesc> &output_desc, | |||||
vector<DataBuffer> &output_buffers, | |||||
rtStream_t stream) { | |||||
if (clear_atomic_ && atomic_task_ != nullptr) { | |||||
return atomic_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status TbeOpTask::UpdateTilingArgs(rtStream_t stream) { | Status TbeOpTask::UpdateTilingArgs(rtStream_t stream) { | ||||
size_t args_size = input_num_ + output_num_ + workspaces_.size(); | size_t args_size = input_num_ + output_num_ + workspaces_.size(); | ||||
if (tiling_buffer_ != nullptr) { | if (tiling_buffer_ != nullptr) { | ||||
@@ -433,6 +449,8 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); | GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); | ||||
GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo()); | GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo()); | ||||
GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed."); | GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed."); | ||||
GE_CHK_STATUS_RET(CheckAndExecuteAtomic(input_desc, input_buffers, output_desc, output_buffers, stream), | |||||
"[Execute][AtomicTask] failed."); | |||||
GE_CHK_STATUS_RET(UpdateTilingArgs(stream), "[Update][TilingArgs] failed."); | GE_CHK_STATUS_RET(UpdateTilingArgs(stream), "[Update][TilingArgs] failed."); | ||||
GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | ||||
@@ -463,6 +481,85 @@ void TbeOpTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | |||||
} | } | ||||
} | } | ||||
Status AtomicAddrCleanOpTask::UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | |||||
const vector<GeTensorDesc> &output_desc) { | |||||
return SUCCESS; | |||||
} | |||||
Status AtomicAddrCleanOpTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs) { | |||||
uintptr_t *arg_base = reinterpret_cast<uintptr_t *>(args_.get()); | |||||
for (auto atomic_output_index : atomic_output_indices_) { | |||||
if (atomic_output_index >= static_cast<int>(outputs.size())) { | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Update][Args] failed, atomic index must smaller then data size."); | |||||
REPORT_INNER_ERROR("E19999", "[Update][Args] failed, atomic index must smaller then data size."); | |||||
return ACL_ERROR_GE_PARAM_INVALID; | |||||
} | |||||
auto &output_buffer = outputs[atomic_output_index]; | |||||
*arg_base++ = reinterpret_cast<uintptr_t>(output_buffer.data); | |||||
auto tensor_desc = op_desc_->MutableOutputDesc(atomic_output_index); | |||||
int64_t size = 0; | |||||
graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, size); | |||||
if (graph_status != GRAPH_SUCCESS) { | |||||
REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed!"); | |||||
GELOGE(graph_status, "[Get][TensorMemorySize] In Bytes failed!"); | |||||
return FAILED; | |||||
} | |||||
TensorUtils::SetSize(*tensor_desc, size); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status AtomicAddrCleanOpTask::UpdateTilingArgs(rtStream_t stream) { | |||||
if (tiling_buffer_ != nullptr) { | |||||
GELOGD("[%s] Start to copy tiling info. size = %zu", node_->GetName().c_str(), tiling_data_.size()); | |||||
GE_CHK_RT_RET(rtMemcpyAsync(tiling_buffer_, max_tiling_size_, tiling_data_.data(), tiling_data_.size(), | |||||
RT_MEMCPY_HOST_TO_DEVICE_EX, stream)); | |||||
uintptr_t *arg_base = reinterpret_cast<uintptr_t *>(args_.get()); | |||||
size_t idx = atomic_output_indices_.size(); | |||||
arg_base[idx] = reinterpret_cast<uintptr_t>(tiling_buffer_); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status AtomicAddrCleanOpTask::CalcTilingInfo(optiling::utils::OpRunInfo &run_info) { | |||||
auto ret = optiling::OpAtomicCalculateV2(*node_, run_info); | |||||
if (ret != GRAPH_SUCCESS) { | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Invoke][OpAtomicCalculate] failed, ret = %u.", ret); | |||||
REPORT_INNER_ERROR("E19999", "invoke OpAtomicCalculate failed, ret = %u.", ret); | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status AtomicAddrCleanOpTask::InitAtomicAddrCleanIndices() { | |||||
GELOGD("[%s] Start to setup AtomicAddrClean task.", op_desc_->GetName().c_str()); | |||||
std::vector<int64_t> atomic_output_indices; | |||||
(void) ge::AttrUtils::GetListInt(op_desc_, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices); | |||||
if (atomic_output_indices.empty()) { | |||||
GELOGE(INTERNAL_ERROR, "[Check][Size][%s] atomic_output_indices must not be empty.", op_desc_->GetName().c_str()); | |||||
REPORT_INNER_ERROR("E19999", "[%s] atomic_output_indices must not be empty.", op_desc_->GetName().c_str()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
size_t max_arg_size = tiling_buffer_ == nullptr ? arg_size_ : arg_size_ - 1; | |||||
if (atomic_output_indices.size() > max_arg_size) { | |||||
GELOGE(INTERNAL_ERROR, "[Check][Size][%s] atomic_output_indices invalid. atomic_output_indices size is %zu," | |||||
"arg size is %zu.", op_desc_->GetName().c_str(), atomic_output_indices.size(), arg_size_); | |||||
REPORT_INNER_ERROR("E19999", "[%s] atomic_output_indices invalid. atomic_output_indices size is %zu," | |||||
"arg size is %zu.", op_desc_->GetName().c_str(), atomic_output_indices.size(), arg_size_); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
for (auto output_index : atomic_output_indices) { | |||||
GELOGD("[%s] Adding output index [%ld]", op_desc_->GetName().c_str(), output_index); | |||||
GE_CHECK_GE(output_index, 0); | |||||
GE_CHECK_LE(output_index, INT32_MAX); | |||||
atomic_output_indices_.emplace_back(static_cast<int>(output_index)); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
AiCpuBaseTask::~AiCpuBaseTask() { | AiCpuBaseTask::~AiCpuBaseTask() { | ||||
if (ext_info_addr_dev_ != nullptr) { | if (ext_info_addr_dev_ != nullptr) { | ||||
(void)rtFree(ext_info_addr_dev_); | (void)rtFree(ext_info_addr_dev_); | ||||
@@ -89,6 +89,7 @@ class TbeOpTask : public OpTask { | |||||
void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); | void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); | ||||
void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | ||||
const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); | const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); | ||||
void SetAtomicAddrCleanTask(OpTask *task) { atomic_task_.reset(task); } | |||||
Status UpdateRunInfo() override; | Status UpdateRunInfo() override; | ||||
Status SetArgIndex(); | Status SetArgIndex(); | ||||
@@ -100,38 +101,63 @@ class TbeOpTask : public OpTask { | |||||
const std::string &GetTaskType() const override; | const std::string &GetTaskType() const override; | ||||
void SetHandle(void *handle); | void SetHandle(void *handle); | ||||
protected: | |||||
NodePtr node_; | |||||
std::unique_ptr<uint8_t[]> args_; | |||||
size_t arg_size_ = 0; | |||||
void *tiling_buffer_ = nullptr; | |||||
uint32_t max_tiling_size_ = 0; | |||||
std::string tiling_data_; | |||||
size_t input_num_; // include const input | |||||
size_t output_num_; | |||||
private: | private: | ||||
friend class SingleOpModel; | friend class SingleOpModel; | ||||
friend class TbeTaskBuilder; | friend class TbeTaskBuilder; | ||||
static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); | static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); | ||||
Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | |||||
const vector<GeTensorDesc> &output_desc); | |||||
Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes); | Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes); | ||||
Status UpdateTilingArgs(rtStream_t stream); | |||||
Status DoLaunchKernel(rtStream_t stream); | Status DoLaunchKernel(rtStream_t stream); | ||||
Status UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs); | |||||
Status CheckAndExecuteAtomic(const vector<GeTensorDesc> &input_desc, | |||||
const vector<DataBuffer> &input_buffers, | |||||
vector<GeTensorDesc> &output_desc, | |||||
vector<DataBuffer> &output_buffers, | |||||
rtStream_t stream); | |||||
virtual Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | |||||
const vector<GeTensorDesc> &output_desc); | |||||
virtual Status UpdateTilingArgs(rtStream_t stream); | |||||
virtual Status UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs); | |||||
virtual Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info); | |||||
const void *stub_func_ = nullptr; | const void *stub_func_ = nullptr; | ||||
std::unique_ptr<uint8_t[]> args_; | |||||
size_t arg_size_ = 0; | |||||
void *sm_desc_ = nullptr; | void *sm_desc_ = nullptr; | ||||
std::string stub_name_; | std::string stub_name_; | ||||
StreamResource *stream_resource_ = nullptr; | StreamResource *stream_resource_ = nullptr; | ||||
void *tiling_buffer_ = nullptr; | |||||
uint32_t max_tiling_size_ = 0; | |||||
std::string tiling_data_; | |||||
std::vector<int64_t> run_info_workspaces_; | std::vector<int64_t> run_info_workspaces_; | ||||
std::vector<void *> workspaces_; | std::vector<void *> workspaces_; | ||||
NodePtr node_; | |||||
uint32_t tiling_key_ = 0; | uint32_t tiling_key_ = 0; | ||||
bool clear_atomic_ = false; | |||||
void* handle_ = nullptr; | void* handle_ = nullptr; | ||||
std::string original_kernel_key_; | std::string original_kernel_key_; | ||||
std::string node_info_; | std::string node_info_; | ||||
std::vector<size_t> arg_index_; // data index in args | std::vector<size_t> arg_index_; // data index in args | ||||
size_t input_num_; // include const input | |||||
size_t output_num_; | |||||
std::unique_ptr<OpTask> atomic_task_; | |||||
}; | |||||
class AtomicAddrCleanOpTask : public TbeOpTask { | |||||
public: | |||||
Status InitAtomicAddrCleanIndices(); | |||||
private: | |||||
Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | |||||
const vector<GeTensorDesc> &output_desc) override; | |||||
Status UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs) override; | |||||
Status UpdateTilingArgs(rtStream_t stream) override; | |||||
Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info) override; | |||||
std::vector<int> atomic_output_indices_; | |||||
}; | }; | ||||
class AiCpuBaseTask : public OpTask { | class AiCpuBaseTask : public OpTask { | ||||
@@ -29,15 +29,8 @@ namespace ge { | |||||
namespace { | namespace { | ||||
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | ||||
constexpr char const *kAttrOpParamSize = "op_para_size"; | constexpr char const *kAttrOpParamSize = "op_para_size"; | ||||
constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | |||||
std::mutex g_reg_mutex; | std::mutex g_reg_mutex; | ||||
inline void GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) { | |||||
(void)AttrUtils::GetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); | |||||
} | |||||
inline TBEKernelPtr GetTbeKernel(const OpDescPtr &op_desc) { | |||||
return op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | |||||
} | |||||
} // namespace | } // namespace | ||||
KernelHolder::KernelHolder(const char *stub_func, std::shared_ptr<ge::OpKernelBin> kernel_bin) | KernelHolder::KernelHolder(const char *stub_func, std::shared_ptr<ge::OpKernelBin> kernel_bin) | ||||
@@ -96,7 +89,15 @@ TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &nod | |||||
task_def_(task_def), | task_def_(task_def), | ||||
kernel_def_(task_def.kernel()), | kernel_def_(task_def.kernel()), | ||||
kernel_def_with_handle_(task_def.kernel_with_handle()), | kernel_def_with_handle_(task_def.kernel_with_handle()), | ||||
stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {} | |||||
model_name_(model_name) {} | |||||
TBEKernelPtr TbeTaskBuilder::GetTbeKernel(const OpDescPtr &op_desc) const { | |||||
return op_desc->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | |||||
} | |||||
void TbeTaskBuilder::GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) const { | |||||
(void)AttrUtils::GetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); | |||||
} | |||||
Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, | Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, | ||||
const SingleOpModelParam ¶m) const { | const SingleOpModelParam ¶m) const { | ||||
@@ -124,7 +125,7 @@ Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bi | |||||
Status TbeTaskBuilder::DoRegisterMeta(void *bin_handle) { | Status TbeTaskBuilder::DoRegisterMeta(void *bin_handle) { | ||||
std::string meta_data; | std::string meta_data; | ||||
(void)AttrUtils::GetStr(op_desc_, TVM_ATTR_NAME_METADATA, meta_data); | |||||
(void)AttrUtils::GetStr(op_desc_, GetKeyForTvmMetaData(), meta_data); | |||||
GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); | GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); | ||||
if (!meta_data.empty()) { | if (!meta_data.empty()) { | ||||
auto rt_ret = rtMetadataRegister(bin_handle, meta_data.c_str()); | auto rt_ret = rtMetadataRegister(bin_handle, meta_data.c_str()); | ||||
@@ -307,6 +308,15 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
// Copies the device addresses of the op's tensors (as collected by
// BuildTaskUtils::GetKernelArgs) into the kernel args buffer at arg_addr.
// arg_size is the remaining capacity of the destination; rtMemcpy validates
// that the copy fits and GE_CHK_RT_RET propagates any runtime error.
Status TbeTaskBuilder::InitKernelArgs(void *arg_addr, size_t arg_size, const SingleOpModelParam &param) {
  std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
  if (tensor_device_addr_vec.empty()) {
    // Nothing to copy; avoid handing a null src pointer to rtMemcpy.
    return SUCCESS;
  }
  // static_cast is sufficient here (T* -> void*); reinterpret_cast is not needed.
  void *src_addr = static_cast<void *>(tensor_device_addr_vec.data());
  const uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
  GE_CHK_RT_RET(rtMemcpy(arg_addr, arg_size, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST));
  return SUCCESS;
}
Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc) { | Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc) { | ||||
auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | ||||
bool is_task_all_kernel = (task_type == RT_MODEL_TASK_ALL_KERNEL); | bool is_task_all_kernel = (task_type == RT_MODEL_TASK_ALL_KERNEL); | ||||
@@ -331,12 +341,7 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & | |||||
kernel_def_with_handle_.context() : kernel_def_.context(); | kernel_def_with_handle_.context() : kernel_def_.context(); | ||||
const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | ||||
uint16_t offset = *args_offset_tmp; | uint16_t offset = *args_offset_tmp; | ||||
// copy args | |||||
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||||
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||||
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||||
GE_CHK_RT_RET(rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST)); | |||||
GE_CHK_STATUS_RET_NOLOG(InitKernelArgs(args.get() + offset, arg_size - offset, param)); | |||||
if (is_task_all_kernel) { | if (is_task_all_kernel) { | ||||
task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, | task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, | ||||
@@ -367,8 +372,15 @@ Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶ | |||||
} | } | ||||
auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | ||||
ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) : | |||||
RegisterKernel(task, param); | |||||
if (task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
stub_name_ = model_name_ + "/" + node_->GetName() + "_tvmbin"; | |||||
ret = RegisterKernelWithHandle(task, param); | |||||
} else { | |||||
const domi::KernelDef &kernel_def = task_def_.kernel(); | |||||
stub_name_ = model_name_ + "/" + kernel_def.stub_func() + "_tvmbin"; | |||||
ret = RegisterKernel(task, param); | |||||
} | |||||
task.SetHandle(handle_); | task.SetHandle(handle_); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | return ret; | ||||
@@ -397,8 +409,8 @@ Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶ | |||||
Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { | Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { | ||||
GELOGD("Start alloc tiling data of node %s.", op_desc_->GetName().c_str()); | GELOGD("Start alloc tiling data of node %s.", op_desc_->GetName().c_str()); | ||||
int64_t max_size = -1; | int64_t max_size = -1; | ||||
(void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size); | |||||
GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size); | |||||
(void)AttrUtils::GetInt(op_desc_, GetKeyForOpParamSize(), max_size); | |||||
GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size); | |||||
if (max_size < 0) { | if (max_size < 0) { | ||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Get][Int] %s Invalid op_param_size: %ld.", | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Get][Int] %s Invalid op_param_size: %ld.", | ||||
op_desc_->GetName().c_str(), max_size); | op_desc_->GetName().c_str(), max_size); | ||||
@@ -439,4 +451,32 @@ Status TbeTaskBuilder::GetMagic(uint32_t &magic) const { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
std::string TbeTaskBuilder::GetKeyForOpParamSize() const { | |||||
return kAttrOpParamSize; | |||||
} | |||||
std::string TbeTaskBuilder::GetKeyForTvmMetaData() const { | |||||
return TVM_ATTR_NAME_METADATA; | |||||
} | |||||
// Intentional no-op: the atomic-clean task copies no tensor addresses at build
// time (presumably they are filled in later, e.g. by UpdateIoAddr — see tests).
Status AtomicAddrCleanTaskBuilder::InitKernelArgs(void *args_addr, size_t arg_size, const SingleOpModelParam &param) {
  (void)args_addr;
  (void)arg_size;
  (void)param;
  return SUCCESS;
}
std::string AtomicAddrCleanTaskBuilder::GetKeyForOpParamSize() const { | |||||
return kAttrAtomicOpParamSize; | |||||
} | |||||
std::string AtomicAddrCleanTaskBuilder::GetKeyForTvmMetaData() const { | |||||
return ATOMIC_ATTR_TVM_METADATA; | |||||
} | |||||
// Reads the atomic-clean kernel's name, stored per-op under
// "<op name>_atomic_kernelname". Missing attr leaves kernel_name untouched.
void AtomicAddrCleanTaskBuilder::GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) const {
  const std::string attr_key = op_desc->GetName() + "_atomic_kernelname";
  (void)AttrUtils::GetStr(op_desc, attr_key, kernel_name);
}
// The atomic-clean binary is attached to the op under EXT_ATTR_ATOMIC_TBE_KERNEL;
// an empty TBEKernelPtr is returned when the op carries no such binary.
TBEKernelPtr AtomicAddrCleanTaskBuilder::GetTbeKernel(const OpDescPtr &op_desc) const {
  TBEKernelPtr kernel_bin = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, TBEKernelPtr());
  return kernel_bin;
}
} // namespace ge | } // namespace ge |
@@ -90,10 +90,17 @@ class HandleRegistry { | |||||
class TbeTaskBuilder { | class TbeTaskBuilder { | ||||
public: | public: | ||||
TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def); | TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def); | ||||
~TbeTaskBuilder() = default; | |||||
virtual ~TbeTaskBuilder() = default; | |||||
Status BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m); | Status BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m); | ||||
protected: | |||||
virtual std::string GetKeyForOpParamSize() const; | |||||
virtual std::string GetKeyForTvmMetaData() const; | |||||
virtual TBEKernelPtr GetTbeKernel(const OpDescPtr &op_desc) const; | |||||
virtual void GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) const; | |||||
virtual Status InitKernelArgs(void *args_addr, size_t arg_size, const SingleOpModelParam ¶m); | |||||
private: | private: | ||||
Status InitTilingInfo(TbeOpTask &task); | Status InitTilingInfo(TbeOpTask &task); | ||||
Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | ||||
@@ -114,9 +121,24 @@ class TbeTaskBuilder { | |||||
const domi::TaskDef &task_def_; | const domi::TaskDef &task_def_; | ||||
const domi::KernelDef &kernel_def_; | const domi::KernelDef &kernel_def_; | ||||
const domi::KernelDefWithHandle &kernel_def_with_handle_; | const domi::KernelDefWithHandle &kernel_def_with_handle_; | ||||
const std::string stub_name_; | |||||
const std::string model_name_; | |||||
std::string stub_name_; | |||||
void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
}; | }; | ||||
// Builder for the atomic-addr-clean companion kernel of a TBE op. It reuses
// the full TbeTaskBuilder pipeline but overrides the attr-key and kernel-lookup
// hooks so the atomic variants of the attributes are consulted, and makes
// InitKernelArgs a no-op (no tensor addresses are copied at build time).
class AtomicAddrCleanTaskBuilder : public TbeTaskBuilder {
 public:
  AtomicAddrCleanTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def)
      : TbeTaskBuilder(model_name, node, task_def) {}
  ~AtomicAddrCleanTaskBuilder() override = default;
 protected:
  // Attr key holding the max tiling-param size of the atomic kernel.
  std::string GetKeyForOpParamSize() const override;
  // Attr key holding the TVM metadata of the atomic kernel.
  std::string GetKeyForTvmMetaData() const override;
  // Returns the atomic-clean kernel binary attached to the op.
  TBEKernelPtr GetTbeKernel(const OpDescPtr &op_desc) const override;
  // Reads the atomic kernel's name ("<op name>_atomic_kernelname").
  void GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) const override;
  // No-op for the atomic-clean task: returns SUCCESS without touching args_addr.
  Status InitKernelArgs(void *args_addr, size_t arg_size, const SingleOpModelParam &param) override;
};
} // namespace ge | } // namespace ge | ||||
#endif // GE_SINGLE_OP_TASK_TBE_TASK_BUILDER_H_ | #endif // GE_SINGLE_OP_TASK_TBE_TASK_BUILDER_H_ |
@@ -153,7 +153,6 @@ TEST_F(UtestGeHybrid, task_update_tiling_info) { | |||||
ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json"); | ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json"); | ||||
ge::AttrUtils::SetBool(op_desc, "support_dynamicshape", true); | ge::AttrUtils::SetBool(op_desc, "support_dynamicshape", true); | ||||
ge::AttrUtils::SetInt(op_desc, "op_para_size", 1); | ge::AttrUtils::SetInt(op_desc, "op_para_size", 1); | ||||
ge::AttrUtils::SetStr(op_desc, TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF"); | |||||
auto node = graph->AddNode(op_desc); | auto node = graph->AddNode(op_desc); | ||||
std::unique_ptr<NodeItem> node_item; | std::unique_ptr<NodeItem> node_item; | ||||
@@ -40,6 +40,9 @@ using namespace ge; | |||||
namespace { | namespace { | ||||
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | ||||
const char *const kEngineNameAiCore = "AIcoreEngine"; | |||||
const char *const kEngineNameAiCpu = "aicpu_ascend_kernel"; | |||||
const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel"; | |||||
} // namespace | } // namespace | ||||
class UtestSingleOpModel : public testing::Test { | class UtestSingleOpModel : public testing::Test { | ||||
@@ -222,6 +225,7 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) { | |||||
auto graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); | auto graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); | ||||
model.model_helper_.model_->SetGraph(graph); | model.model_helper_.model_->SetGraph(graph); | ||||
model.op_list_[0] = transdata; | |||||
auto op_desc = transdata->GetOpDesc(); | auto op_desc = transdata->GetOpDesc(); | ||||
const vector<string> depend_names = { "Data" }; | const vector<string> depend_names = { "Data" }; | ||||
@@ -330,7 +334,10 @@ TEST_F(UtestSingleOpModel, build_dynamic_task) { | |||||
domi::TaskDef *task_def3 = model_task_def->add_task(); | domi::TaskDef *task_def3 = model_task_def->add_task(); | ||||
task_def3->set_type(RT_MODEL_TASK_ALL_KERNEL); | task_def3->set_type(RT_MODEL_TASK_ALL_KERNEL); | ||||
string model_data_str = "123456789"; | |||||
domi::TaskDef *task_def4 = model_task_def->add_task(); | |||||
task_def4->set_type(RT_MODEL_TASK_KERNEL); | |||||
string model_data_str = "dynamic_model"; | |||||
SingleOpModel model("model", model_data_str.c_str(), model_data_str.size()); | SingleOpModel model("model", model_data_str.c_str(), model_data_str.size()); | ||||
std::mutex stream_mu; | std::mutex stream_mu; | ||||
rtStream_t stream = nullptr; | rtStream_t stream = nullptr; | ||||
@@ -347,9 +354,15 @@ TEST_F(UtestSingleOpModel, build_dynamic_task) { | |||||
StreamResource *res = new (std::nothrow) StreamResource(1); | StreamResource *res = new (std::nothrow) StreamResource(1); | ||||
ASSERT_EQ(model.ParseTasks(), SUCCESS); | ASSERT_EQ(model.ParseTasks(), SUCCESS); | ||||
model.node_tasks_[node] = { *task_def3, *task_def4 }; | |||||
op_desc->SetOpKernelLibName(kEngineNameAiCore); | |||||
model.BuildTaskListForDynamicOp(res, single_op); | |||||
model.node_tasks_[node] = { *task_def }; | |||||
op_desc->SetOpKernelLibName(kEngineNameAiCpuTf); | |||||
ASSERT_EQ(model.BuildTaskListForDynamicOp(res, single_op), SUCCESS); | ASSERT_EQ(model.BuildTaskListForDynamicOp(res, single_op), SUCCESS); | ||||
model.tbe_tasks_.clear(); | |||||
ASSERT_EQ(model.BuildTaskListForDynamicOp(res, single_op), SUCCESS); | |||||
model.aicpu_tasks_[0] = *task_def2; | |||||
model.node_tasks_[node] = { *task_def2 }; | |||||
op_desc->SetOpKernelLibName(kEngineNameAiCpu); | |||||
model.BuildTaskListForDynamicOp(res, single_op); | model.BuildTaskListForDynamicOp(res, single_op); | ||||
} | } |
@@ -154,3 +154,38 @@ TEST_F(UtestSingleOpTask, test_update_ioaddr) { | |||||
task.tiling_buffer_ = nullptr; | task.tiling_buffer_ = nullptr; | ||||
} | } | ||||
// Exercises AtomicAddrCleanOpTask end to end: index init, I/O address update,
// tiling-args update, and tiling-info calculation on a minimal Add node.
TEST_F(UtestSingleOpTask, test_atomic_exec) {
  // Build a minimal one-input/one-output Add node and bind it to the task.
  auto graph = make_shared<ComputeGraph>("graph");
  auto op_desc = make_shared<OpDesc>("Add", "Add");
  GeTensorDesc desc;
  op_desc->AddInputDesc(desc);
  op_desc->AddOutputDesc(desc);
  auto node = graph->AddNode(op_desc);
  AtomicAddrCleanOpTask task;
  task.op_desc_ = op_desc;
  task.node_ = node;
  vector<DataBuffer> inputs;
  vector<DataBuffer> outputs;
  // An empty atomic-output-index list must be rejected.
  std::vector<int64_t> atomic_output_indices;
  ge::AttrUtils::SetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices);
  ASSERT_EQ(task.InitAtomicAddrCleanIndices(), INTERNAL_ERROR);
  // A valid index still fails while the args buffer is not allocated.
  atomic_output_indices = { 0 };
  ge::AttrUtils::SetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices);
  ASSERT_EQ(task.InitAtomicAddrCleanIndices(), INTERNAL_ERROR);
  // With room for two pointer-sized args, index initialization succeeds.
  task.arg_size_ = sizeof(void *) * 2;
  task.args_.reset(new (std::nothrow) uint8_t[task.arg_size_]);
  ASSERT_EQ(task.InitAtomicAddrCleanIndices(), SUCCESS);
  // UpdateIoAddr requires an output buffer for atomic index 0: fails with none,
  // succeeds once one DataBuffer is supplied.
  ASSERT_EQ(task.UpdateIoAddr(inputs, outputs), ACL_ERROR_GE_PARAM_INVALID);
  ge::DataBuffer data_buffer;
  outputs = { data_buffer };
  ASSERT_EQ(task.UpdateIoAddr(inputs, outputs), SUCCESS);
  // UpdateTilingArgs succeeds with a (dummy) tiling buffer set; reset afterwards.
  task.tiling_buffer_ = (void *)0x0001;
  ASSERT_EQ(task.UpdateTilingArgs(nullptr), SUCCESS);
  task.tiling_buffer_ = nullptr;
  // CalcTilingInfo should run on the prepared task without crashing.
  optiling::utils::OpRunInfo run_info(0, true, 0);
  task.CalcTilingInfo(run_info);
}