From: @zhengyuanhua
Reviewed-by: @xchu42, @ji_chen
Signed-off-by: @ji_chen
tags/v1.2.0
@@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||||
uint32_t stream_id = task.stream_id; | uint32_t stream_id = task.stream_id; | ||||
std::string shape_type = task.shape_type; | std::string shape_type = task.shape_type; | ||||
int64_t cur_iter_num = task.cur_iter_num; | int64_t cur_iter_num = task.cur_iter_num; | ||||
uint32_t task_type = task.task_type; | |||||
data = model_name.append(" ") | data = model_name.append(" ") | ||||
.append(op_name).append(" ") | .append(op_name).append(" ") | ||||
.append(std::to_string(block_dim)).append(" ") | .append(std::to_string(block_dim)).append(" ") | ||||
@@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin | |||||
.append(std::to_string(stream_id)).append(" ") | .append(std::to_string(stream_id)).append(" ") | ||||
.append(std::to_string(model_id)).append(" ") | .append(std::to_string(model_id)).append(" ") | ||||
.append(shape_type).append(" ") | .append(shape_type).append(" ") | ||||
.append(std::to_string(cur_iter_num)).append("\n"); | |||||
.append(std::to_string(cur_iter_num)).append(" ") | |||||
.append(std::to_string(task_type)).append("\n"); | |||||
ReporterData reporter_data{}; | ReporterData reporter_data{}; | ||||
reporter_data.deviceId = device_id; | reporter_data.deviceId = device_id; | ||||
@@ -3065,6 +3065,65 @@ Status DavinciModel::MallocKnownArgs() { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, | |||||
const domi::TaskDef &task_def, size_t task_index) { | |||||
task_desc_info_.clear(); | |||||
bool flag = GetL1FusionEnableOption(); | |||||
char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||||
int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||||
if (env_flag != 0) { | |||||
flag = true; | |||||
} | |||||
TaskDescInfo task_desc_info; | |||||
if (!om_name_.empty()) { | |||||
task_desc_info.model_name = om_name_; | |||||
} else { | |||||
task_desc_info.model_name = name_; | |||||
} | |||||
task_desc_info.op_name = op->GetName(); | |||||
task_desc_info.block_dim = task_def.kernel().block_dim(); | |||||
task_desc_info.task_id = task->GetTaskID(); | |||||
task_desc_info.stream_id = task->GetStreamId(); | |||||
task_desc_info.shape_type = "static"; | |||||
task_desc_info.cur_iter_num = 0; | |||||
// task type | |||||
task_desc_info.task_type = kTaskTypeInvalid; | |||||
auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
if (model_task_type == RT_MODEL_TASK_KERNEL) { | |||||
const domi::KernelDef &kernel_def = task_def.kernel(); | |||||
const auto &context = kernel_def.context(); | |||||
auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||||
if (kernel_type == ccKernelType::TE) { | |||||
task_desc_info.task_type = kTaskTypeAicore; | |||||
} else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | |||||
task_desc_info.task_type = kTaskTypeAicpu; | |||||
} else { | |||||
GELOGD("Other kernel type: %u", context.kernel_type()); | |||||
} | |||||
} else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) { | |||||
task_desc_info.task_type = kTaskTypeAicpu; | |||||
} else { | |||||
GELOGD("Skip task type: %d", static_cast<int>(model_task_type)); | |||||
} | |||||
profiler_report_op_info_[task_desc_info.op_name] = | |||||
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
task_desc_info_.emplace_back(task_desc_info); | |||||
if (flag) { | |||||
if (task->GetSktTaskID() != 0xFFFFFFFF) { | |||||
TaskDescInfo task_desc_info; | |||||
string op_name = "super_kernel_" + to_string(task_index); | |||||
task_desc_info.op_name = op_name; | |||||
task_desc_info.task_id = task->GetSktTaskID(); | |||||
profiler_report_op_info_[task_desc_info.op_name] = | |||||
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
task_desc_info_.emplace_back(task_desc_info); | |||||
} | |||||
} | |||||
return; | |||||
} | |||||
Status DavinciModel::DistributeTask() { | Status DavinciModel::DistributeTask() { | ||||
GELOGI("do Distribute."); | GELOGI("do Distribute."); | ||||
for (auto &task : cpu_task_list_) { | for (auto &task : cpu_task_list_) { | ||||
@@ -3075,19 +3134,11 @@ Status DavinciModel::DistributeTask() { | |||||
GE_CHK_STATUS_RET(task->Distribute()); | GE_CHK_STATUS_RET(task->Distribute()); | ||||
} | } | ||||
task_desc_info_.clear(); | |||||
bool flag = GetL1FusionEnableOption(); | |||||
char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||||
int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||||
if (env_flag != 0) { | |||||
flag = true; | |||||
} | |||||
const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | ||||
for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | ||||
auto &task_def = model_task_def->task(task_index); | auto &task_def = model_task_def->task(task_index); | ||||
auto &task = task_list_.at(task_index); | auto &task = task_list_.at(task_index); | ||||
GE_CHECK_NOTNULL(task); | |||||
GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | ||||
// for data dump | // for data dump | ||||
auto op_index = std::max(task_def.kernel().context().op_index(), | auto op_index = std::max(task_def.kernel().context().op_index(), | ||||
@@ -3107,33 +3158,9 @@ Status DavinciModel::DistributeTask() { | |||||
GE_IF_BOOL_EXEC(no_need_profiling, continue); | GE_IF_BOOL_EXEC(no_need_profiling, continue); | ||||
SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); | SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); | ||||
// Load task info for profiling | |||||
TaskDescInfo task_desc_info; | |||||
if (!om_name_.empty()) { | |||||
task_desc_info.model_name = om_name_; | |||||
} else { | |||||
task_desc_info.model_name = name_; | |||||
} | |||||
task_desc_info.op_name = op->GetName(); | |||||
task_desc_info.block_dim = task_def.kernel().block_dim(); | |||||
task_desc_info.task_id = task->GetTaskID(); | |||||
task_desc_info.stream_id = task->GetStreamId(); | |||||
task_desc_info.shape_type = "static"; | |||||
task_desc_info.cur_iter_num = 0; | |||||
profiler_report_op_info_[task_desc_info.op_name] = | |||||
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
task_desc_info_.emplace_back(task_desc_info); | |||||
if (flag) { | |||||
if (task->GetSktTaskID() != 0xFFFFFFFF) { | |||||
TaskDescInfo task_desc_info; | |||||
string op_name = "super_kernel_" + to_string(task_index); | |||||
task_desc_info.op_name = op_name; | |||||
task_desc_info.task_id = task->GetSktTaskID(); | |||||
profiler_report_op_info_[task_desc_info.op_name] = | |||||
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
task_desc_info_.emplace_back(task_desc_info); | |||||
} | |||||
} | |||||
// save task info for profiling | |||||
SaveProfilingTaskDescInfo(op, task, task_def, task_index); | |||||
} | } | ||||
// launch dump kernel to aicpu | // launch dump kernel to aicpu | ||||
GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); | GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); | ||||
@@ -623,6 +623,9 @@ class DavinciModel { | |||||
Status DistributeTask(); | Status DistributeTask(); | ||||
void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, | |||||
const domi::TaskDef &task_def, size_t task_index); | |||||
uint8_t *MallocFeatureMapMem(size_t data_size); | uint8_t *MallocFeatureMapMem(size_t data_size); | ||||
uint8_t *MallocWeightsMem(size_t weights_size); | uint8_t *MallocWeightsMem(size_t weights_size); | ||||
@@ -159,27 +159,9 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * | |||||
} | } | ||||
GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); | ||||
auto op_desc = node->GetOpDesc(); | |||||
std::string op_name = op_desc->GetName(); | |||||
std::string dynamic_model_name = model->GetModelName(); | |||||
uint32_t task_id = context_->GetTaskId(); | |||||
uint32_t stream_id = context_->GetStreamId(); | |||||
TaskDescInfo tmp_task_desc_info; | |||||
tmp_task_desc_info.model_name = dynamic_model_name; | |||||
tmp_task_desc_info.op_name = op_name; | |||||
tmp_task_desc_info.block_dim = 0; | |||||
auto task_defs = model->GetTaskDefs(node); | |||||
if (task_defs != nullptr && (*task_defs).size() > 0) { | |||||
const auto &task_def = (*task_defs)[0]; | |||||
tmp_task_desc_info.block_dim = task_def.kernel().block_dim(); | |||||
} | |||||
tmp_task_desc_info.task_id = task_id; | |||||
tmp_task_desc_info.stream_id = stream_id; | |||||
tmp_task_desc_info.shape_type = "dynamic"; | |||||
tmp_task_desc_info.cur_iter_num = graph_context_->iteration; | |||||
GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]", | |||||
node->GetName().c_str(), task_id, stream_id); | |||||
task_desc_info.emplace_back(tmp_task_desc_info); | |||||
task_desc_info = context_->GetProfilingTaskDescInfo(); | |||||
context_->ClearProfilingTaskDescInfo(); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -247,7 +229,6 @@ Status NodeDoneCallback::ProfilingReport() { | |||||
GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str()); | GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str()); | ||||
std::vector<TaskDescInfo> task_desc_info; | std::vector<TaskDescInfo> task_desc_info; | ||||
TaskDescInfo tmp_task_desc_info; | |||||
auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info); | auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info); | ||||
if (profiling_ret != RT_ERROR_NONE) { | if (profiling_ret != RT_ERROR_NONE) { | ||||
GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str()); | GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str()); | ||||
@@ -182,16 +182,8 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
} | } | ||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | ||||
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | ||||
uint32_t task_id = 0; | |||||
uint32_t stream_id = 0; | |||||
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
return rt_ret; | |||||
} | |||||
context.SetTaskId(task_id); | |||||
context.SetStreamId(stream_id); | |||||
GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||||
// save profiling data | |||||
(void)context.SaveProfilingTaskDescInfo(kTaskTypeAicore, (*it)->GetBlockDim()); | |||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
} | } | ||||
@@ -48,6 +48,8 @@ class AiCoreOpTask { | |||||
bool GetClearAtomic() const {return clear_atomic_;} | bool GetClearAtomic() const {return clear_atomic_;} | ||||
uint32_t GetBlockDim() const {return block_dim_;} | |||||
protected: | protected: | ||||
Status UpdateTilingInfo(TaskContext &context); | Status UpdateTilingInfo(TaskContext &context); | ||||
virtual std::string GetKeyForOpParamSize() const; | virtual std::string GetKeyForOpParamSize() const; | ||||
@@ -190,16 +190,8 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||||
HYBRID_CHK_STATUS_RET(LaunchTask(context), "[%s] Failed to launch task", node_name_.c_str()); | HYBRID_CHK_STATUS_RET(LaunchTask(context), "[%s] Failed to launch task", node_name_.c_str()); | ||||
uint32_t task_id = 0; | |||||
uint32_t stream_id = 0; | |||||
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
return rt_ret; | |||||
} | |||||
context.SetTaskId(task_id); | |||||
context.SetStreamId(stream_id); | |||||
GELOGD("AiCpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | |||||
// save profiling data | |||||
(void)context.SaveProfilingTaskDescInfo(kTaskTypeAicpu, 0); | |||||
auto callback = [=, &context]() { | auto callback = [=, &context]() { | ||||
GELOGD("Node[%s] callback start.", node_name_.c_str()); | GELOGD("Node[%s] callback start.", node_name_.c_str()); | ||||
@@ -21,6 +21,7 @@ | |||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "hybrid/executor/hybrid_execution_context.h" | #include "hybrid/executor/hybrid_execution_context.h" | ||||
#include "hybrid/executor/subgraph_executor.h" | #include "hybrid/executor/subgraph_executor.h" | ||||
#include "common/profiling/profiling_manager.h" | |||||
namespace ge { | namespace ge { | ||||
namespace hybrid { | namespace hybrid { | ||||
@@ -498,5 +499,42 @@ bool TaskContext::NeedCallback() { | |||||
Status TaskContext::Synchronize() { | Status TaskContext::Synchronize() { | ||||
return execution_context_->Synchronize(GetStream()); | return execution_context_->Synchronize(GetStream()); | ||||
} | } | ||||
/// @brief Save one dynamic-shape profiling record for this node's just-launched
///        task. No-op when model-execute profiling is disabled.
/// @param task_type engine classification (kTaskTypeAicore / kTaskTypeAicpu)
/// @param block_dim kernel block dimension (0 for AI CPU tasks)
/// @return SUCCESS, a GE_CHECK_NOTNULL failure status, or the runtime error
///         from rtGetTaskIdAndStreamID
Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim) {
  if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    return SUCCESS;
  }

  auto op_desc = GetNodeItem().GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);

  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  // must be called after Launch kernel
  rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(rt_ret, "Get task_id and stream_id failed.");
    return rt_ret;
  }
  GELOGD("Node[%s] task_id: %u, stream_id: %u.", GetNodeName(), task_id, stream_id);

  const GraphExecutionContext *graph_context = GetExecutionContext();
  GE_CHECK_NOTNULL(graph_context);
  const HybridModel *model = graph_context->model;
  GE_CHECK_NOTNULL(model);

  TaskDescInfo record;
  record.model_name = model->GetModelName();
  record.op_name = op_desc->GetName();
  record.block_dim = block_dim;
  record.task_type = task_type;
  record.task_id = task_id;
  record.stream_id = stream_id;
  record.shape_type = "dynamic";
  record.cur_iter_num = iteration_;
  task_desc_info.emplace_back(record);
  return SUCCESS;
}
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge |
@@ -22,6 +22,7 @@ | |||||
#include <vector> | #include <vector> | ||||
#include "common/properties_manager.h" | #include "common/properties_manager.h" | ||||
#include "external/ge/ge_api_error_codes.h" | #include "external/ge/ge_api_error_codes.h" | ||||
#include "framework/common/ge_types.h" | |||||
#include "hybrid/common/tensor_value.h" | #include "hybrid/common/tensor_value.h" | ||||
#include "hybrid/common/npu_memory_allocator.h" | #include "hybrid/common/npu_memory_allocator.h" | ||||
#include "hybrid/executor/rt_callback_manager.h" | #include "hybrid/executor/rt_callback_manager.h" | ||||
@@ -108,6 +109,10 @@ class TaskContext { | |||||
void SetForceInferShape(bool force_infer_shape); | void SetForceInferShape(bool force_infer_shape); | ||||
void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | |||||
Status SaveProfilingTaskDescInfo(uint32_t task_type, uint32_t block_dim); | |||||
void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | |||||
private: | private: | ||||
TaskContext(GraphExecutionContext *execution_context, | TaskContext(GraphExecutionContext *execution_context, | ||||
const NodeItem *node_item, | const NodeItem *node_item, | ||||
@@ -127,6 +132,7 @@ class TaskContext { | |||||
uint64_t iteration_ = 0; | uint64_t iteration_ = 0; | ||||
uint32_t task_id_ = 0; | uint32_t task_id_ = 0; | ||||
uint32_t stream_id_ = 0; | uint32_t stream_id_ = 0; | ||||
std::vector<TaskDescInfo> task_desc_info; | |||||
}; | }; | ||||
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge | ||||
@@ -70,6 +70,7 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { | |||||
tmp_task_desc_info.stream_id = stream_id; | tmp_task_desc_info.stream_id = stream_id; | ||||
tmp_task_desc_info.shape_type = shape_type; | tmp_task_desc_info.shape_type = shape_type; | ||||
tmp_task_desc_info.cur_iter_num = 0; | tmp_task_desc_info.cur_iter_num = 0; | ||||
tmp_task_desc_info.task_type = op_task->GetTaskType(); | |||||
GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | ||||
task_desc_info.emplace_back(tmp_task_desc_info); | task_desc_info.emplace_back(tmp_task_desc_info); | ||||
@@ -145,6 +145,8 @@ Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
return UNSUPPORTED; | return UNSUPPORTED; | ||||
} | } | ||||
// Base-class default: tasks that do not override this are not attributed to a
// specific execution engine in profiling records.
uint32_t OpTask::GetTaskType() const { return kTaskTypeInvalid; }
TbeOpTask::~TbeOpTask() { | TbeOpTask::~TbeOpTask() { | ||||
if (sm_desc_ != nullptr) { | if (sm_desc_ != nullptr) { | ||||
(void)rtMemFreeManaged(sm_desc_); | (void)rtMemFreeManaged(sm_desc_); | ||||
@@ -161,6 +163,8 @@ size_t TbeOpTask::GetArgSize() const { return arg_size_; } | |||||
const std::string &TbeOpTask::GetStubName() const { return stub_name_; } | const std::string &TbeOpTask::GetStubName() const { return stub_name_; } | ||||
// TBE (TE) kernels execute on AI Core.
uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }
Status TbeOpTask::LaunchKernel(rtStream_t stream) { | Status TbeOpTask::LaunchKernel(rtStream_t stream) { | ||||
GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | ||||
auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | ||||
@@ -802,6 +806,8 @@ Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||||
return DoUpdateArgTable(param, false); | return DoUpdateArgTable(param, false); | ||||
} | } | ||||
// AICPU kernels (and subclasses) execute on AI CPU.
uint32_t AiCpuBaseTask::GetTaskType() const { return kTaskTypeAicpu; }
void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | ||||
arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | arg_base = reinterpret_cast<uintptr_t *>(io_addr_host_.data()); | ||||
arg_count = io_addr_host_.size(); | arg_count = io_addr_host_.size(); | ||||
@@ -52,6 +52,7 @@ class OpTask { | |||||
std::vector<GeTensorDesc> &output_desc, | std::vector<GeTensorDesc> &output_desc, | ||||
std::vector<DataBuffer> &output_buffers, | std::vector<DataBuffer> &output_buffers, | ||||
rtStream_t stream); | rtStream_t stream); | ||||
virtual uint32_t GetTaskType() const; | |||||
protected: | protected: | ||||
Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | ||||
@@ -85,6 +86,7 @@ class TbeOpTask : public OpTask { | |||||
size_t GetArgSize() const; | size_t GetArgSize() const; | ||||
const std::string &GetStubName() const; | const std::string &GetStubName() const; | ||||
void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | ||||
uint32_t GetTaskType() const override; | |||||
private: | private: | ||||
friend class SingleOpModel; | friend class SingleOpModel; | ||||
@@ -113,6 +115,8 @@ class AiCpuBaseTask : public OpTask { | |||||
~AiCpuBaseTask() override; | ~AiCpuBaseTask() override; | ||||
UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | ||||
Status UpdateArgTable(const SingleOpModelParam ¶m) override; | Status UpdateArgTable(const SingleOpModelParam ¶m) override; | ||||
uint32_t GetTaskType() const override; | |||||
protected: | protected: | ||||
Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
Status SetInputConst(); | Status SetInputConst(); | ||||
@@ -41,12 +41,7 @@ enum FrameworkType { | |||||
}; | }; | ||||
const std::map<std::string, std::string> kFwkTypeToStr = { | const std::map<std::string, std::string> kFwkTypeToStr = { | ||||
{"0", "Caffe"}, | |||||
{"1", "MindSpore"}, | |||||
{"3", "TensorFlow"}, | |||||
{"4", "Android_NN"}, | |||||
{"5", "Onnx"} | |||||
}; | |||||
{"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; | |||||
enum OpEngineType { | enum OpEngineType { | ||||
ENGINE_SYS = 0, // default engine | ENGINE_SYS = 0, // default engine | ||||
@@ -61,6 +56,11 @@ enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYN | |||||
const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | ||||
const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | ||||
// profiling data
// Engine classification carried in each task's profiling record (TaskDescInfo::task_type).
const uint32_t kTaskTypeAicore = 0;        // task executed on AI Core
const uint32_t kTaskTypeAicpu = 1;         // task executed on AI CPU
const uint32_t kTaskTypeInvalid = 0xFFFF;  // engine unknown / not applicable
// Data cache, including data address and length | // Data cache, including data address and length | ||||
struct DataBuffer { | struct DataBuffer { | ||||
public: | public: | ||||
@@ -256,6 +256,7 @@ struct TaskDescInfo { | |||||
uint32_t stream_id; | uint32_t stream_id; | ||||
std::string shape_type; | std::string shape_type; | ||||
int64_t cur_iter_num; | int64_t cur_iter_num; | ||||
uint32_t task_type; | |||||
}; | }; | ||||
// Profiling info of graph | // Profiling info of graph | ||||
@@ -1 +1 @@ | |||||
Subproject commit dc6cceb67bc82b567bcbd6f415776644253e1467 | |||||
Subproject commit b00c50c2a8c2ce06929b27f7b74185a950737ec8 |
@@ -1 +1 @@ | |||||
Subproject commit 4e72aae41e78af1a19cd965da4a45cbd988b9a75 | |||||
Subproject commit f0109a2c70981d74932bb38bb56722caff3323a5 |