@@ -145,6 +145,7 @@ void Analyzer::DestroyGraphJsonObject(uint64_t session_id, uint64_t graph_id) { | |||||
if (iter1 == (iter->second).end()) { | if (iter1 == (iter->second).end()) { | ||||
GELOGW("Can not find the graph json object by session_id[%lu] and graph_id[%lu]. Do nothing.", session_id, | GELOGW("Can not find the graph json object by session_id[%lu] and graph_id[%lu]. Do nothing.", session_id, | ||||
graph_id); | graph_id); | ||||
return; | |||||
} | } | ||||
(iter->second).erase(iter1); | (iter->second).erase(iter1); | ||||
} | } | ||||
@@ -159,9 +159,13 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
auto op_desc = context.GetNodeItem().op_desc; | auto op_desc = context.GetNodeItem().op_desc; | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
GELOGI("[%s] ExecuteAsync Start.", op_desc->GetName().c_str()); | GELOGI("[%s] ExecuteAsync Start.", op_desc->GetName().c_str()); | ||||
for (auto &task : tasks_) { | |||||
for (auto it = tasks_.begin(); it != tasks_.end(); ++it) { | |||||
// AtomicAddrClean has 2 tasks | |||||
if (tasks_.size() == 2 && it == tasks_.begin() && !(*(tasks_.rbegin()))->GetClearAtomic()) { | |||||
continue; | |||||
} | |||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | ||||
GE_CHK_STATUS_RET_NOLOG(task->LaunchKernel(context.GetStream())); | |||||
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | |||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
} | } | ||||
@@ -181,8 +185,12 @@ Status AiCoreNodeTask::UpdateArgs(TaskContext &context) { | |||||
auto op_desc = context.GetNodeItem().op_desc; | auto op_desc = context.GetNodeItem().op_desc; | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
GELOGI("[%s] AiCoreNodeTask UpdateArgs Start.", op_desc->GetName().c_str()); | GELOGI("[%s] AiCoreNodeTask UpdateArgs Start.", op_desc->GetName().c_str()); | ||||
for (auto &task : tasks_) { | |||||
GE_CHK_STATUS_RET_NOLOG(task->UpdateArgs(context)); | |||||
for (auto it = tasks_.rbegin(); it != tasks_.rend(); ++it) { | |||||
GE_CHK_STATUS_RET_NOLOG((*it)->UpdateArgs(context)); | |||||
// AtomicAddrClean has 2 tasks | |||||
if (tasks_.size() == 2 && it == tasks_.rbegin() && !(*it)->GetClearAtomic()) { | |||||
break; | |||||
} | |||||
} | } | ||||
GELOGI("[%s] AiCoreNodeTask UpdateArgs End.", op_desc->GetName().c_str()); | GELOGI("[%s] AiCoreNodeTask UpdateArgs End.", op_desc->GetName().c_str()); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -190,8 +198,12 @@ Status AiCoreNodeTask::UpdateArgs(TaskContext &context) { | |||||
Status AiCoreNodeTask::UpdateTilingData(TaskContext &context) { | Status AiCoreNodeTask::UpdateTilingData(TaskContext &context) { | ||||
GELOGD("[%s] PrepareWithShape started", context.GetNodeName()); | GELOGD("[%s] PrepareWithShape started", context.GetNodeName()); | ||||
for (auto &task : tasks_) { | |||||
GE_CHK_STATUS_RET_NOLOG(task->PrepareWithShape(context)); | |||||
for (auto it = tasks_.rbegin(); it != tasks_.rend(); ++it) { | |||||
GE_CHK_STATUS_RET_NOLOG((*it)->PrepareWithShape(context)); | |||||
// AtomicAddrClean has 2 tasks | |||||
if (tasks_.size() == 2 && it == tasks_.rbegin() && !(*it)->GetClearAtomic()) { | |||||
break; | |||||
} | |||||
} | } | ||||
GELOGD("[%s] Done PrepareWithShape successfully.", context.GetNodeName()); | GELOGD("[%s] Done PrepareWithShape successfully.", context.GetNodeName()); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -121,6 +121,7 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { | |||||
GELOGD("[%s] Start to update tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str()); | GELOGD("[%s] Start to update tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str()); | ||||
OpRunInfo tiling_info; | OpRunInfo tiling_info; | ||||
tiling_info.block_dim = -1; // codex: Using uninitialized value | tiling_info.block_dim = -1; // codex: Using uninitialized value | ||||
tiling_info.clear_atomic = true; | |||||
auto execution_context = context.GetExecutionContext(); | auto execution_context = context.GetExecutionContext(); | ||||
RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CalcTilingInfo] Start"); | RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CalcTilingInfo] Start"); | ||||
@@ -130,6 +131,7 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { | |||||
// update op args by tiling info | // update op args by tiling info | ||||
block_dim_ = static_cast<uint32_t>(tiling_info.block_dim); | block_dim_ = static_cast<uint32_t>(tiling_info.block_dim); | ||||
op_desc->SetWorkspaceBytes(tiling_info.workspaces); | op_desc->SetWorkspaceBytes(tiling_info.workspaces); | ||||
clear_atomic_ = tiling_info.clear_atomic; | |||||
tiling_data_ = tiling_info.tiling_data.str(); | tiling_data_ = tiling_info.tiling_data.str(); | ||||
if (tiling_data_.empty()) { | if (tiling_data_.empty()) { | ||||
@@ -46,6 +46,8 @@ class AiCoreOpTask { | |||||
const std::string& GetName() const; | const std::string& GetName() const; | ||||
// Returns the clear_atomic_ flag. The member defaults to true and is
// overwritten in UpdateTilingInfo() with the clear_atomic value reported in
// the tiling info; AiCoreNodeTask reads it to decide whether the first of its
// two tasks is processed.
bool GetClearAtomic() const {return clear_atomic_;} | |||||
protected: | protected: | ||||
Status UpdateTilingInfo(TaskContext &context); | Status UpdateTilingInfo(TaskContext &context); | ||||
virtual std::string GetKeyForOpParamSize() const; | virtual std::string GetKeyForOpParamSize() const; | ||||
@@ -66,6 +68,7 @@ class AiCoreOpTask { | |||||
std::unique_ptr<uint8_t[]> args_ = nullptr; | std::unique_ptr<uint8_t[]> args_ = nullptr; | ||||
uint32_t args_size_ = 0; | uint32_t args_size_ = 0; | ||||
uint32_t block_dim_ = 1; | uint32_t block_dim_ = 1; | ||||
bool clear_atomic_ = true; | |||||
}; | }; | ||||
class AtomicAddrCleanOpTask : public AiCoreOpTask { | class AtomicAddrCleanOpTask : public AiCoreOpTask { | ||||
@@ -171,7 +171,7 @@ class Impl { | |||||
graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, | graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, | ||||
bool is_dynamic_input); | bool is_dynamic_input); | ||||
void SetRtSocVersion(); | void SetRtSocVersion(); | ||||
void UpdateThreadContext(); | |||||
public: | public: | ||||
ge::GeGenerator generator_; | ge::GeGenerator generator_; | ||||
std::map<std::string, std::string> options_; | std::map<std::string, std::string> options_; | ||||
@@ -225,8 +225,6 @@ graphStatus Impl::Init(const std::map<std::string, std::string> &options) { | |||||
return ret; | return ret; | ||||
} | } | ||||
GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); | |||||
GetThreadLocalContext().SetGraphOption(options_); | |||||
std::string build_mode = (options_.find(BUILD_MODE) == options_.end() || options_[BUILD_MODE] == BUILD_MODE_NORMAL) | std::string build_mode = (options_.find(BUILD_MODE) == options_.end() || options_[BUILD_MODE] == BUILD_MODE_NORMAL) | ||||
? "" : options_[BUILD_MODE]; | ? "" : options_[BUILD_MODE]; | ||||
options_[BUILD_MODE] = build_mode; | options_[BUILD_MODE] = build_mode; | ||||
@@ -286,7 +284,7 @@ graphStatus Impl::Init(const std::map<std::string, std::string> &options) { | |||||
ge::PrintOptionMap(options_, "ge option"); | ge::PrintOptionMap(options_, "ge option"); | ||||
SetRtSocVersion(); | SetRtSocVersion(); | ||||
UpdateThreadContext(); | |||||
// 3. init generator with options_ | // 3. init generator with options_ | ||||
ret = generator_.Initialize(options_, omg_context_); | ret = generator_.Initialize(options_, omg_context_); | ||||
if (ret != GRAPH_SUCCESS) { | if (ret != GRAPH_SUCCESS) { | ||||
@@ -310,6 +308,11 @@ void Impl::SetRtSocVersion() { | |||||
} | } | ||||
} | } | ||||
// Publishes the current option sets to the thread-local context: the mutable
// global options become its global options and this Impl's options_ become
// its graph options. Impl::Init calls this after SetRtSocVersion() and just
// before generator_.Initialize().
// NOTE(review): presumably invoked late so that edits Init makes to options_
// (e.g. BUILD_MODE) are included in what gets published -- confirm.
void Impl::UpdateThreadContext() { | |||||
GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); | |||||
GetThreadLocalContext().SetGraphOption(options_); | |||||
} | |||||
graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs) { | graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs) { | ||||
auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); | auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); | ||||
GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
@@ -333,13 +336,15 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTe | |||||
data_shape = tensor.GetShape(); | data_shape = tensor.GetShape(); | ||||
GELOGI("Data op get shape from InputDesc in ge ir graph."); | GELOGI("Data op get shape from InputDesc in ge ir graph."); | ||||
} | } | ||||
// If user point input format, do work for all data ops; else do according to tensor_desc | |||||
auto data_format = omg_context_.format != domi::DOMI_TENSOR_ND ? | |||||
ge::TypeUtils::DomiFormatToFormat(omg_context_.format) : tensor.GetFormat(); | |||||
ge::DataType data_type = tensor.GetDataType(); | ge::DataType data_type = tensor.GetDataType(); | ||||
string data_type_str = ge::TypeUtils::DataTypeToSerialString(data_type); | string data_type_str = ge::TypeUtils::DataTypeToSerialString(data_type); | ||||
GELOGI("Data op get data type:%s from InputDesc in ge ir graph.", data_type_str.c_str()); | GELOGI("Data op get data type:%s from InputDesc in ge ir graph.", data_type_str.c_str()); | ||||
ge::GeTensor inputTensor; | ge::GeTensor inputTensor; | ||||
ge::GeTensorDesc desc(data_shape, ge::Format(omg_context_.format), data_type); | |||||
ge::GeTensorDesc desc(data_shape, ge::Format(data_format), data_type); | |||||
inputTensor.SetTensorDesc(desc); | inputTensor.SetTensorDesc(desc); | ||||
inputs.push_back(inputTensor); | inputs.push_back(inputTensor); | ||||
} | } | ||||
@@ -112,14 +112,11 @@ namespace ge { | |||||
Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, | Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, | ||||
bool dynamic_flag, uint64_t session_id) { | bool dynamic_flag, uint64_t session_id) { | ||||
void *io_addr = nullptr; | |||||
void *kernel_workspace = nullptr; | |||||
GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(&io_addr, &kernel_workspace, param, dynamic_flag)); | |||||
GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(&task.io_addr_, &task.workspace_addr_, param, dynamic_flag)); | |||||
STR_FWK_OP_KERNEL fwk_op_kernel = {0}; | STR_FWK_OP_KERNEL fwk_op_kernel = {0}; | ||||
auto ret = SetFmkOpKernel(io_addr, kernel_workspace, fwk_op_kernel); | |||||
auto ret = SetFmkOpKernel(task.io_addr_, task.workspace_addr_, fwk_op_kernel); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
(void)rtFree(io_addr); | |||||
return ret; | return ret; | ||||
} | } | ||||
@@ -150,15 +147,12 @@ namespace ge { | |||||
return FAILED;) | return FAILED;) | ||||
ret = SetKernelArgs(&task.args_, fwk_op_kernel); | ret = SetKernelArgs(&task.args_, fwk_op_kernel); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
(void)rtFree(io_addr); | |||||
return ret; | return ret; | ||||
} | } | ||||
task.arg_size_ = sizeof(STR_FWK_OP_KERNEL); | task.arg_size_ = sizeof(STR_FWK_OP_KERNEL); | ||||
task.op_type_ = op_desc_->GetName(); | task.op_type_ = op_desc_->GetName(); | ||||
task.io_addr_ = io_addr; | |||||
task.task_info_ = kernel_def_.task_info(); | task.task_info_ = kernel_def_.task_info(); | ||||
task.workspace_addr_ = kernel_workspace; | |||||
task.dynamic_flag_ = dynamic_flag; | task.dynamic_flag_ = dynamic_flag; | ||||
auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); | auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); | ||||