From: @zhangxiaokun9 Reviewed-by: @wangxiaotian22, @xchu42 Signed-off-by: @ji_chen tags/v1.2.0
| @@ -96,6 +96,29 @@ const int32_t kModelAbortNormalNew = 507024; | |||||
// Returns true when node_type equals DATA_TYPE, AIPP_DATA_TYPE or ANN_DATA_TYPE.
| inline bool IsDataOp(const std::string &node_type) { | inline bool IsDataOp(const std::string &node_type) { | ||||
| return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE; | return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE; | ||||
| } | } | ||||
// Decides whether op_desc should go through TBE handle initialization.
// Returns true only when:
//   - AttrUtils::GetInt succeeds for ATTR_NAME_IMPLY_TYPE, and
//   - the value read equals domi::ImplyType::TVM, and
//   - the op is not flagged ATTR_NAME_NOTASK == true.
// Returns false in every other case.
// NOTE(review): op_desc is dereferenced in the log call below without a null check;
// presumably callers guarantee it is non-null - confirm.
| inline bool IsTbeTask(const OpDescPtr &op_desc) { | |||||
| uint32_t run_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||||
| if (!AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, run_mode)) { | |||||
| return false; | |||||
| } | |||||
| if (run_mode != static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||||
| return false; | |||||
| } | |||||
| // Skip no_task operator, such as concat and split. | |||||
| bool attr_no_task = false; | |||||
| bool get_attr_no_task_flag = AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_no_task); | |||||
// Both conditions are required: if GetBool fails, or the attribute reads false,
// the op is still treated as a TBE task.
| if (get_attr_no_task_flag && attr_no_task) { | |||||
| GELOGI("Node[name:%s, type:%s] does not generate task, skip initialization.", | |||||
| op_desc->GetName().c_str(), op_desc->GetType().c_str()); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| inline bool IsNoTaskAndDumpNeeded(const OpDescPtr &op_desc) { | inline bool IsNoTaskAndDumpNeeded(const OpDescPtr &op_desc) { | ||||
| bool save_dump_info = false; | bool save_dump_info = false; | ||||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NO_TASK_AND_DUMP_NEEDED, save_dump_info); | (void)ge::AttrUtils::GetBool(op_desc, ATTR_NO_TASK_AND_DUMP_NEEDED, save_dump_info); | ||||
| @@ -689,7 +712,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
| GE_CHK_STATUS_RET(InitNodes(compute_graph), "Init nodes failed"); | GE_CHK_STATUS_RET(InitNodes(compute_graph), "Init nodes failed"); | ||||
| SetDataDumperArgs(compute_graph); | |||||
| GE_TIMESTAMP_START(DoTaskSink); | GE_TIMESTAMP_START(DoTaskSink); | ||||
| GE_CHK_STATUS_RET(DoTaskSink(), "Task sink failed"); | GE_CHK_STATUS_RET(DoTaskSink(), "Task sink failed"); | ||||
| GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); | GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); | ||||
| @@ -825,7 +847,6 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| typedef Status (DavinciModel::*OpDescCall)(const OpDescPtr &); | typedef Status (DavinciModel::*OpDescCall)(const OpDescPtr &); | ||||
| static std::map<std::string, OpDescCall> op_desc_handle = { | static std::map<std::string, OpDescCall> op_desc_handle = { | ||||
| {VARIABLE, &DavinciModel::InitVariable}, | |||||
| {CONSTANTOP, &DavinciModel::InitConstant}, | {CONSTANTOP, &DavinciModel::InitConstant}, | ||||
| {STREAMACTIVE, &DavinciModel::InitStreamActive}, | {STREAMACTIVE, &DavinciModel::InitStreamActive}, | ||||
| {STREAMSWITCH, &DavinciModel::InitStreamSwitch}, | {STREAMSWITCH, &DavinciModel::InitStreamSwitch}, | ||||
| @@ -836,15 +857,13 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| vector<OpDescPtr> output_op_list; | vector<OpDescPtr> output_op_list; | ||||
| map<uint32_t, OpDescPtr> data_by_index; | map<uint32_t, OpDescPtr> data_by_index; | ||||
| map<string, OpDescPtr> variable_by_name; | |||||
| auto nodes = compute_graph->GetAllNodes(); | auto nodes = compute_graph->GetAllNodes(); | ||||
| const CustAICPUKernelStore &aicpu_kernel_store = ge_model_->GetCustAICPUKernelStore(); | const CustAICPUKernelStore &aicpu_kernel_store = ge_model_->GetCustAICPUKernelStore(); | ||||
| for (size_t i = 0; i < nodes.size(); ++i) { | for (size_t i = 0; i < nodes.size(); ++i) { | ||||
| auto node = nodes.at(i); | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| if (op_desc == nullptr) { | |||||
| GELOGE(PARAM_INVALID, "op_desc is null."); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| const auto &node = nodes.at(i); | |||||
| const auto &op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| op_list_[op_desc->GetId()] = op_desc; | op_list_[op_desc->GetId()] = op_desc; | ||||
| @@ -873,6 +892,14 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| if (op_desc->GetType() == VARIABLE) { | |||||
| if (InitVariable(op_desc, variable_by_name) != SUCCESS) { | |||||
| GELOGE(PARAM_INVALID, "Variable init failed, Name: %s", op_desc->GetName().c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| continue; | |||||
| } | |||||
| auto it = op_desc_handle.find(op_desc->GetType()); | auto it = op_desc_handle.find(op_desc->GetType()); | ||||
| if (it != op_desc_handle.end()) { | if (it != op_desc_handle.end()) { | ||||
| if ((this->*it->second)(op_desc) != SUCCESS) { | if ((this->*it->second)(op_desc) != SUCCESS) { | ||||
| @@ -907,17 +934,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| } | } | ||||
| GE_TIMESTAMP_RESTART(InitTbeHandle); | GE_TIMESTAMP_RESTART(InitTbeHandle); | ||||
| uint32_t run_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||||
| if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, run_mode) && | |||||
| run_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||||
| // Skip no_task operator, such as concat and split. | |||||
| bool attr_notask = false; | |||||
| bool get_attr_notask_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_notask); | |||||
| GE_IF_BOOL_EXEC(get_attr_notask_flag && attr_notask, | |||||
| GELOGI("Node[name:%s, type:%s] does not generate task, skip initialization.", | |||||
| op_desc->GetName().c_str(), op_desc->GetType().c_str()); | |||||
| continue;); | |||||
| if (IsTbeTask(op_desc)) { | |||||
| Status status = InitTbeHandle(op_desc); | Status status = InitTbeHandle(op_desc); | ||||
| if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
| GELOGE(status, "TBE init failed. %s", op_desc->GetName().c_str()); | GELOGE(status, "TBE init failed. %s", op_desc->GetName().c_str()); | ||||
| @@ -927,6 +944,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| GE_TIMESTAMP_ADD(InitTbeHandle); | GE_TIMESTAMP_ADD(InitTbeHandle); | ||||
| } | } | ||||
| SetDataDumperArgs(compute_graph, variable_by_name); | |||||
| GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); | GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); | ||||
| GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); | GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); | ||||
| return GenInputOutputInfo(data_by_index, output_op_list); | return GenInputOutputInfo(data_by_index, output_op_list); | ||||
| @@ -1405,8 +1423,23 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
// The rows below interleave two versions of InitVariable.
// One-argument version: appends op_desc to variable_op_list_.
| Status DavinciModel::InitVariable(const OpDescPtr &op_desc) { | |||||
| variable_op_list_.push_back(op_desc); | |||||
// Two-argument version:
//   - when the op is named NODE_NAME_GLOBAL_STEP, stores its first output size in
//     global_step_size_ and its first output address in global_step_addr_; each
//     member is left unchanged if the corresponding list is empty;
//   - when the op has VAR_ATTR_VAR_IS_BROADCAST, stores output desc 0 in
//     broadcast_variable_ under the op name;
//   - always records the op in variable_by_name under the op name, overwriting any
//     earlier entry with the same name.
// Always returns SUCCESS.
| Status DavinciModel::InitVariable(const OpDescPtr &op_desc, map<string, OpDescPtr> &variable_by_name) { | |||||
| if (op_desc->GetName() == NODE_NAME_GLOBAL_STEP) { | |||||
| const auto output_sizes = ModelUtils::GetOutputSize(op_desc); | |||||
| if (!output_sizes.empty()) { | |||||
| global_step_size_ = output_sizes[0]; | |||||
| } | |||||
| const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc); | |||||
| if (!output_addrs.empty()) { | |||||
| global_step_addr_ = output_addrs[0]; | |||||
| } | |||||
| } | |||||
// NOTE(review): only the presence of the attribute is tested here, not its value - confirm this is intended.
| if (op_desc->HasAttr(VAR_ATTR_VAR_IS_BROADCAST)) { | |||||
| broadcast_variable_[op_desc->GetName()] = op_desc->GetOutputDesc(0); | |||||
| } | |||||
| variable_by_name[op_desc->GetName()] = op_desc; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -2119,25 +2152,16 @@ Status DavinciModel::SyncVarData() { | |||||
| GELOGI("Sync var data, model id:%u", model_id_); | GELOGI("Sync var data, model id:%u", model_id_); | ||||
| Status ret = SUCCESS; | Status ret = SUCCESS; | ||||
| OpDescPtr global_step = GetVariableOp(NODE_NAME_GLOBAL_STEP); | |||||
| if (global_step != nullptr) { | |||||
| auto v_output_size = ModelUtils::GetOutputSize(global_step); | |||||
| auto v_output_addr = ModelUtils::GetOutputDataAddrs(runtime_param_, global_step); | |||||
| if (v_output_size.empty() || v_output_addr.empty()) { | |||||
| GELOGE(PARAM_INVALID, "global step op:%s not set output", global_step->GetName().c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| std::vector<uint64_t> v_step; | |||||
| v_step.push_back(iterator_count_); | |||||
| GE_CHK_RT_RET(rtMemcpy(v_output_addr[0], v_output_size[0], v_step.data(), v_step.size() * sizeof(uint64_t), | |||||
| if (global_step_addr_ != nullptr && global_step_size_ != 0) { | |||||
| const vector<uint64_t> v_step = { iterator_count_ }; | |||||
| GE_CHK_RT_RET(rtMemcpy(global_step_addr_, global_step_size_, v_step.data(), v_step.size() * sizeof(uint64_t), | |||||
| RT_MEMCPY_HOST_TO_DEVICE)); | RT_MEMCPY_HOST_TO_DEVICE)); | ||||
| } | } | ||||
| for (auto op_desc : variable_op_list_) { | |||||
| ret = | |||||
| VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); | |||||
| for (const auto &item : broadcast_variable_) { | |||||
| ret = VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, item.first, item.second, mem_base_); | |||||
| GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | ||||
| op_desc->GetName().c_str()); | |||||
| item.first.c_str()); | |||||
| } | } | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -2622,11 +2646,11 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b | |||||
| /// | /// | ||||
| Status DavinciModel::ReturnNoOutput(uint32_t data_id) { | Status DavinciModel::ReturnNoOutput(uint32_t data_id) { | ||||
| GELOGI("ReturnNoOutput model id:%u", model_id_); | GELOGI("ReturnNoOutput model id:%u", model_id_); | ||||
| for (auto op_desc : variable_op_list_) { | |||||
| for (const auto item : broadcast_variable_) { | |||||
| Status ret = VarManager::Instance(session_id_) | Status ret = VarManager::Instance(session_id_) | ||||
| ->SyncBroadCastData2Var(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); | |||||
| ->SyncBroadCastData2Var(runtime_param_.graph_id, item.first, item.second, mem_base_); | |||||
| GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | ||||
| op_desc->GetName().c_str()); | |||||
| item.first.c_str()); | |||||
| } | } | ||||
| GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); | GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); | ||||
| @@ -3921,11 +3945,11 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { | |||||
| void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name) { | |||||
| data_dumper_.SetModelName(name_); | data_dumper_.SetModelName(name_); | ||||
| data_dumper_.SetModelId(model_id_); | data_dumper_.SetModelId(model_id_); | ||||
| data_dumper_.SetOmName(om_name_); | data_dumper_.SetOmName(om_name_); | ||||
| data_dumper_.SetComputeGraph(compute_graph); | |||||
| data_dumper_.SetComputeGraph(graph); | |||||
| data_dumper_.SetRefInfo(saved_task_addrs_); | data_dumper_.SetRefInfo(saved_task_addrs_); | ||||
| data_dumper_.SetL1FusionAddr(l1_fusion_addr_); | data_dumper_.SetL1FusionAddr(l1_fusion_addr_); | ||||
| @@ -3938,22 +3962,23 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { | |||||
| data_dumper_.SetDeviceId(device_id); | data_dumper_.SetDeviceId(device_id); | ||||
| // set loop count addr | // set loop count addr | ||||
| auto get_var_addr = [](const OpDescPtr &op, const RuntimeParam &runtime_param) -> void *{ | |||||
| if (op != nullptr) { | |||||
| auto v_output_size = ModelUtils::GetOutputSize(op); | |||||
| auto v_output_addr = ModelUtils::GetOutputDataAddrs(runtime_param, op); | |||||
| if (v_output_size.empty() || v_output_addr.empty()) { | |||||
| auto get_var_addr = [&](const string &name) -> void *{ | |||||
| const auto it = variable_by_name.find(name); | |||||
| if (it != variable_by_name.end()) { | |||||
| const auto output_sizes = ModelUtils::GetOutputSize(it->second); | |||||
| const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, it->second); | |||||
| if (output_sizes.empty() || output_addrs.empty()) { | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| return v_output_addr[0]; | |||||
| return output_addrs[0]; | |||||
| } | } | ||||
| GELOGD("op is null."); | |||||
| GELOGD("op: %s is null.", name.c_str()); | |||||
| return nullptr; | return nullptr; | ||||
| }; | }; | ||||
| data_dumper_.SetLoopAddr(get_var_addr(GetVariableOp(NODE_NAME_GLOBAL_STEP), runtime_param_), | |||||
| get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_PER_ITER), runtime_param_), | |||||
| get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_COND), runtime_param_)); | |||||
| data_dumper_.SetLoopAddr(get_var_addr(NODE_NAME_GLOBAL_STEP), | |||||
| get_var_addr(NODE_NAME_FLOWCTRL_LOOP_PER_ITER), | |||||
| get_var_addr(NODE_NAME_FLOWCTRL_LOOP_COND)); | |||||
| } | } | ||||
| uint32_t DavinciModel::GetFlowctrlIndex(uint32_t op_index) { | uint32_t DavinciModel::GetFlowctrlIndex(uint32_t op_index) { | ||||
| @@ -268,14 +268,7 @@ class DavinciModel { | |||||
| return op_list_.at(index); | return op_list_.at(index); | ||||
| } | } | ||||
// Linear search of variable_op_list_ for the first non-null op whose name equals `name`.
// Returns nullptr when no entry matches.
| OpDescPtr GetVariableOp(const string &name) { | |||||
| for (auto op_desc : variable_op_list_) { | |||||
| if (op_desc != nullptr && op_desc->GetName() == name) { | |||||
| return op_desc; | |||||
| } | |||||
| } | |||||
| return nullptr; | |||||
| } | |||||
// Returns the value of global_step_addr_ (the member is declared with a nullptr initializer).
| void *GetGlobalStep() const { return global_step_addr_; } | |||||
| // get task info for profiling | // get task info for profiling | ||||
| const vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; } | const vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; } | ||||
| @@ -689,7 +682,7 @@ class DavinciModel { | |||||
| /// | /// | ||||
| Status InitConstant(const OpDescPtr &op_desc); | Status InitConstant(const OpDescPtr &op_desc); | ||||
| Status InitVariable(const OpDescPtr &op_desc); | |||||
| Status InitVariable(const OpDescPtr &op_desc, map<string, OpDescPtr> &variable_by_name); | |||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief LabelSet Op Initialize. | /// @brief LabelSet Op Initialize. | ||||
| @@ -828,7 +821,7 @@ class DavinciModel { | |||||
| // get desc info of graph for profiling | // get desc info of graph for profiling | ||||
| Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info); | Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info); | ||||
| void SetDataDumperArgs(const ComputeGraphPtr &compute_graph); | |||||
| void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name); | |||||
| Status InitModelProfile(); | Status InitModelProfile(); | ||||
| Status SinkModelProfile(); | Status SinkModelProfile(); | ||||
| @@ -877,7 +870,9 @@ class DavinciModel { | |||||
| map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init | map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init | ||||
| vector<OpDescPtr> variable_op_list_; | |||||
| map<string, GeTensorDesc> broadcast_variable_; | |||||
| void *global_step_addr_{nullptr}; | |||||
| uint64_t global_step_size_{0}; | |||||
| map<uint32_t, ZeroCopyOffset> new_input_data_info_; | map<uint32_t, ZeroCopyOffset> new_input_data_info_; | ||||
| map<uint32_t, ZeroCopyOffset> new_output_data_info_; | map<uint32_t, ZeroCopyOffset> new_output_data_info_; | ||||
| @@ -78,14 +78,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
| op_desc->GetType().c_str(), ext_info.size(), ext_info_addr_); | op_desc->GetType().c_str(), ext_info.size(), ext_info_addr_); | ||||
| // 2.1 get loop cond variable for tensor array write | // 2.1 get loop cond variable for tensor array write | ||||
| uint64_t step_id_addr = 0; | |||||
| OpDescPtr step_id_node = davinci_model_->GetVariableOp(NODE_NAME_GLOBAL_STEP); | |||||
| if (step_id_node != nullptr) { | |||||
| vector<void *> v_step_id_addr = ModelUtils::GetOutputDataAddrs(rts_param, step_id_node); | |||||
| if (!v_step_id_addr.empty()) { | |||||
| step_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(v_step_id_addr[0])); | |||||
| } | |||||
| } | |||||
| uint64_t step_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model_->GetGlobalStep())); | |||||
| auto session_id = davinci_model_->GetSessionId(); | auto session_id = davinci_model_->GetSessionId(); | ||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id; | fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id; | ||||
| @@ -183,51 +183,32 @@ ge::Status VarResource::GetBroadCastInfo(uint32_t graph_id, const string &var_na | |||||
| } | } | ||||
// Looks up the VarBroadCastInfo stored for (graph_id, var_name), computes
// dst_addr = base_ptr + input_offset and delegates to
// TransVarDataUtils::SyncVarData2BroadCast with that address and input_size.
// The rows interleave two versions: one takes the op desc and reads GetOutputDesc(0)
// from it, the other receives the tensor desc directly.
// base_ptr is guarded by GE_CHECK_NOTNULL (presumably an early error return - confirm macro).
// NOTE(review): the lookup uses operator[]; if var_broad_cast_info_ is a std::map this
// default-inserts an empty entry for an unknown graph_id/var_name - confirm.
| ge::Status VarResource::SyncVarData2BroadCast(uint32_t graph_id, const std::string &var_name, | ge::Status VarResource::SyncVarData2BroadCast(uint32_t graph_id, const std::string &var_name, | ||||
| const ge::ConstOpDescPtr &var_op_desc, uint8_t *base_ptr) { | |||||
| if (var_op_desc == nullptr) { | |||||
| GELOGE(FAILED, "[SyncVarData2BroadCast] var opdesc is null!"); | |||||
| return FAILED; | |||||
| } | |||||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||||
| GE_CHECK_NOTNULL(base_ptr); | GE_CHECK_NOTNULL(base_ptr); | ||||
| GELOGI("SyncVarData2BroadCast graph_id: %u, var_name: %s.", graph_id, var_name.c_str()); | GELOGI("SyncVarData2BroadCast graph_id: %u, var_name: %s.", graph_id, var_name.c_str()); | ||||
| VarBroadCastInfo var_broadcast_info = var_broad_cast_info_[graph_id][var_name]; | VarBroadCastInfo var_broadcast_info = var_broad_cast_info_[graph_id][var_name]; | ||||
| uint8_t *dst_addr = base_ptr + var_broadcast_info.input_offset; | uint8_t *dst_addr = base_ptr + var_broadcast_info.input_offset; | ||||
| ge::GeTensorDesc var_tensor_desc = var_op_desc->GetOutputDesc(0); | |||||
| return ge::TransVarDataUtils::SyncVarData2BroadCast(var_name, var_tensor_desc, dst_addr, | return ge::TransVarDataUtils::SyncVarData2BroadCast(var_name, var_tensor_desc, dst_addr, | ||||
| var_broadcast_info.input_size, session_id_); | var_broadcast_info.input_size, session_id_); | ||||
| } | } | ||||
// Looks up the VarBroadCastInfo stored for (graph_id, var_name), computes
// dst_addr = base_ptr + output_offset and delegates to
// TransVarDataUtils::SyncBroadCastData2Var with that address and output_size.
// The rows interleave two versions: the op-desc version returns SUCCESS early when
// VAR_ATTR_VAR_IS_BROADCAST cannot be read from the op; the tensor-desc version has
// no such filter and always performs the sync.
// base_ptr is deliberately not null-checked here (see the inline comment below).
| ge::Status VarResource::SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | ge::Status VarResource::SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | ||||
| const ge::ConstOpDescPtr &var_op_desc, uint8_t *base_ptr) { | |||||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||||
| GELOGI("SyncBroadCastData2Var var_name: %s", var_name.c_str()); | GELOGI("SyncBroadCastData2Var var_name: %s", var_name.c_str()); | ||||
| GE_CHECK_NOTNULL(var_op_desc); | |||||
| string var_is_broadcast; | |||||
| bool is_broadcast = AttrUtils::GetStr(var_op_desc, VAR_ATTR_VAR_IS_BROADCAST, var_is_broadcast); | |||||
| if (!is_broadcast) { | |||||
| return SUCCESS; | |||||
| } | |||||
| VarBroadCastInfo var_broadcast_info = var_broad_cast_info_[graph_id][var_name]; | VarBroadCastInfo var_broadcast_info = var_broad_cast_info_[graph_id][var_name]; | ||||
| // subgraph base_ptr could be nullptr, treat it as base 0 | // subgraph base_ptr could be nullptr, treat it as base 0 | ||||
| uint8_t *dst_addr = base_ptr + var_broadcast_info.output_offset; | uint8_t *dst_addr = base_ptr + var_broadcast_info.output_offset; | ||||
| ge::GeTensorDesc var_tensor_desc = var_op_desc->GetOutputDesc(0); | |||||
| return ge::TransVarDataUtils::SyncBroadCastData2Var(dst_addr, var_broadcast_info.output_size, var_name, | return ge::TransVarDataUtils::SyncBroadCastData2Var(dst_addr, var_broadcast_info.output_size, var_name, | ||||
| var_tensor_desc, session_id_); | var_tensor_desc, session_id_); | ||||
| } | } | ||||
// Entry point that ends in SyncVarData2BroadCast.
// The rows interleave two versions: the op-desc version null-checks the op and returns
// SUCCESS early when VAR_ATTR_VAR_IS_BROADCAST cannot be read; the tensor-desc version
// forwards its arguments unchanged with no filtering.
| ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_name, | ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_name, | ||||
| const ge::ConstOpDescPtr &var_op_desc, uint8_t *base_ptr) { | |||||
| GE_CHECK_NOTNULL(var_op_desc); | |||||
| string var_is_broadcast; | |||||
| bool is_broadcast = AttrUtils::GetStr(var_op_desc, VAR_ATTR_VAR_IS_BROADCAST, var_is_broadcast); | |||||
| if (!is_broadcast) { | |||||
| return SUCCESS; | |||||
| } | |||||
| return SyncVarData2BroadCast(graph_id, var_name, var_op_desc, base_ptr); | |||||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||||
| return SyncVarData2BroadCast(graph_id, var_name, var_tensor_desc, base_ptr); | |||||
| } | } | ||||
| bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_set_.count(offset) > 0; } | bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_set_.count(offset) > 0; } | ||||
| @@ -570,14 +551,14 @@ bool VarManager::IsVarExist(const std::string &var_name) { | |||||
| return var_resource_->IsVarExist(var_name); | return var_resource_->IsVarExist(var_name); | ||||
| } | } | ||||
// Locked wrapper around VarResource::SyncVarData.
// Holds mutex_ (a std::recursive_mutex) for the duration of the call.
// Logs a warning and returns ge::INTERNAL_ERROR when var_resource_ is null;
// otherwise returns whatever var_resource_->SyncVarData returns.
// The rows interleave two versions: one takes a ConstOpDescPtr by value and moves it
// on, the other takes the tensor desc by const reference.
| ge::Status VarManager::SyncVarData(uint32_t graph_id, const std::string &var_name, ge::ConstOpDescPtr var_op_desc, | |||||
| ge::Status VarManager::SyncVarData(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||||
| uint8_t *base_ptr) { | uint8_t *base_ptr) { | ||||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
| if (var_resource_ == nullptr) { | if (var_resource_ == nullptr) { | ||||
| GELOGW("VarManager has not been init."); | GELOGW("VarManager has not been init."); | ||||
| return ge::INTERNAL_ERROR; | return ge::INTERNAL_ERROR; | ||||
| } | } | ||||
| return var_resource_->SyncVarData(graph_id, var_name, std::move(var_op_desc), base_ptr); | |||||
| return var_resource_->SyncVarData(graph_id, var_name, var_tensor_desc, base_ptr); | |||||
| } | } | ||||
| ge::Status VarManager::GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc) { | ge::Status VarManager::GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc) { | ||||
| @@ -630,13 +611,13 @@ ge::Status VarManager::RenewCurVarDesc(const std::string &var_name, ge::OpDescPt | |||||
| } | } | ||||
// Locked wrapper around VarResource::SyncBroadCastData2Var.
// Holds mutex_ (a std::recursive_mutex) for the duration of the call.
// Logs a warning and returns ge::INTERNAL_ERROR when var_resource_ is null;
// otherwise returns whatever var_resource_->SyncBroadCastData2Var returns.
// The rows interleave two versions: one takes a ConstOpDescPtr by value and moves it
// on, the other takes the tensor desc by const reference.
| ge::Status VarManager::SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | ge::Status VarManager::SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | ||||
| ge::ConstOpDescPtr var_op_desc, uint8_t *base_ptr) { | |||||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
| if (var_resource_ == nullptr) { | if (var_resource_ == nullptr) { | ||||
| GELOGW("VarManager has not been init."); | GELOGW("VarManager has not been init."); | ||||
| return ge::INTERNAL_ERROR; | return ge::INTERNAL_ERROR; | ||||
| } | } | ||||
| return var_resource_->SyncBroadCastData2Var(graph_id, var_name, std::move(var_op_desc), base_ptr); | |||||
| return var_resource_->SyncBroadCastData2Var(graph_id, var_name, var_tensor_desc, base_ptr); | |||||
| } | } | ||||
| bool VarManager::IsVarAddr(const int64_t &offset) { | bool VarManager::IsVarAddr(const int64_t &offset) { | ||||
| @@ -119,12 +119,12 @@ class VarResource { | |||||
| ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | ||||
| ge::Status SyncVarData2BroadCast(uint32_t graph_id, const std::string &var_name, | ge::Status SyncVarData2BroadCast(uint32_t graph_id, const std::string &var_name, | ||||
| const ge::ConstOpDescPtr &var_op_desc, uint8_t *base_ptr); | |||||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr); | |||||
| ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | ||||
| const ge::ConstOpDescPtr &var_op_desc, uint8_t *base_ptr); | |||||
| const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr); | |||||
| ge::Status SyncVarData(uint32_t graph_id, const std::string &var_name, const ge::ConstOpDescPtr &var_op_desc, | |||||
| ge::Status SyncVarData(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||||
| uint8_t *base_ptr); | uint8_t *base_ptr); | ||||
| Status SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { | Status SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { | ||||
| @@ -215,14 +215,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { | |||||
| ge::Status GetVarAddr(const std::string &var_name, const ge::GeTensorDesc &tensor_desc, uint8_t **dev_ptr); | ge::Status GetVarAddr(const std::string &var_name, const ge::GeTensorDesc &tensor_desc, uint8_t **dev_ptr); | ||||
| ge::Status SyncVarData(uint32_t graph_id, const std::string &var_name, ge::ConstOpDescPtr var_op_desc, | |||||
| ge::Status SyncVarData(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||||
| uint8_t *base_ptr); | uint8_t *base_ptr); | ||||
| ge::Status SaveBroadCastInfo(uint32_t graph_id, const VarBroadCastInfo &broad_cast_info); | ge::Status SaveBroadCastInfo(uint32_t graph_id, const VarBroadCastInfo &broad_cast_info); | ||||
| ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | ||||
| ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, ge::ConstOpDescPtr var_op_desc, | |||||
| ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||||
| uint8_t *base_ptr); | uint8_t *base_ptr); | ||||
| ge::Status GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc); | ge::Status GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc); | ||||
| @@ -306,8 +306,12 @@ TEST_F(UtestDavinciModel, init_unknown) { | |||||
| EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS); | EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS); | ||||
| } | } | ||||
| TEST_F(UtestDavinciModel, ReturnNoOutput_test) { | |||||
| TEST_F(UtestDavinciModel, Init_variable_op) { | |||||
| DavinciModel model(0, nullptr); | DavinciModel model(0, nullptr); | ||||
| model.ge_model_ = make_shared<GeModel>(); | |||||
| model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
| model.runtime_param_.mem_size = 5120000; | |||||
| ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | ||||
| TensorUtils::SetSize(tensor, 512); | TensorUtils::SetSize(tensor, 512); | ||||
| @@ -317,27 +321,19 @@ TEST_F(UtestDavinciModel, ReturnNoOutput_test) { | |||||
| var1->AddOutputDesc(tensor); | var1->AddOutputDesc(tensor); | ||||
| var1->SetInputOffset({1024}); | var1->SetInputOffset({1024}); | ||||
| var1->SetOutputOffset({1024}); | var1->SetOutputOffset({1024}); | ||||
| AttrUtils::SetBool(var1, VAR_ATTR_VAR_IS_BROADCAST, true); | |||||
| graph->AddNode(var1); | |||||
| model.variable_op_list_.push_back(var1); | |||||
| OpDescPtr var2 = CreateOpDesc(NODE_NAME_GLOBAL_STEP, VARIABLE); | |||||
| var2->AddInputDesc(tensor); | |||||
| var2->AddOutputDesc(tensor); | |||||
| var2->SetInputOffset({1024}); | |||||
| var2->SetOutputOffset({1024}); | |||||
| graph->AddNode(var2); | |||||
| EXPECT_EQ(model.InitNodes(graph), SUCCESS); | |||||
| EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); | EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); | ||||
| } | |||||
| TEST_F(UtestDavinciModel, SyncVarData_test) { | |||||
| DavinciModel model(0, nullptr); | |||||
| GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor, 512); | |||||
| OpDescPtr var1 = CreateOpDesc("var1", VARIABLE); | |||||
| var1->AddInputDesc(tensor); | |||||
| var1->AddOutputDesc(tensor); | |||||
| var1->SetInputOffset({1024}); | |||||
| var1->SetOutputOffset({1024}); | |||||
| model.variable_op_list_.push_back(var1); | |||||
| EXPECT_NE(model.SyncVarData(), SUCCESS); | EXPECT_NE(model.SyncVarData(), SUCCESS); | ||||
| } | } | ||||
| @@ -378,7 +374,7 @@ TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ2) { | |||||
| GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), case1_node->GetInDataAnchor(0)); | GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), case1_node->GetInDataAnchor(0)); | ||||
| GraphUtils::AddEdge(case1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | GraphUtils::AddEdge(case1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | ||||
| (void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1;2;4;8"); | (void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1;2;4;8"); | ||||
| (void)AttrUtils::SetBool(case_node, ATTR_INSERT_BY_MBATCH, true); | (void)AttrUtils::SetBool(case_node, ATTR_INSERT_BY_MBATCH, true); | ||||
| @@ -64,10 +64,6 @@ TEST_F(UtestKernelExTaskInfo, success_kernel_ex_task_init) { | |||||
| string value1(arg_size, 'a'); | string value1(arg_size, 'a'); | ||||
| kernel_ex_def->set_args_size(arg_size); | kernel_ex_def->set_args_size(arg_size); | ||||
| kernel_ex_def->set_args(value1); | kernel_ex_def->set_args(value1); | ||||
| OpDescPtr v_op_desc = CreateOpDesc("ge_global_step", "Variable"); | |||||
| model.variable_op_list_.push_back(v_op_desc); | |||||
| model.op_list_[0]->SetWorkspace({100331008}); // offset | |||||
| model.op_list_[0]->SetWorkspaceBytes({150}); // length | |||||
| EXPECT_EQ(kernel_ex_task_info.Init(task_def, &model), FAILED); | EXPECT_EQ(kernel_ex_task_info.Init(task_def, &model), FAILED); | ||||