From: @zhangxiaokun9 Reviewed-by: @wangxiaotian22, @xchu42 Signed-off-by: @ji_chen tags/v1.2.0
@@ -96,6 +96,29 @@ const int32_t kModelAbortNormalNew = 507024; | |||||
inline bool IsDataOp(const std::string &node_type) { | inline bool IsDataOp(const std::string &node_type) { | ||||
return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE; | return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE; | ||||
} | } | ||||
inline bool IsTbeTask(const OpDescPtr &op_desc) { | |||||
uint32_t run_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||||
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, run_mode)) { | |||||
return false; | |||||
} | |||||
if (run_mode != static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||||
return false; | |||||
} | |||||
// Skip no_task operator, such as concat and split. | |||||
bool attr_no_task = false; | |||||
bool get_attr_no_task_flag = AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_no_task); | |||||
if (get_attr_no_task_flag && attr_no_task) { | |||||
GELOGI("Node[name:%s, type:%s] does not generate task, skip initialization.", | |||||
op_desc->GetName().c_str(), op_desc->GetType().c_str()); | |||||
return false; | |||||
} | |||||
return true; | |||||
} | |||||
inline bool IsNoTaskAndDumpNeeded(const OpDescPtr &op_desc) { | inline bool IsNoTaskAndDumpNeeded(const OpDescPtr &op_desc) { | ||||
bool save_dump_info = false; | bool save_dump_info = false; | ||||
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NO_TASK_AND_DUMP_NEEDED, save_dump_info); | (void)ge::AttrUtils::GetBool(op_desc, ATTR_NO_TASK_AND_DUMP_NEEDED, save_dump_info); | ||||
@@ -689,7 +712,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
GE_CHK_STATUS_RET(InitNodes(compute_graph), "Init nodes failed"); | GE_CHK_STATUS_RET(InitNodes(compute_graph), "Init nodes failed"); | ||||
SetDataDumperArgs(compute_graph); | |||||
GE_TIMESTAMP_START(DoTaskSink); | GE_TIMESTAMP_START(DoTaskSink); | ||||
GE_CHK_STATUS_RET(DoTaskSink(), "Task sink failed"); | GE_CHK_STATUS_RET(DoTaskSink(), "Task sink failed"); | ||||
GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); | GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); | ||||
@@ -825,7 +847,6 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
typedef Status (DavinciModel::*OpDescCall)(const OpDescPtr &); | typedef Status (DavinciModel::*OpDescCall)(const OpDescPtr &); | ||||
static std::map<std::string, OpDescCall> op_desc_handle = { | static std::map<std::string, OpDescCall> op_desc_handle = { | ||||
{VARIABLE, &DavinciModel::InitVariable}, | |||||
{CONSTANTOP, &DavinciModel::InitConstant}, | {CONSTANTOP, &DavinciModel::InitConstant}, | ||||
{STREAMACTIVE, &DavinciModel::InitStreamActive}, | {STREAMACTIVE, &DavinciModel::InitStreamActive}, | ||||
{STREAMSWITCH, &DavinciModel::InitStreamSwitch}, | {STREAMSWITCH, &DavinciModel::InitStreamSwitch}, | ||||
@@ -836,15 +857,13 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
vector<OpDescPtr> output_op_list; | vector<OpDescPtr> output_op_list; | ||||
map<uint32_t, OpDescPtr> data_by_index; | map<uint32_t, OpDescPtr> data_by_index; | ||||
map<string, OpDescPtr> variable_by_name; | |||||
auto nodes = compute_graph->GetAllNodes(); | auto nodes = compute_graph->GetAllNodes(); | ||||
const CustAICPUKernelStore &aicpu_kernel_store = ge_model_->GetCustAICPUKernelStore(); | const CustAICPUKernelStore &aicpu_kernel_store = ge_model_->GetCustAICPUKernelStore(); | ||||
for (size_t i = 0; i < nodes.size(); ++i) { | for (size_t i = 0; i < nodes.size(); ++i) { | ||||
auto node = nodes.at(i); | |||||
auto op_desc = node->GetOpDesc(); | |||||
if (op_desc == nullptr) { | |||||
GELOGE(PARAM_INVALID, "op_desc is null."); | |||||
return PARAM_INVALID; | |||||
} | |||||
const auto &node = nodes.at(i); | |||||
const auto &op_desc = node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
op_list_[op_desc->GetId()] = op_desc; | op_list_[op_desc->GetId()] = op_desc; | ||||
@@ -873,6 +892,14 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
continue; | continue; | ||||
} | } | ||||
if (op_desc->GetType() == VARIABLE) { | |||||
if (InitVariable(op_desc, variable_by_name) != SUCCESS) { | |||||
GELOGE(PARAM_INVALID, "Variable init failed, Name: %s", op_desc->GetName().c_str()); | |||||
return PARAM_INVALID; | |||||
} | |||||
continue; | |||||
} | |||||
auto it = op_desc_handle.find(op_desc->GetType()); | auto it = op_desc_handle.find(op_desc->GetType()); | ||||
if (it != op_desc_handle.end()) { | if (it != op_desc_handle.end()) { | ||||
if ((this->*it->second)(op_desc) != SUCCESS) { | if ((this->*it->second)(op_desc) != SUCCESS) { | ||||
@@ -907,17 +934,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
} | } | ||||
GE_TIMESTAMP_RESTART(InitTbeHandle); | GE_TIMESTAMP_RESTART(InitTbeHandle); | ||||
uint32_t run_mode = static_cast<uint32_t>(domi::ImplyType::INVALID); | |||||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, run_mode) && | |||||
run_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) { | |||||
// Skip no_task operator, such as concat and split. | |||||
bool attr_notask = false; | |||||
bool get_attr_notask_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_notask); | |||||
GE_IF_BOOL_EXEC(get_attr_notask_flag && attr_notask, | |||||
GELOGI("Node[name:%s, type:%s] does not generate task, skip initialization.", | |||||
op_desc->GetName().c_str(), op_desc->GetType().c_str()); | |||||
continue;); | |||||
if (IsTbeTask(op_desc)) { | |||||
Status status = InitTbeHandle(op_desc); | Status status = InitTbeHandle(op_desc); | ||||
if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
GELOGE(status, "TBE init failed. %s", op_desc->GetName().c_str()); | GELOGE(status, "TBE init failed. %s", op_desc->GetName().c_str()); | ||||
@@ -927,6 +944,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
GE_TIMESTAMP_ADD(InitTbeHandle); | GE_TIMESTAMP_ADD(InitTbeHandle); | ||||
} | } | ||||
SetDataDumperArgs(compute_graph, variable_by_name); | |||||
GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); | GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); | ||||
GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); | GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); | ||||
return GenInputOutputInfo(data_by_index, output_op_list); | return GenInputOutputInfo(data_by_index, output_op_list); | ||||
@@ -1405,8 +1423,23 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
// Per-variable initialization hook of DavinciModel.
// NOTE(review): this span is a rendered diff hunk, so two variants are interleaved. The
// one-argument signature and the variable_op_list_ push look like the removed side; the
// two-argument signature and the statements after it look like the added side - confirm.
Status DavinciModel::InitVariable(const OpDescPtr &op_desc) { | |||||
variable_op_list_.push_back(op_desc); | |||||
Status DavinciModel::InitVariable(const OpDescPtr &op_desc, map<string, OpDescPtr> &variable_by_name) { | |||||
// For the op named NODE_NAME_GLOBAL_STEP, cache its first output size and first output
// address; each member is only written when the corresponding list is non-empty.
if (op_desc->GetName() == NODE_NAME_GLOBAL_STEP) { | |||||
const auto output_sizes = ModelUtils::GetOutputSize(op_desc); | |||||
if (!output_sizes.empty()) { | |||||
global_step_size_ = output_sizes[0]; | |||||
} | |||||
const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc); | |||||
if (!output_addrs.empty()) { | |||||
global_step_addr_ = output_addrs[0]; | |||||
} | |||||
} | |||||
// Ops that carry VAR_ATTR_VAR_IS_BROADCAST have their output desc 0 stored by op name.
if (op_desc->HasAttr(VAR_ATTR_VAR_IS_BROADCAST)) { | |||||
broadcast_variable_[op_desc->GetName()] = op_desc->GetOutputDesc(0); | |||||
} | |||||
// Register the op in the caller-supplied name -> op map.
variable_by_name[op_desc->GetName()] = op_desc; | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -2119,25 +2152,16 @@ Status DavinciModel::SyncVarData() { | |||||
GELOGI("Sync var data, model id:%u", model_id_); | GELOGI("Sync var data, model id:%u", model_id_); | ||||
Status ret = SUCCESS; | Status ret = SUCCESS; | ||||
OpDescPtr global_step = GetVariableOp(NODE_NAME_GLOBAL_STEP); | |||||
if (global_step != nullptr) { | |||||
auto v_output_size = ModelUtils::GetOutputSize(global_step); | |||||
auto v_output_addr = ModelUtils::GetOutputDataAddrs(runtime_param_, global_step); | |||||
if (v_output_size.empty() || v_output_addr.empty()) { | |||||
GELOGE(PARAM_INVALID, "global step op:%s not set output", global_step->GetName().c_str()); | |||||
return PARAM_INVALID; | |||||
} | |||||
std::vector<uint64_t> v_step; | |||||
v_step.push_back(iterator_count_); | |||||
GE_CHK_RT_RET(rtMemcpy(v_output_addr[0], v_output_size[0], v_step.data(), v_step.size() * sizeof(uint64_t), | |||||
if (global_step_addr_ != nullptr && global_step_size_ != 0) { | |||||
const vector<uint64_t> v_step = { iterator_count_ }; | |||||
GE_CHK_RT_RET(rtMemcpy(global_step_addr_, global_step_size_, v_step.data(), v_step.size() * sizeof(uint64_t), | |||||
RT_MEMCPY_HOST_TO_DEVICE)); | RT_MEMCPY_HOST_TO_DEVICE)); | ||||
} | } | ||||
for (auto op_desc : variable_op_list_) { | |||||
ret = | |||||
VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); | |||||
for (const auto &item : broadcast_variable_) { | |||||
ret = VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, item.first, item.second, mem_base_); | |||||
GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | ||||
op_desc->GetName().c_str()); | |||||
item.first.c_str()); | |||||
} | } | ||||
return ret; | return ret; | ||||
} | } | ||||
@@ -2622,11 +2646,11 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b | |||||
/// | /// | ||||
Status DavinciModel::ReturnNoOutput(uint32_t data_id) { | Status DavinciModel::ReturnNoOutput(uint32_t data_id) { | ||||
GELOGI("ReturnNoOutput model id:%u", model_id_); | GELOGI("ReturnNoOutput model id:%u", model_id_); | ||||
for (auto op_desc : variable_op_list_) { | |||||
for (const auto item : broadcast_variable_) { | |||||
Status ret = VarManager::Instance(session_id_) | Status ret = VarManager::Instance(session_id_) | ||||
->SyncBroadCastData2Var(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); | |||||
->SyncBroadCastData2Var(runtime_param_.graph_id, item.first, item.second, mem_base_); | |||||
GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | ||||
op_desc->GetName().c_str()); | |||||
item.first.c_str()); | |||||
} | } | ||||
GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); | GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); | ||||
@@ -3921,11 +3945,11 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { | |||||
void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name) { | |||||
data_dumper_.SetModelName(name_); | data_dumper_.SetModelName(name_); | ||||
data_dumper_.SetModelId(model_id_); | data_dumper_.SetModelId(model_id_); | ||||
data_dumper_.SetOmName(om_name_); | data_dumper_.SetOmName(om_name_); | ||||
data_dumper_.SetComputeGraph(compute_graph); | |||||
data_dumper_.SetComputeGraph(graph); | |||||
data_dumper_.SetRefInfo(saved_task_addrs_); | data_dumper_.SetRefInfo(saved_task_addrs_); | ||||
data_dumper_.SetL1FusionAddr(l1_fusion_addr_); | data_dumper_.SetL1FusionAddr(l1_fusion_addr_); | ||||
@@ -3938,22 +3962,23 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { | |||||
data_dumper_.SetDeviceId(device_id); | data_dumper_.SetDeviceId(device_id); | ||||
// set loop count addr | // set loop count addr | ||||
auto get_var_addr = [](const OpDescPtr &op, const RuntimeParam &runtime_param) -> void *{ | |||||
if (op != nullptr) { | |||||
auto v_output_size = ModelUtils::GetOutputSize(op); | |||||
auto v_output_addr = ModelUtils::GetOutputDataAddrs(runtime_param, op); | |||||
if (v_output_size.empty() || v_output_addr.empty()) { | |||||
auto get_var_addr = [&](const string &name) -> void *{ | |||||
const auto it = variable_by_name.find(name); | |||||
if (it != variable_by_name.end()) { | |||||
const auto output_sizes = ModelUtils::GetOutputSize(it->second); | |||||
const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, it->second); | |||||
if (output_sizes.empty() || output_addrs.empty()) { | |||||
return nullptr; | return nullptr; | ||||
} | } | ||||
return v_output_addr[0]; | |||||
return output_addrs[0]; | |||||
} | } | ||||
GELOGD("op is null."); | |||||
GELOGD("op: %s is null.", name.c_str()); | |||||
return nullptr; | return nullptr; | ||||
}; | }; | ||||
data_dumper_.SetLoopAddr(get_var_addr(GetVariableOp(NODE_NAME_GLOBAL_STEP), runtime_param_), | |||||
get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_PER_ITER), runtime_param_), | |||||
get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_COND), runtime_param_)); | |||||
data_dumper_.SetLoopAddr(get_var_addr(NODE_NAME_GLOBAL_STEP), | |||||
get_var_addr(NODE_NAME_FLOWCTRL_LOOP_PER_ITER), | |||||
get_var_addr(NODE_NAME_FLOWCTRL_LOOP_COND)); | |||||
} | } | ||||
uint32_t DavinciModel::GetFlowctrlIndex(uint32_t op_index) { | uint32_t DavinciModel::GetFlowctrlIndex(uint32_t op_index) { | ||||
@@ -268,14 +268,7 @@ class DavinciModel { | |||||
return op_list_.at(index); | return op_list_.at(index); | ||||
} | } | ||||
// Linear search of variable_op_list_: returns the first non-null op whose name equals
// `name`, or nullptr when none matches.
// NOTE(review): by the hunk's line counts this method appears to be on the removed side
// of the diff - confirm before relying on it.
OpDescPtr GetVariableOp(const string &name) { | |||||
for (auto op_desc : variable_op_list_) { | |||||
if (op_desc != nullptr && op_desc->GetName() == name) { | |||||
return op_desc; | |||||
} | |||||
} | |||||
return nullptr; | |||||
} | |||||
// Accessor: returns the stored global_step_addr_ pointer unchanged.
void *GetGlobalStep() const { return global_step_addr_; } | |||||
// get task info for profiling | // get task info for profiling | ||||
const vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; } | const vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; } | ||||
@@ -689,7 +682,7 @@ class DavinciModel { | |||||
/// | /// | ||||
Status InitConstant(const OpDescPtr &op_desc); | Status InitConstant(const OpDescPtr &op_desc); | ||||
Status InitVariable(const OpDescPtr &op_desc); | |||||
Status InitVariable(const OpDescPtr &op_desc, map<string, OpDescPtr> &variable_by_name); | |||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief LabelSet Op Initialize. | /// @brief LabelSet Op Initialize. | ||||
@@ -828,7 +821,7 @@ class DavinciModel { | |||||
// get desc info of graph for profiling | // get desc info of graph for profiling | ||||
Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info); | Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info); | ||||
void SetDataDumperArgs(const ComputeGraphPtr &compute_graph); | |||||
void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name); | |||||
Status InitModelProfile(); | Status InitModelProfile(); | ||||
Status SinkModelProfile(); | Status SinkModelProfile(); | ||||
@@ -877,7 +870,9 @@ class DavinciModel { | |||||
map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init | map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init | ||||
vector<OpDescPtr> variable_op_list_; | |||||
map<string, GeTensorDesc> broadcast_variable_; | |||||
void *global_step_addr_{nullptr}; | |||||
uint64_t global_step_size_{0}; | |||||
map<uint32_t, ZeroCopyOffset> new_input_data_info_; | map<uint32_t, ZeroCopyOffset> new_input_data_info_; | ||||
map<uint32_t, ZeroCopyOffset> new_output_data_info_; | map<uint32_t, ZeroCopyOffset> new_output_data_info_; | ||||
@@ -78,14 +78,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
op_desc->GetType().c_str(), ext_info.size(), ext_info_addr_); | op_desc->GetType().c_str(), ext_info.size(), ext_info_addr_); | ||||
// 2.1 get loop cond variable for tensor array write | // 2.1 get loop cond variable for tensor array write | ||||
uint64_t step_id_addr = 0; | |||||
OpDescPtr step_id_node = davinci_model_->GetVariableOp(NODE_NAME_GLOBAL_STEP); | |||||
if (step_id_node != nullptr) { | |||||
vector<void *> v_step_id_addr = ModelUtils::GetOutputDataAddrs(rts_param, step_id_node); | |||||
if (!v_step_id_addr.empty()) { | |||||
step_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(v_step_id_addr[0])); | |||||
} | |||||
} | |||||
uint64_t step_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model_->GetGlobalStep())); | |||||
auto session_id = davinci_model_->GetSessionId(); | auto session_id = davinci_model_->GetSessionId(); | ||||
fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id; | fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id; | ||||
@@ -183,51 +183,32 @@ ge::Status VarResource::GetBroadCastInfo(uint32_t graph_id, const string &var_na | |||||
} | } | ||||
// Looks up the broadcast info stored for (graph_id, var_name), computes the destination
// address as base_ptr + input_offset, and forwards to
// TransVarDataUtils::SyncVarData2BroadCast with input_size and session_id_.
// NOTE(review): this span is a rendered diff hunk with two variants interleaved. The
// ConstOpDescPtr parameter, its null check and the GetOutputDesc(0) local look like the
// removed side; the GeTensorDesc parameter looks like the added side - confirm.
ge::Status VarResource::SyncVarData2BroadCast(uint32_t graph_id, const std::string &var_name, | ge::Status VarResource::SyncVarData2BroadCast(uint32_t graph_id, const std::string &var_name, | ||||
const ge::ConstOpDescPtr &var_op_desc, uint8_t *base_ptr) { | |||||
if (var_op_desc == nullptr) { | |||||
GELOGE(FAILED, "[SyncVarData2BroadCast] var opdesc is null!"); | |||||
return FAILED; | |||||
} | |||||
const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||||
// presumably returns an error status when base_ptr is null - macro body not visible here
GE_CHECK_NOTNULL(base_ptr); | GE_CHECK_NOTNULL(base_ptr); | ||||
GELOGI("SyncVarData2BroadCast graph_id: %u, var_name: %s.", graph_id, var_name.c_str()); | GELOGI("SyncVarData2BroadCast graph_id: %u, var_name: %s.", graph_id, var_name.c_str()); | ||||
// NOTE(review): operator[] lookup; if these are standard maps a missing key would be
// default-inserted rather than reported - confirm the container type.
VarBroadCastInfo var_broadcast_info = var_broad_cast_info_[graph_id][var_name]; | VarBroadCastInfo var_broadcast_info = var_broad_cast_info_[graph_id][var_name]; | ||||
uint8_t *dst_addr = base_ptr + var_broadcast_info.input_offset; | uint8_t *dst_addr = base_ptr + var_broadcast_info.input_offset; | ||||
ge::GeTensorDesc var_tensor_desc = var_op_desc->GetOutputDesc(0); | |||||
return ge::TransVarDataUtils::SyncVarData2BroadCast(var_name, var_tensor_desc, dst_addr, | return ge::TransVarDataUtils::SyncVarData2BroadCast(var_name, var_tensor_desc, dst_addr, | ||||
var_broadcast_info.input_size, session_id_); | var_broadcast_info.input_size, session_id_); | ||||
} | } | ||||
// Looks up the broadcast info stored for (graph_id, var_name), computes the address as
// base_ptr + output_offset, and forwards to TransVarDataUtils::SyncBroadCastData2Var with
// output_size, the variable name, the tensor desc and session_id_.
// NOTE(review): this span is a rendered diff hunk with two variants interleaved. The
// ConstOpDescPtr parameter, the VAR_ATTR_VAR_IS_BROADCAST early return and the
// GetOutputDesc(0) local look like the removed side; the GeTensorDesc parameter looks
// like the added side - confirm.
ge::Status VarResource::SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | ge::Status VarResource::SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | ||||
const ge::ConstOpDescPtr &var_op_desc, uint8_t *base_ptr) { | |||||
const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||||
GELOGI("SyncBroadCastData2Var var_name: %s", var_name.c_str()); | GELOGI("SyncBroadCastData2Var var_name: %s", var_name.c_str()); | ||||
GE_CHECK_NOTNULL(var_op_desc); | |||||
string var_is_broadcast; | |||||
bool is_broadcast = AttrUtils::GetStr(var_op_desc, VAR_ATTR_VAR_IS_BROADCAST, var_is_broadcast); | |||||
if (!is_broadcast) { | |||||
return SUCCESS; | |||||
} | |||||
VarBroadCastInfo var_broadcast_info = var_broad_cast_info_[graph_id][var_name]; | VarBroadCastInfo var_broadcast_info = var_broad_cast_info_[graph_id][var_name]; | ||||
// subgraph base_ptr could be nullptr, treat it as base 0 | // subgraph base_ptr could be nullptr, treat it as base 0 | ||||
uint8_t *dst_addr = base_ptr + var_broadcast_info.output_offset; | uint8_t *dst_addr = base_ptr + var_broadcast_info.output_offset; | ||||
ge::GeTensorDesc var_tensor_desc = var_op_desc->GetOutputDesc(0); | |||||
return ge::TransVarDataUtils::SyncBroadCastData2Var(dst_addr, var_broadcast_info.output_size, var_name, | return ge::TransVarDataUtils::SyncBroadCastData2Var(dst_addr, var_broadcast_info.output_size, var_name, | ||||
var_tensor_desc, session_id_); | var_tensor_desc, session_id_); | ||||
} | } | ||||
// Entry point that delegates to SyncVarData2BroadCast with the same graph id, variable
// name and base pointer.
// NOTE(review): this span is a rendered diff hunk with two variants interleaved. The
// ConstOpDescPtr variant (null check, VAR_ATTR_VAR_IS_BROADCAST early return, forwarding
// var_op_desc) looks like the removed side; the GeTensorDesc variant, which forwards
// var_tensor_desc with no attribute check, looks like the added side - confirm.
ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_name, | ge::Status VarResource::SyncVarData(uint32_t graph_id, const std::string &var_name, | ||||
const ge::ConstOpDescPtr &var_op_desc, uint8_t *base_ptr) { | |||||
GE_CHECK_NOTNULL(var_op_desc); | |||||
string var_is_broadcast; | |||||
bool is_broadcast = AttrUtils::GetStr(var_op_desc, VAR_ATTR_VAR_IS_BROADCAST, var_is_broadcast); | |||||
if (!is_broadcast) { | |||||
return SUCCESS; | |||||
} | |||||
return SyncVarData2BroadCast(graph_id, var_name, var_op_desc, base_ptr); | |||||
const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||||
return SyncVarData2BroadCast(graph_id, var_name, var_tensor_desc, base_ptr); | |||||
} | } | ||||
bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_set_.count(offset) > 0; } | bool VarResource::IsVarAddr(const int64_t &offset) { return var_offset_set_.count(offset) > 0; } | ||||
@@ -570,14 +551,14 @@ bool VarManager::IsVarExist(const std::string &var_name) { | |||||
return var_resource_->IsVarExist(var_name); | return var_resource_->IsVarExist(var_name); | ||||
} | } | ||||
// Locked wrapper around VarResource::SyncVarData: takes mutex_ via std::lock_guard for
// the whole call, logs a warning and returns ge::INTERNAL_ERROR when var_resource_ is
// null, otherwise forwards the arguments and returns the delegate's status.
// NOTE(review): this span is a rendered diff hunk; the signature and the forwarding call
// each appear twice. The ConstOpDescPtr / std::move variant looks like the removed side
// and the GeTensorDesc variant like the added side - confirm.
ge::Status VarManager::SyncVarData(uint32_t graph_id, const std::string &var_name, ge::ConstOpDescPtr var_op_desc, | |||||
ge::Status VarManager::SyncVarData(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||||
uint8_t *base_ptr) { | uint8_t *base_ptr) { | ||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
if (var_resource_ == nullptr) { | if (var_resource_ == nullptr) { | ||||
GELOGW("VarManager has not been init."); | GELOGW("VarManager has not been init."); | ||||
return ge::INTERNAL_ERROR; | return ge::INTERNAL_ERROR; | ||||
} | } | ||||
return var_resource_->SyncVarData(graph_id, var_name, std::move(var_op_desc), base_ptr); | |||||
return var_resource_->SyncVarData(graph_id, var_name, var_tensor_desc, base_ptr); | |||||
} | } | ||||
ge::Status VarManager::GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc) { | ge::Status VarManager::GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc) { | ||||
@@ -630,13 +611,13 @@ ge::Status VarManager::RenewCurVarDesc(const std::string &var_name, ge::OpDescPt | |||||
} | } | ||||
// Locked wrapper around VarResource::SyncBroadCastData2Var: takes mutex_ via
// std::lock_guard for the whole call, logs a warning and returns ge::INTERNAL_ERROR when
// var_resource_ is null, otherwise forwards the arguments and returns the delegate's status.
// NOTE(review): this span is a rendered diff hunk; the second signature line and the
// forwarding call each appear twice. The ConstOpDescPtr / std::move variant looks like
// the removed side and the GeTensorDesc variant like the added side - confirm.
ge::Status VarManager::SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | ge::Status VarManager::SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | ||||
ge::ConstOpDescPtr var_op_desc, uint8_t *base_ptr) { | |||||
const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr) { | |||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
if (var_resource_ == nullptr) { | if (var_resource_ == nullptr) { | ||||
GELOGW("VarManager has not been init."); | GELOGW("VarManager has not been init."); | ||||
return ge::INTERNAL_ERROR; | return ge::INTERNAL_ERROR; | ||||
} | } | ||||
return var_resource_->SyncBroadCastData2Var(graph_id, var_name, std::move(var_op_desc), base_ptr); | |||||
return var_resource_->SyncBroadCastData2Var(graph_id, var_name, var_tensor_desc, base_ptr); | |||||
} | } | ||||
bool VarManager::IsVarAddr(const int64_t &offset) { | bool VarManager::IsVarAddr(const int64_t &offset) { | ||||
@@ -119,12 +119,12 @@ class VarResource { | |||||
ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | ||||
ge::Status SyncVarData2BroadCast(uint32_t graph_id, const std::string &var_name, | ge::Status SyncVarData2BroadCast(uint32_t graph_id, const std::string &var_name, | ||||
const ge::ConstOpDescPtr &var_op_desc, uint8_t *base_ptr); | |||||
const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr); | |||||
ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, | ||||
const ge::ConstOpDescPtr &var_op_desc, uint8_t *base_ptr); | |||||
const GeTensorDesc &var_tensor_desc, uint8_t *base_ptr); | |||||
ge::Status SyncVarData(uint32_t graph_id, const std::string &var_name, const ge::ConstOpDescPtr &var_op_desc, | |||||
ge::Status SyncVarData(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||||
uint8_t *base_ptr); | uint8_t *base_ptr); | ||||
Status SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { | Status SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { | ||||
@@ -215,14 +215,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { | |||||
ge::Status GetVarAddr(const std::string &var_name, const ge::GeTensorDesc &tensor_desc, uint8_t **dev_ptr); | ge::Status GetVarAddr(const std::string &var_name, const ge::GeTensorDesc &tensor_desc, uint8_t **dev_ptr); | ||||
ge::Status SyncVarData(uint32_t graph_id, const std::string &var_name, ge::ConstOpDescPtr var_op_desc, | |||||
ge::Status SyncVarData(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||||
uint8_t *base_ptr); | uint8_t *base_ptr); | ||||
ge::Status SaveBroadCastInfo(uint32_t graph_id, const VarBroadCastInfo &broad_cast_info); | ge::Status SaveBroadCastInfo(uint32_t graph_id, const VarBroadCastInfo &broad_cast_info); | ||||
ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); | ||||
ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, ge::ConstOpDescPtr var_op_desc, | |||||
ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, const GeTensorDesc &var_tensor_desc, | |||||
uint8_t *base_ptr); | uint8_t *base_ptr); | ||||
ge::Status GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc); | ge::Status GetCurVarDesc(const std::string &var_name, ge::GeTensorDesc &tensor_desc); | ||||
@@ -306,8 +306,12 @@ TEST_F(UtestDavinciModel, init_unknown) { | |||||
EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS); | EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS); | ||||
} | } | ||||
TEST_F(UtestDavinciModel, ReturnNoOutput_test) { | |||||
TEST_F(UtestDavinciModel, Init_variable_op) { | |||||
DavinciModel model(0, nullptr); | DavinciModel model(0, nullptr); | ||||
model.ge_model_ = make_shared<GeModel>(); | |||||
model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||||
model.runtime_param_.mem_size = 5120000; | |||||
ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||||
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | ||||
TensorUtils::SetSize(tensor, 512); | TensorUtils::SetSize(tensor, 512); | ||||
@@ -317,27 +321,19 @@ TEST_F(UtestDavinciModel, ReturnNoOutput_test) { | |||||
var1->AddOutputDesc(tensor); | var1->AddOutputDesc(tensor); | ||||
var1->SetInputOffset({1024}); | var1->SetInputOffset({1024}); | ||||
var1->SetOutputOffset({1024}); | var1->SetOutputOffset({1024}); | ||||
AttrUtils::SetBool(var1, VAR_ATTR_VAR_IS_BROADCAST, true); | |||||
graph->AddNode(var1); | |||||
model.variable_op_list_.push_back(var1); | |||||
OpDescPtr var2 = CreateOpDesc(NODE_NAME_GLOBAL_STEP, VARIABLE); | |||||
var2->AddInputDesc(tensor); | |||||
var2->AddOutputDesc(tensor); | |||||
var2->SetInputOffset({1024}); | |||||
var2->SetOutputOffset({1024}); | |||||
graph->AddNode(var2); | |||||
EXPECT_EQ(model.InitNodes(graph), SUCCESS); | |||||
EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); | EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); | ||||
} | |||||
TEST_F(UtestDavinciModel, SyncVarData_test) { | |||||
DavinciModel model(0, nullptr); | |||||
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
TensorUtils::SetSize(tensor, 512); | |||||
OpDescPtr var1 = CreateOpDesc("var1", VARIABLE); | |||||
var1->AddInputDesc(tensor); | |||||
var1->AddOutputDesc(tensor); | |||||
var1->SetInputOffset({1024}); | |||||
var1->SetOutputOffset({1024}); | |||||
model.variable_op_list_.push_back(var1); | |||||
EXPECT_NE(model.SyncVarData(), SUCCESS); | EXPECT_NE(model.SyncVarData(), SUCCESS); | ||||
} | } | ||||
@@ -378,7 +374,7 @@ TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ2) { | |||||
GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), case1_node->GetInDataAnchor(0)); | GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), case1_node->GetInDataAnchor(0)); | ||||
GraphUtils::AddEdge(case1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | GraphUtils::AddEdge(case1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | ||||
(void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1;2;4;8"); | (void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1;2;4;8"); | ||||
(void)AttrUtils::SetBool(case_node, ATTR_INSERT_BY_MBATCH, true); | (void)AttrUtils::SetBool(case_node, ATTR_INSERT_BY_MBATCH, true); | ||||
@@ -64,10 +64,6 @@ TEST_F(UtestKernelExTaskInfo, success_kernel_ex_task_init) { | |||||
string value1(arg_size, 'a'); | string value1(arg_size, 'a'); | ||||
kernel_ex_def->set_args_size(arg_size); | kernel_ex_def->set_args_size(arg_size); | ||||
kernel_ex_def->set_args(value1); | kernel_ex_def->set_args(value1); | ||||
OpDescPtr v_op_desc = CreateOpDesc("ge_global_step", "Variable"); | |||||
model.variable_op_list_.push_back(v_op_desc); | |||||
model.op_list_[0]->SetWorkspace({100331008}); // offset | |||||
model.op_list_[0]->SetWorkspaceBytes({150}); // length | |||||
EXPECT_EQ(kernel_ex_task_info.Init(task_def, &model), FAILED); | EXPECT_EQ(kernel_ex_task_info.Init(task_def, &model), FAILED); | ||||