From: @shenwei41
Reviewed-by: @lilongfei15, @liucunwei
Signed-off-by: @lilongfei15, @liucunwei
tags/v1.2.0
| @@ -96,7 +96,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf | |||||
| dump_mode = dump_config.dump_mode; | dump_mode = dump_config.dump_mode; | ||||
| GELOGI("Dump mode is %s", dump_mode.c_str()); | GELOGI("Dump mode is %s", dump_mode.c_str()); | ||||
| dump_properties.SetDumpMode(dump_mode); | dump_properties.SetDumpMode(dump_mode); | ||||
| dump_properties_map_.emplace(kInferSessionId, dump_properties); | |||||
| dump_properties_map_[kInferSessionId] = dump_properties; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -20,6 +20,7 @@ | |||||
| #include "common/ge/datatype_util.h" | #include "common/ge/datatype_util.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/util.h" | #include "framework/common/util.h" | ||||
| #include "framework/common/types.h" | |||||
| #include "graph/anchor.h" | #include "graph/anchor.h" | ||||
| #include "graph/ge_tensor.h" | #include "graph/ge_tensor.h" | ||||
| #include "graph/op_desc.h" | #include "graph/op_desc.h" | ||||
| @@ -55,8 +56,10 @@ void DumpOp::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond | |||||
| loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond); | loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond); | ||||
| } | } | ||||
| void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id) { | |||||
| void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name, | |||||
| uint32_t dynamic_model_id) { | |||||
| dynamic_model_name_ = dynamic_model_name; | dynamic_model_name_ = dynamic_model_name; | ||||
| dynamic_om_name_ = dynamic_om_name; | |||||
| dynamic_model_id_ = dynamic_model_id; | dynamic_model_id_ = dynamic_model_id; | ||||
| } | } | ||||
| @@ -200,6 +203,32 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DumpOp::SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info) { | |||||
| if (dynamic_model_name_.empty() && dynamic_om_name_.empty()) { | |||||
| GELOGI("Single op dump, no need set model name"); | |||||
| return SUCCESS; | |||||
| } | |||||
| std::set<std::string> model_list = dump_properties_.GetAllDumpModel(); | |||||
| bool not_find_by_omname = model_list.find(dynamic_om_name_) == model_list.end(); | |||||
| bool not_find_by_modelname = model_list.find(dynamic_model_name_) == model_list.end(); | |||||
| std::string dump_model_name = not_find_by_omname ? dynamic_model_name_ : dynamic_om_name_; | |||||
| if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) { | |||||
| if (not_find_by_omname && not_find_by_modelname) { | |||||
| std::string model_list_str; | |||||
| for (auto &model : model_list) { | |||||
| model_list_str += "[" + model + "]."; | |||||
| } | |||||
| GELOGW("Model %s will not be set to dump, dump list: %s", dump_model_name.c_str(), model_list_str.c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| if (!dump_model_name.empty() && dump_properties_.IsDumpOpen()) { | |||||
| GELOGD("Dump model name is %s", dump_model_name.c_str()); | |||||
| op_mapping_info.set_model_name(dump_model_name); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status DumpOp::LaunchDumpOp() { | Status DumpOp::LaunchDumpOp() { | ||||
| GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str()); | GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str()); | ||||
| int32_t device_id = 0; | int32_t device_id = 0; | ||||
| @@ -209,8 +238,7 @@ Status DumpOp::LaunchDumpOp() { | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
| } | } | ||||
| if (device_id < 0) { | if (device_id < 0) { | ||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, | |||||
| "Check device_id failed, device_id = %d, which should be not less than 0.", | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Check device_id failed, device_id = %d, which should be not less than 0.", | |||||
| device_id); | device_id); | ||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | return ACL_ERROR_GE_INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -220,11 +248,12 @@ Status DumpOp::LaunchDumpOp() { | |||||
| op_mapping_info.set_flag(kAicpuLoadFlag); | op_mapping_info.set_flag(kAicpuLoadFlag); | ||||
| op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); | op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); | ||||
| op_mapping_info.set_model_id(dynamic_model_id_); | op_mapping_info.set_model_id(dynamic_model_id_); | ||||
| if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) { | |||||
| op_mapping_info.set_model_name(dynamic_model_name_); | |||||
| if (SetDumpModelName(op_mapping_info) != SUCCESS) { | |||||
| return SUCCESS; | |||||
| } | } | ||||
| SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | ||||
| GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), | |||||
| GELOGI("Dump step is %s ,dump path is %s in Launch dump op", dump_properties_.GetDumpStep().c_str(), | |||||
| dump_path.c_str()); | dump_path.c_str()); | ||||
| uint32_t task_id = 0; | uint32_t task_id = 0; | ||||
| uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
| @@ -273,4 +302,4 @@ Status DumpOp::LaunchDumpOp() { | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namesapce ge | |||||
| } // namespace ge | |||||
| @@ -34,12 +34,13 @@ class DumpOp { | |||||
| vector<uintptr_t> output_addrs, rtStream_t stream); | vector<uintptr_t> output_addrs, rtStream_t stream); | ||||
| Status LaunchDumpOp(); | Status LaunchDumpOp(); | ||||
| void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond); | void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond); | ||||
| void SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id); | |||||
| void SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name, uint32_t dynamic_model_id); | |||||
| private: | private: | ||||
| Status ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info); | Status ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info); | ||||
| Status DumpOutput(aicpu::dump::Task &task); | Status DumpOutput(aicpu::dump::Task &task); | ||||
| Status DumpInput(aicpu::dump::Task &task); | Status DumpInput(aicpu::dump::Task &task); | ||||
| Status SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info); | |||||
| DumpProperties dump_properties_; | DumpProperties dump_properties_; | ||||
| OpDescPtr op_desc_; | OpDescPtr op_desc_; | ||||
| @@ -54,6 +55,7 @@ class DumpOp { | |||||
| uintptr_t loop_cond_; | uintptr_t loop_cond_; | ||||
| std::string dynamic_model_name_; | std::string dynamic_model_name_; | ||||
| std::string dynamic_om_name_; | |||||
| std::uint32_t dynamic_model_id_; | std::uint32_t dynamic_model_id_; | ||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -35,14 +35,14 @@ const std::string kDumpStatusOpen = "on"; | |||||
| const uint32_t kAicoreOverflow = (0x1 << 0); | const uint32_t kAicoreOverflow = (0x1 << 0); | ||||
| const uint32_t kAtomicOverflow = (0x1 << 1); | const uint32_t kAtomicOverflow = (0x1 << 1); | ||||
| const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); | const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); | ||||
| } | |||||
| } // namespace | |||||
| namespace ge { | namespace ge { | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) { | ||||
| CopyFrom(other); | CopyFrom(other); | ||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=( | ||||
| const DumpProperties &other) { | |||||
| const DumpProperties &other) { | |||||
| CopyFrom(other); | CopyFrom(other); | ||||
| return *this; | return *this; | ||||
| } | } | ||||
| @@ -97,7 +97,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti | |||||
| // The following is the new dump scenario of the fusion operator | // The following is the new dump scenario of the fusion operator | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue( | ||||
| const std::string &model, const std::set<std::string> &layers) { | |||||
| const std::string &model, const std::set<std::string> &layers) { | |||||
| for (const std::string &layer : layers) { | for (const std::string &layer : layers) { | ||||
| GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str()); | GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str()); | ||||
| } | } | ||||
| @@ -138,7 +138,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpPrope | |||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetPropertyValue( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetPropertyValue( | ||||
| const std::string &model) const { | |||||
| const std::string &model) const { | |||||
| auto iter = model_dump_properties_map_.find(model); | auto iter = model_dump_properties_map_.find(model); | ||||
| if (iter != model_dump_properties_map_.end()) { | if (iter != model_dump_properties_map_.end()) { | ||||
| return iter->second; | return iter->second; | ||||
| @@ -147,8 +147,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpPrope | |||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump( | ||||
| const std::string &model, const std::string &om_name, const std::string &op_name) const { | |||||
| const std::string &model, const std::string &om_name, const std::string &op_name) const { | |||||
| // if dump all | // if dump all | ||||
| GELOGD("model name is %s om name is %s op is %s in layer need dump", model.c_str(), om_name.c_str(), op_name.c_str()); | |||||
| if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) { | if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) { | ||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -203,7 +204,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti | |||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( | ||||
| const std::string &dump_op_switch) { | |||||
| const std::string &dump_op_switch) { | |||||
| dump_op_switch_ = dump_op_switch; | dump_op_switch_ = dump_op_switch; | ||||
| } | } | ||||
| @@ -270,4 +271,4 @@ void DumpProperties::SetDumpDebugOptions() { | |||||
| GELOGI("ge.exec.enableDumpDebug is false or is not set."); | GELOGI("ge.exec.enableDumpDebug is false or is not set."); | ||||
| } | } | ||||
| } | } | ||||
| } // namespace | |||||
| } // namespace ge | |||||
| @@ -15,6 +15,8 @@ | |||||
| */ | */ | ||||
| #include "common/tbe_kernel_store.h" | #include "common/tbe_kernel_store.h" | ||||
| #include "graph/utils/attr_utils.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| namespace ge { | namespace ge { | ||||
| @@ -31,6 +33,15 @@ void TBEKernelStore::LoadTBEKernelBinToOpDesc(const std::shared_ptr<ge::OpDesc> | |||||
| GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, kernel_bin), | GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, kernel_bin), | ||||
| GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for kernel_bin failed");) | GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for kernel_bin failed");) | ||||
| GELOGI("Load tbe kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize()); | GELOGI("Load tbe kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize()); | ||||
| std::string atomic_kernel_name; | |||||
| (void) AttrUtils::GetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, atomic_kernel_name); | |||||
| if (!atomic_kernel_name.empty()) { | |||||
| GELOGI("Get atomic kernel name is %s.", atomic_kernel_name.c_str()); | |||||
| auto atomic_kernel_bin = FindKernel(atomic_kernel_name); | |||||
| GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, atomic_kernel_bin), | |||||
| GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for atomic kernel_bin failed");) | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -67,6 +67,9 @@ bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { | |||||
| } | } | ||||
| return false; | return false; | ||||
| } | } | ||||
| bool IsOptional(const ge::GeTensorDesc &tensor_desc) { | |||||
| return tensor_desc.GetFormat() == ge::FORMAT_RESERVED && tensor_desc.GetDataType() == ge::DT_UNDEFINED; | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||
| @@ -154,7 +157,7 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty | |||||
| } | } | ||||
| static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index, | static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index, | ||||
| bool attr) { | |||||
| bool attr, int32_t &data_index) { | |||||
| GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | ||||
| GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | ||||
| @@ -197,9 +200,10 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const | |||||
| "[Add][InputDesc]fail for node:%s", data_op->GetName().c_str()); | "[Add][InputDesc]fail for node:%s", data_op->GetName().c_str()); | ||||
| GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, | GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, | ||||
| "[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str()); | "[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str()); | ||||
| if (attr) { | |||||
| GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, | |||||
| if (attr && !is_const) { | |||||
| GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, data_index), return FAILED, | |||||
| "[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str()); | "[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str()); | ||||
| ++data_index; | |||||
| } | } | ||||
| ge::NodePtr arg_node = graph->AddNode(data_op); | ge::NodePtr arg_node = graph->AddNode(data_op); | ||||
| @@ -691,6 +695,34 @@ namespace { | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| bool CheckNoAicore(const ComputeGraphPtr &graph) { | |||||
| for (const auto &node : graph->GetDirectNode()) { | |||||
| if (node == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| if (op_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| if (op_desc->GetOpEngineName() == kAIcoreEngine) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } | |||||
| void GeGenerator::RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) { | |||||
| for (auto &input : inputs) { | |||||
| GeTensorDesc input_desc = input.GetTensorDesc(); | |||||
| bool is_const = false; | |||||
| (void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const); | |||||
| bool is_optional = IsOptional(input_desc); | |||||
| if (!is_optional && !is_const) { | |||||
| outputs.emplace_back(input); | |||||
| } | |||||
| } | |||||
| } | } | ||||
| Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
| @@ -757,7 +789,9 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| GELOGI("ATC parser success in single op build."); | GELOGI("ATC parser success in single op build."); | ||||
| GeRootModelPtr ge_root_model = nullptr; | GeRootModelPtr ge_root_model = nullptr; | ||||
| GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model)); | |||||
| vector<GeTensor> data_inputs; | |||||
| RemoveConst(inputs, data_inputs); | |||||
| GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, data_inputs, ge_root_model)); | |||||
| map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs(); | map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs(); | ||||
| GE_CHECK_NOTNULL(ge_root_model); | GE_CHECK_NOTNULL(ge_root_model); | ||||
| GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | ||||
| @@ -773,7 +807,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| bool all_shape = false; | bool all_shape = false; | ||||
| (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); | (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); | ||||
| if (all_shape) { | |||||
| if (all_shape && CheckNoAicore(root_graph)) { | |||||
| GELOGD("Get aicpu all_shape kernel!"); | GELOGD("Get aicpu all_shape kernel!"); | ||||
| vector<GeTensor> inputs_dynamic; | vector<GeTensor> inputs_dynamic; | ||||
| vector<GeTensor> outputs_dynamic; | vector<GeTensor> outputs_dynamic; | ||||
| @@ -840,18 +874,19 @@ Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor | |||||
| // 2. Create InputData node. | // 2. Create InputData node. | ||||
| int32_t arg_index = 0; | int32_t arg_index = 0; | ||||
| int32_t data_index = 0; | |||||
| if (inputs.empty()) { | if (inputs.empty()) { | ||||
| for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { | for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { | ||||
| GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR); | GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR); | ||||
| if (!IsNeedConnectInputOpForSingleOp(*input_desc)) { | if (!IsNeedConnectInputOpForSingleOp(*input_desc)) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false)); | |||||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false, data_index)); | |||||
| arg_index++; | arg_index++; | ||||
| } | } | ||||
| } else { | } else { | ||||
| for (const auto &in_desc : inputs) { | for (const auto &in_desc : inputs) { | ||||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true)); | |||||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true, data_index)); | |||||
| arg_index++; | arg_index++; | ||||
| } | } | ||||
| } | } | ||||
| @@ -382,58 +382,6 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt | |||||
| return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); | return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); | ||||
| } | } | ||||
| static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor, | |||||
| const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) { | |||||
| GE_CHECK_NOTNULL(out_anchor); | |||||
| NodePtr in_node = out_anchor->GetOwnerNode(); | |||||
| GE_CHECK_NOTNULL(in_node); | |||||
| OpDescBuilder op_desc_builder(name, MEMCPYASYNC); | |||||
| OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0)) | |||||
| .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0)) | |||||
| .Build(); | |||||
| (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false); | |||||
| if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) { | |||||
| if (graph->GetGraphUnknownFlag()) { | |||||
| GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| for (auto &node : graph->GetDirectNode()) { | |||||
| // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| if (op_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto op_type = op_desc->GetType(); | |||||
| if (op_type == NETOUTPUT) { | |||||
| for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { | |||||
| const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||||
| NodePtr in_node = peer_out_anchor->GetOwnerNode(); | |||||
| GE_CHECK_NOTNULL(in_node); | |||||
| std::string in_node_op_type = in_node->GetType(); | |||||
| if (in_node_op_type == CONSTANT) { | |||||
| GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); | |||||
| std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; | |||||
| if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) { | |||||
| GELOGE(FAILED, "Insert memcpy between %s and %s failed.", | |||||
| in_node->GetName().c_str(), node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { | Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { | ||||
| bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag(); | bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag(); | ||||
| com_graph->SetGraphUnknownFlag(false); | com_graph->SetGraphUnknownFlag(false); | ||||
| @@ -516,9 +464,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||||
| !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { | !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed."); | |||||
| if (sub_graph->GetGraphUnknownFlag()) { | if (sub_graph->GetGraphUnknownFlag()) { | ||||
| // unknown shape build flow | // unknown shape build flow | ||||
| GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), | GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), | ||||
| @@ -574,6 +574,50 @@ Status ModelBuilder::MergeWeights() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status ModelBuilder::SaveAtomicTBEKernel(const OpDescPtr &op_desc) { | |||||
| ge::NodePtr atomic_clean_node = nullptr; | |||||
| atomic_clean_node = op_desc->TryGetExtAttr("atomic_clean_node_ptr", atomic_clean_node); | |||||
| if (atomic_clean_node == nullptr) { | |||||
| return SUCCESS; | |||||
| } | |||||
| ge::OpDescPtr atomic_op_desc = atomic_clean_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(atomic_op_desc); | |||||
| TBEKernelPtr tbe_kernel = atomic_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | |||||
| if (tbe_kernel == nullptr) { | |||||
| std::string kernel_name; | |||||
| GeAttrValue::BYTES kernel_buffer; | |||||
| (void) AttrUtils::GetStr(atomic_op_desc, ATTR_NAME_TBE_KERNEL_NAME, kernel_name); | |||||
| (void) AttrUtils::GetBytes(atomic_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer); | |||||
| if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) { | |||||
| GE_CHECK_NOTNULL(kernel_buffer.GetData()); | |||||
| std::vector<char> data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize()); | |||||
| tbe_kernel = MakeShared<OpKernelBin>(kernel_name, std::move(data)); | |||||
| GE_CHECK_NOTNULL(tbe_kernel); | |||||
| } | |||||
| } | |||||
| if (tbe_kernel == nullptr) { | |||||
| GELOGD("Atomic_clean_node doesn't have tbe_kernel."); | |||||
| return SUCCESS; | |||||
| } | |||||
| tbe_kernel_store_.AddTBEKernel(tbe_kernel); | |||||
| GELOGD("Atomic_clean_node tbe_kernel_name %s!", tbe_kernel->GetName().c_str()); | |||||
| (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, tbe_kernel->GetName()); | |||||
| std::string kernel_name; | |||||
| (void) AttrUtils::GetStr(atomic_op_desc, atomic_op_desc->GetName() + "_kernelname", kernel_name); | |||||
| (void) AttrUtils::SetStr(op_desc, op_desc->GetName() + "_atomic_kernelname", kernel_name); | |||||
| std::string meta_data; | |||||
| (void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_METADATA, meta_data); | |||||
| (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_METADATA, meta_data); | |||||
| std::string json_string; | |||||
| (void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_MAGIC, json_string); | |||||
| (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_MAGIC, json_string); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | ||||
| // Add weight | // Add weight | ||||
| ge_model.SetWeight(weight_buffer_); | ge_model.SetWeight(weight_buffer_); | ||||
| @@ -607,6 +651,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | |||||
| } | } | ||||
| tbe_name_set.insert(tbe_kernel->GetName()); | tbe_name_set.insert(tbe_kernel->GetName()); | ||||
| tbe_kernel_store_.AddTBEKernel(tbe_kernel); | tbe_kernel_store_.AddTBEKernel(tbe_kernel); | ||||
| GE_CHK_STATUS_RET(SaveAtomicTBEKernel(node_op_desc), "[Save][TBEKernel] save atomic tbekernel failed!"); | |||||
| } | } | ||||
| SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types); | SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types); | ||||
| @@ -89,6 +89,8 @@ class ModelBuilder { | |||||
| void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types, | void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types, | ||||
| std::set<std::string> &aicpu_tf_op_types); | std::set<std::string> &aicpu_tf_op_types); | ||||
| Status SaveAtomicTBEKernel(const OpDescPtr &op_desc); | |||||
| uint64_t session_id_; | uint64_t session_id_; | ||||
| map<int64_t, size_t> mem_type_to_mem_offset_; | map<int64_t, size_t> mem_type_to_mem_offset_; | ||||
| @@ -3067,9 +3067,8 @@ Status DavinciModel::DistributeTask() { | |||||
| task_def.kernel_ex().op_index()); | task_def.kernel_ex().op_index()); | ||||
| OpDescPtr op = GetOpByIndex(op_index); | OpDescPtr op = GetOpByIndex(op_index); | ||||
| GE_CHECK_NOTNULL(op); | GE_CHECK_NOTNULL(op); | ||||
| if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) { | if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) { | ||||
| bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo(); | |||||
| bool call_dump = OpNeedDump(op->GetName()) && task->CallSaveDumpInfo(); | |||||
| if (call_dump || is_op_debug_reg_) { | if (call_dump || is_op_debug_reg_) { | ||||
| SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); | SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); | ||||
| } | } | ||||
| @@ -3089,11 +3088,16 @@ Status DavinciModel::DistributeTask() { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { | |||||
| bool DavinciModel::ModelNeedDump() { | |||||
| auto all_dump_model = GetDumpProperties().GetAllDumpModel(); | auto all_dump_model = GetDumpProperties().GetAllDumpModel(); | ||||
| bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end(); | |||||
| bool findByModelName = all_dump_model.find(name_) != all_dump_model.end(); | |||||
| if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) { | |||||
| bool ret = all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || | |||||
| all_dump_model.find(dump_model_name_) != all_dump_model.end() || | |||||
| all_dump_model.find(om_name_) != all_dump_model.end(); | |||||
| return ret; | |||||
| } | |||||
| void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { | |||||
| if (ModelNeedDump()) { | |||||
| GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id); | GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id); | ||||
| data_dumper_.SaveEndGraphId(task_id, stream_id); | data_dumper_.SaveEndGraphId(task_id, stream_id); | ||||
| } | } | ||||
| @@ -3893,7 +3897,10 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) | |||||
| } | } | ||||
| void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name) { | void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name) { | ||||
| data_dumper_.SetModelName(name_); | |||||
| if(dump_model_name_.empty()) { | |||||
| dump_model_name_ = name_; | |||||
| } | |||||
| data_dumper_.SetModelName(dump_model_name_); | |||||
| data_dumper_.SetModelId(model_id_); | data_dumper_.SetModelId(model_id_); | ||||
| data_dumper_.SetOmName(om_name_); | data_dumper_.SetOmName(om_name_); | ||||
| data_dumper_.SetComputeGraph(graph); | data_dumper_.SetComputeGraph(graph); | ||||
| @@ -4082,7 +4089,7 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) { | |||||
| Status DavinciModel::InitL1DataDumperArgs() { | Status DavinciModel::InitL1DataDumperArgs() { | ||||
| auto all_dump_model = GetDumpProperties().GetAllDumpModel(); | auto all_dump_model = GetDumpProperties().GetAllDumpModel(); | ||||
| bool find_by_om_name = all_dump_model.find(om_name_) != all_dump_model.end(); | bool find_by_om_name = all_dump_model.find(om_name_) != all_dump_model.end(); | ||||
| bool find_by_model_name = all_dump_model.find(name_) != all_dump_model.end(); | |||||
| bool find_by_model_name = all_dump_model.find(dump_model_name_) != all_dump_model.end(); | |||||
| bool dump_l1fusion_op = | bool dump_l1fusion_op = | ||||
| (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || find_by_om_name || find_by_model_name; | (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || find_by_om_name || find_by_model_name; | ||||
| if (dump_l1fusion_op) { | if (dump_l1fusion_op) { | ||||
| @@ -248,7 +248,10 @@ class DavinciModel { | |||||
| string Name() const { return name_; } | string Name() const { return name_; } | ||||
| // om_name | // om_name | ||||
| string OmName() const { return om_name_; } | |||||
| const string &OmName() const { return om_name_; } | |||||
| // dump_model_name | |||||
| const string &DumpModelName() const { return dump_model_name_; } | |||||
| // version | // version | ||||
| uint32_t Version() const { return version_; } | uint32_t Version() const { return version_; } | ||||
| @@ -483,6 +486,12 @@ class DavinciModel { | |||||
| data_dumper_.DumpShrink(); | data_dumper_.DumpShrink(); | ||||
| } | } | ||||
| bool OpNeedDump(const string &op_name) { | |||||
| return GetDumpProperties().IsLayerNeedDump(dump_model_name_, om_name_, op_name); | |||||
| } | |||||
| bool ModelNeedDump(); | |||||
| void SetEndGraphId(uint32_t task_id, uint32_t stream_id); | void SetEndGraphId(uint32_t task_id, uint32_t stream_id); | ||||
| DavinciModel &operator=(const DavinciModel &model) = delete; | DavinciModel &operator=(const DavinciModel &model) = delete; | ||||
| @@ -542,6 +551,7 @@ class DavinciModel { | |||||
| // om file name | // om file name | ||||
| void SetOmName(const string &om_name) { om_name_ = om_name; } | void SetOmName(const string &om_name) { om_name_ = om_name; } | ||||
| void SetDumpModelName(const string &dump_model_name) { dump_model_name_ = dump_model_name; } | |||||
| void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); } | void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); } | ||||
| const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); } | const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); } | ||||
| @@ -888,6 +898,7 @@ class DavinciModel { | |||||
| // used for inference data dump | // used for inference data dump | ||||
| string om_name_; | string om_name_; | ||||
| string dump_model_name_; | |||||
| uint32_t version_; | uint32_t version_; | ||||
| GeModelPtr ge_model_; // release after DavinciModel::Init | GeModelPtr ge_model_; // release after DavinciModel::Init | ||||
| @@ -271,7 +271,7 @@ ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uin | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string &model_name, | |||||
| ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string &om_name, | |||||
| const shared_ptr<ge::GeRootModel> &ge_root_model, | const shared_ptr<ge::GeRootModel> &ge_root_model, | ||||
| const shared_ptr<ModelListener> &listener) { | const shared_ptr<ModelListener> &listener) { | ||||
| auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model); | auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model); | ||||
| @@ -279,7 +279,7 @@ ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string | |||||
| hybrid_model->SetListener(listener); | hybrid_model->SetListener(listener); | ||||
| hybrid_model->SetModelId(model_id); | hybrid_model->SetModelId(model_id); | ||||
| hybrid_model->SetDeviceId(GetContext().DeviceId()); | hybrid_model->SetDeviceId(GetContext().DeviceId()); | ||||
| hybrid_model->SetModelName(model_name); | |||||
| hybrid_model->SetOmName(om_name); | |||||
| GE_CHK_STATUS_RET(hybrid_model->Init(), "Failed to init hybrid model. model_id = %u", model_id); | GE_CHK_STATUS_RET(hybrid_model->Init(), "Failed to init hybrid model. model_id = %u", model_id); | ||||
| auto shared_model = std::shared_ptr<hybrid::HybridDavinciModel>(hybrid_model.release()); | auto shared_model = std::shared_ptr<hybrid::HybridDavinciModel>(hybrid_model.release()); | ||||
| InsertModel(model_id, shared_model); | InsertModel(model_id, shared_model); | ||||
| @@ -309,9 +309,9 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||||
| GenModelId(&model_id); | GenModelId(&model_id); | ||||
| } | } | ||||
| auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); | auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); | ||||
| string model_name = ""; | |||||
| string om_name; | |||||
| if (IsNeedHybridLoad(*ge_root_model)) { | if (IsNeedHybridLoad(*ge_root_model)) { | ||||
| return DoLoadHybridModelOnline(model_id, model_name, ge_root_model, listener); | |||||
| return DoLoadHybridModelOnline(model_id, om_name, ge_root_model, listener); | |||||
| } | } | ||||
| mmTimespec timespec = mmGetTickCount(); | mmTimespec timespec = mmGetTickCount(); | ||||
| @@ -45,10 +45,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
| Status EndGraphTaskInfo::Distribute() { | Status EndGraphTaskInfo::Distribute() { | ||||
| GELOGI("EndGraphTaskInfo Distribute Start."); | GELOGI("EndGraphTaskInfo Distribute Start."); | ||||
| GE_CHECK_NOTNULL(davinci_model_); | GE_CHECK_NOTNULL(davinci_model_); | ||||
| auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel(); | |||||
| if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || | |||||
| all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() || | |||||
| all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) { | |||||
| if (davinci_model_->ModelNeedDump()) { | |||||
| GELOGI("Start to call rtEndGraphEx"); | GELOGI("Start to call rtEndGraphEx"); | ||||
| rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag); | rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| @@ -238,8 +238,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
| } | } | ||||
| void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { | void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { | ||||
| if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||||
| op_desc->GetName())) { | |||||
| if (davinci_model_->OpNeedDump(op_desc->GetName())) { | |||||
| dump_flag_ = RT_KERNEL_DUMPFLAG; | dump_flag_ = RT_KERNEL_DUMPFLAG; | ||||
| dump_args_ = addr; | dump_args_ = addr; | ||||
| } | } | ||||
| @@ -409,10 +409,7 @@ Status KernelTaskInfo::Distribute() { | |||||
| call_skt, task_id_, skt_id_, skt_info.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); | call_skt, task_id_, skt_id_, skt_info.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); | ||||
| // l1 fusion enable and env flag open (kCloseSkt for skt debug) | // l1 fusion enable and env flag open (kCloseSkt for skt debug) | ||||
| bool open_dump = false; | bool open_dump = false; | ||||
| auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel(); | |||||
| if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || | |||||
| all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() || | |||||
| all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) { | |||||
| if (davinci_model_->ModelNeedDump()) { | |||||
| open_dump = true; | open_dump = true; | ||||
| } | } | ||||
| if (call_skt && (env_flag != kCloseSkt) && !open_dump) { | if (call_skt && (env_flag != kCloseSkt) && !open_dump) { | ||||
| @@ -980,8 +977,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| } | } | ||||
| void KernelTaskInfo::InitDumpTask(uint32_t offset) { | void KernelTaskInfo::InitDumpTask(uint32_t offset) { | ||||
| if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||||
| op_desc_->GetName())) { | |||||
| if (davinci_model_->OpNeedDump(op_desc_->GetName())) { | |||||
| if (IsL1FusionOp(op_desc_)) { | if (IsL1FusionOp(op_desc_)) { | ||||
| dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; | dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; | ||||
| } else { | } else { | ||||
| @@ -222,6 +222,39 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| return LinkToPotentialPrecedenceNode(graph, clean_addr_node); | |||||
| } | |||||
| // Add control edges from atomic clean node to all potential precedence nodes which may execute before atomic clean | |||||
| // node. We hope that atomic clean node can execute with the highest priority in the entire graph. Because of stream | |||||
| // concurrency mechanism, only placing it at the head can not ensure that priority. Therefore, we need to add control | |||||
| // edges from atomic clean node to the nodes that may be the first node on each stream. Generally, the first nodes on | |||||
| // each stream are successors of Data/Variable, and Data/Variable won't generate task or execute, so we link to the | |||||
| // successors of Data/Variable. | |||||
| Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node) { | |||||
| GELOGD("Start to add control edges from %s to all second-nodes behind first-nodes which have no input.", | |||||
| atomic_clean_node->GetName().c_str()); | |||||
| auto out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor(); | |||||
| GE_CHECK_NOTNULL(out_ctrl_anchor); | |||||
| for (const auto &node : graph->GetDirectNode()) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| bool need_handle = (node->GetType() == DATA || node->GetType() == VARIABLE) && node->GetInAllNodes().empty(); | |||||
| if (!need_handle) { | |||||
| continue; | |||||
| } | |||||
| auto second_nodes = node->GetOutAllNodes(); | |||||
| for (const auto &second_node : second_nodes) { | |||||
| GE_CHECK_NOTNULL(second_node); | |||||
| auto in_ctrl_anchor = second_node->GetInControlAnchor(); | |||||
| GE_CHECK_NOTNULL(in_ctrl_anchor); | |||||
| if (!out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor)) { | |||||
| GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(in_ctrl_anchor)); | |||||
| GELOGD("Add control edge from %s to %s.", atomic_clean_node->GetName().c_str(), second_node->GetName().c_str()); | |||||
| } | |||||
| } | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -67,6 +67,14 @@ class AtomicAddrCleanPass : public GraphPass { | |||||
| */ | */ | ||||
| Status LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node); | Status LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node); | ||||
| /** | |||||
| * Link atomic clean node to all potential precedence nodes which may execute before atomic clean node | |||||
| * @param graph | |||||
| * @param atomic_clean_node | |||||
| * @return | |||||
| */ | |||||
| Status LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node); | |||||
| /** | /** | ||||
| * Check if this node is atomic op. | * Check if this node is atomic op. | ||||
| * @param node | * @param node | ||||
| @@ -428,7 +428,8 @@ Status AippOp::ConvertRelatedInputNameToRank() { | |||||
| if (!convert_flag) { | if (!convert_flag) { | ||||
| string error_msg = "Top name " + related_input_name + "convert rank failed, Please" | string error_msg = "Top name " + related_input_name + "convert rank failed, Please" | ||||
| " ensure top name in aipp config is the top name of data node."; | " ensure top name in aipp config is the top name of data node."; | ||||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); | |||||
| GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); | |||||
| REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg})); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| @@ -124,13 +124,15 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() { | |||||
| if (another_item->related_input_name().empty()) { | if (another_item->related_input_name().empty()) { | ||||
| string error_msg = "Can not both set related_input_name and related_input_rank!" | string error_msg = "Can not both set related_input_name and related_input_rank!" | ||||
| " Please ensure param is the same with the first aipp config(related_input_name)."; | " Please ensure param is the same with the first aipp config(related_input_name)."; | ||||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); | |||||
| GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); | |||||
| REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg})); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| if (item->related_input_name() == another_item->related_input_name()) { | if (item->related_input_name() == another_item->related_input_name()) { | ||||
| string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name" | string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name" | ||||
| " param is different in different aipp config."; | " param is different in different aipp config."; | ||||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); | |||||
| GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); | |||||
| REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg})); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| } | } | ||||
| @@ -150,13 +152,15 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() { | |||||
| if (!another_item->related_input_name().empty()) { | if (!another_item->related_input_name().empty()) { | ||||
| string error_msg = "Can not both set related_input_rank and related_input_name!" | string error_msg = "Can not both set related_input_rank and related_input_name!" | ||||
| " Please ensure param is the same with the first aipp config(related_input_rank)."; | " Please ensure param is the same with the first aipp config(related_input_rank)."; | ||||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); | |||||
| GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); | |||||
| REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg})); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| if (item->related_input_rank() == another_item->related_input_rank()) { | if (item->related_input_rank() == another_item->related_input_rank()) { | ||||
| string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank" | string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank" | ||||
| " param is different in different aipp config."; | " param is different in different aipp config."; | ||||
| GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); | |||||
| GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str()); | |||||
| REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg})); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| } | } | ||||
| @@ -68,7 +68,7 @@ struct GraphExecutionContext { | |||||
| DumpProperties dump_properties; | DumpProperties dump_properties; | ||||
| bool trace_enabled = false; | bool trace_enabled = false; | ||||
| bool dump_enabled = false; | bool dump_enabled = false; | ||||
| std::atomic_bool is_eos_; | |||||
| std::atomic_bool is_eos_{false}; | |||||
| long profiling_level = 0; | long profiling_level = 0; | ||||
| long iteration = 0; | long iteration = 0; | ||||
| void *global_step = nullptr; | void *global_step = nullptr; | ||||
| @@ -46,10 +46,6 @@ void HybridModelAsyncExecutor::SetModelId(uint32_t model_id) { | |||||
| model_id_ = model_id; | model_id_ = model_id; | ||||
| } | } | ||||
| void HybridModelAsyncExecutor::SetModelName(const string &model_name) { | |||||
| om_name_ = model_name; | |||||
| } | |||||
| Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr<InputDataWrapper> &data) { | Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr<InputDataWrapper> &data) { | ||||
| GE_CHK_STATUS_EXEC(data_inputer_->Push(data), return domi::DATA_QUEUE_ISFULL, | GE_CHK_STATUS_EXEC(data_inputer_->Push(data), return domi::DATA_QUEUE_ISFULL, | ||||
| "Data queue is full, please call again later, model_id %u ", model_id_); | "Data queue is full, please call again later, model_id %u ", model_id_); | ||||
| @@ -51,8 +51,6 @@ class HybridModelAsyncExecutor { | |||||
| void SetModelId(uint32_t model_id); | void SetModelId(uint32_t model_id); | ||||
| void SetModelName(const string &model_name); | |||||
| Status Stop(); | Status Stop(); | ||||
| Status EnqueueData(const std::shared_ptr<InputDataWrapper> &data); | Status EnqueueData(const std::shared_ptr<InputDataWrapper> &data); | ||||
| @@ -97,7 +95,6 @@ class HybridModelAsyncExecutor { | |||||
| std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_; | std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_; | ||||
| std::vector<bool> is_input_dynamic_; | std::vector<bool> is_input_dynamic_; | ||||
| std::shared_ptr<ModelListener> listener_; | std::shared_ptr<ModelListener> listener_; | ||||
| string om_name_; | |||||
| DataDumper data_dumper_; | DataDumper data_dumper_; | ||||
| bool is_op_debug_reg_ = false; | bool is_op_debug_reg_ = false; | ||||
| OpdebugRegister op_debug_register_; | OpdebugRegister op_debug_register_; | ||||
| @@ -33,9 +33,6 @@ HybridModelExecutor::~HybridModelExecutor() { | |||||
| if (context_.rt_gen_context != nullptr) { | if (context_.rt_gen_context != nullptr) { | ||||
| (void) rtCtxDestroy(context_.rt_gen_context); | (void) rtCtxDestroy(context_.rt_gen_context); | ||||
| } | } | ||||
| if (context_.global_step != nullptr) { | |||||
| (void) rtFree(context_.global_step); | |||||
| } | |||||
| } | } | ||||
| Status HybridModelExecutor::Init() { | Status HybridModelExecutor::Init() { | ||||
| @@ -49,9 +46,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { | |||||
| GELOGD("Start to execute model."); | GELOGD("Start to execute model."); | ||||
| auto root_graph_item = model_->GetRootGraphItem(); | auto root_graph_item = model_->GetRootGraphItem(); | ||||
| GE_CHECK_NOTNULL(root_graph_item); | GE_CHECK_NOTNULL(root_graph_item); | ||||
| GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration, | |||||
| sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream)); | |||||
| if (context_.global_step != nullptr) { | |||||
| GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration, | |||||
| sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream)); | |||||
| } | |||||
| SubgraphExecutor executor(model_->GetRootGraphItem(), &context_); | SubgraphExecutor executor(model_->GetRootGraphItem(), &context_); | ||||
| auto ret = ExecuteGraphInternal(executor, args); | auto ret = ExecuteGraphInternal(executor, args); | ||||
| Cleanup(); | Cleanup(); | ||||
| @@ -102,8 +100,8 @@ Status HybridModelExecutor::InitExecutionContext() { | |||||
| GE_CHK_RT_RET(rtCtxGetCurrent(&context_.rt_context)); | GE_CHK_RT_RET(rtCtxGetCurrent(&context_.rt_context)); | ||||
| GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0)); | GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0)); | ||||
| GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context)); | GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context)); | ||||
| GE_CHK_RT_RET(rtMalloc(&context_.global_step, sizeof(uint64_t), RT_MEMORY_HBM)); | |||||
| context_.global_step = model_->GetGlobalStep(); | |||||
| context_.stream = stream_; | context_.stream = stream_; | ||||
| context_.model = model_; | context_.model = model_; | ||||
| context_.is_eos_ = false; | context_.is_eos_ = false; | ||||
| @@ -136,6 +134,16 @@ Status HybridModelExecutor::ResetExecutionContext(GraphExecutionContext &context | |||||
| string ctx_id = std::to_string(context.context_id); | string ctx_id = std::to_string(context.context_id); | ||||
| RuntimeInferenceContext::DestroyContext(ctx_id); | RuntimeInferenceContext::DestroyContext(ctx_id); | ||||
| GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext"); | GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext"); | ||||
| RuntimeInferenceContext *ctx = nullptr; | |||||
| GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context"); | |||||
| for (auto &host_tensor : context.model->GetHostTensors()) { | |||||
| auto node_id = host_tensor.first; | |||||
| for (const auto &output_idx_and_tensor : host_tensor.second) { | |||||
| auto output_idx = output_idx_and_tensor.first; | |||||
| GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx); | |||||
| ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone()); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| @@ -38,6 +38,16 @@ Status StageExecutor::ResetExecutionContext(GraphExecutionContext &context) { | |||||
| string ctx_id = std::to_string(context.context_id); | string ctx_id = std::to_string(context.context_id); | ||||
| RuntimeInferenceContext::DestroyContext(ctx_id); | RuntimeInferenceContext::DestroyContext(ctx_id); | ||||
| GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext"); | GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext"); | ||||
| RuntimeInferenceContext *ctx = nullptr; | |||||
| GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context"); | |||||
| for (auto &host_tensor : context.model->GetHostTensors()) { | |||||
| auto node_id = host_tensor.first; | |||||
| for (const auto &output_idx_and_tensor : host_tensor.second) { | |||||
| auto output_idx = output_idx_and_tensor.first; | |||||
| GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx); | |||||
| ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone()); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -206,31 +206,35 @@ Status NodeDoneCallback::DumpDynamicNode() { | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(graph_context_); | |||||
| const HybridModel *model = graph_context_->model; | |||||
| GE_CHECK_NOTNULL(model); | |||||
| std::string dynamic_model_name = model->GetModelName(); | |||||
| std::string dynamic_om_name = model->GetOmName(); | |||||
| uint32_t model_id = model->GetModelId(); | |||||
| if (!context_->GetDumpProperties().IsLayerNeedDump(dynamic_model_name, dynamic_om_name, op_desc->GetName())) { | |||||
| GELOGI("[%s] is not in dump list, no need dump", op_desc->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| dump_op_.SetDynamicModelInfo(dynamic_model_name, dynamic_om_name, model_id); | |||||
| auto stream = context_->GetStream(); | auto stream = context_->GetStream(); | ||||
| vector<uintptr_t> input_addrs; | vector<uintptr_t> input_addrs; | ||||
| vector<uintptr_t> output_addrs; | vector<uintptr_t> output_addrs; | ||||
| for (int i = 0; i < context_->NumInputs(); i++) { | for (int i = 0; i < context_->NumInputs(); i++) { | ||||
| auto tensor_value = context_->GetInput(i); | auto tensor_value = context_->GetInput(i); | ||||
| GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr"); | GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr"); | ||||
| uint64_t input_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData()); | |||||
| uintptr_t input_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData()); | |||||
| input_addrs.emplace_back(input_addr); | input_addrs.emplace_back(input_addr); | ||||
| } | } | ||||
| for (int j = 0; j < context_->NumOutputs(); j++) { | for (int j = 0; j < context_->NumOutputs(); j++) { | ||||
| auto tensor_value = context_->GetOutput(j); | auto tensor_value = context_->GetOutput(j); | ||||
| GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr"); | GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr"); | ||||
| uint64_t output_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData()); | |||||
| uintptr_t output_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData()); | |||||
| output_addrs.emplace_back(output_addr); | output_addrs.emplace_back(output_addr); | ||||
| } | } | ||||
| dump_op_.SetDumpInfo(context_->GetDumpProperties(), op_desc, input_addrs, output_addrs, stream); | dump_op_.SetDumpInfo(context_->GetDumpProperties(), op_desc, input_addrs, output_addrs, stream); | ||||
| GE_CHECK_NOTNULL(graph_context_); | |||||
| const HybridModel *model = graph_context_->model; | |||||
| GE_CHECK_NOTNULL(model); | |||||
| std::string dynamic_model_name = model->GetModelName(); | |||||
| uint32_t model_id = model->GetModelId(); | |||||
| dump_op_.SetDynamicModelInfo(dynamic_model_name, model_id); | |||||
| void *loop_per_iter = nullptr; | void *loop_per_iter = nullptr; | ||||
| TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER); | TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER); | ||||
| if (varible_loop_per_iter != nullptr) { | if (varible_loop_per_iter != nullptr) { | ||||
| @@ -76,9 +76,8 @@ class HybridDavinciModel::Impl { | |||||
| executor_.SetDeviceId(device_id); | executor_.SetDeviceId(device_id); | ||||
| } | } | ||||
| void SetModelName(const string &model_name) { | |||||
| model_.SetModelName(model_name); | |||||
| executor_.SetModelName(model_name); | |||||
| void SetOmName(const string &model_name) { | |||||
| model_.SetOmName(model_name); | |||||
| } | } | ||||
| uint64_t GetSessionId() { | uint64_t GetSessionId() { | ||||
| @@ -181,9 +180,9 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) { | |||||
| } | } | ||||
| } | } | ||||
| void HybridDavinciModel::SetModelName(const string &model_name) { | |||||
| void HybridDavinciModel::SetOmName(const string &om_name) { | |||||
| if (impl_ != nullptr) { | if (impl_ != nullptr) { | ||||
| impl_->SetModelName(model_name); | |||||
| impl_->SetOmName(om_name); | |||||
| } | } | ||||
| } | } | ||||
| @@ -57,7 +57,7 @@ class HybridDavinciModel { | |||||
| void SetDeviceId(uint32_t device_id); | void SetDeviceId(uint32_t device_id); | ||||
| void SetModelName(const string &model_name); | |||||
| void SetOmName(const string &om_name); | |||||
| uint64_t GetSessionId(); | uint64_t GetSessionId(); | ||||
| @@ -61,7 +61,7 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) { | |||||
| void HybridDavinciModel::SetDeviceId(uint32_t device_id) { | void HybridDavinciModel::SetDeviceId(uint32_t device_id) { | ||||
| } | } | ||||
| void HybridDavinciModel::SetModelName(const string &model_name) { | |||||
| void HybridDavinciModel::SetOmName(const string &om_name) { | |||||
| } | } | ||||
| uint64_t HybridDavinciModel::GetSessionId() { | uint64_t HybridDavinciModel::GetSessionId() { | ||||
| @@ -357,5 +357,25 @@ TensorValue *HybridModel::GetTensor(const NodePtr &node) const { | |||||
| return GetVariable(node->GetName()); | return GetVariable(node->GetName()); | ||||
| } | } | ||||
| const map<int64_t, std::vector<std::pair<int, Tensor>>> &HybridModel::GetHostTensors() const { | |||||
| return host_tensors_; | |||||
| } | |||||
| void *HybridModel::GetGlobalStep() const { | |||||
| if (global_step_ == nullptr) { | |||||
| return nullptr; | |||||
| } | |||||
| return global_step_->GetData(); | |||||
| } | |||||
| TensorBuffer *HybridModel::GetModelWeight(const string &subgraph_name) const { | |||||
| auto it = weight_buffer_map_.find(subgraph_name); | |||||
| if (it == weight_buffer_map_.end()) { | |||||
| GELOGD("Model weight not found, subgraph name = %s", subgraph_name.c_str()); | |||||
| return nullptr; | |||||
| } | |||||
| return it->second.get(); | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -45,6 +45,8 @@ class HybridModel { | |||||
| return root_runtime_param_.session_id; | return root_runtime_param_.session_id; | ||||
| } | } | ||||
| void *GetGlobalStep() const; | |||||
| GeModelPtr GetGeModel(const NodePtr &node) const; | GeModelPtr GetGeModel(const NodePtr &node) const; | ||||
| NodeItem *MutableNodeItem(const NodePtr &node); | NodeItem *MutableNodeItem(const NodePtr &node); | ||||
| @@ -69,8 +71,8 @@ class HybridModel { | |||||
| model_id_ = model_id; | model_id_ = model_id; | ||||
| } | } | ||||
| void SetModelName(const string &model_name) { | |||||
| om_name_ = model_name; | |||||
| void SetOmName(const string &om_name) { | |||||
| om_name_ = om_name; | |||||
| } | } | ||||
| const std::string &GetOmName() const { | const std::string &GetOmName() const { | ||||
| @@ -91,6 +93,10 @@ class HybridModel { | |||||
| TensorValue* GetTensor(const NodePtr &node) const; | TensorValue* GetTensor(const NodePtr &node) const; | ||||
| TensorBuffer* GetModelWeight(const std::string &subgraph_name) const; | |||||
| const std::map<int64_t, std::vector<std::pair<int, Tensor>>> &GetHostTensors() const; | |||||
| const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const; | const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const; | ||||
| const GraphItem *GetRootGraphItem() const; | const GraphItem *GetRootGraphItem() const; | ||||
| @@ -146,6 +152,7 @@ class HybridModel { | |||||
| std::unique_ptr<GraphItem> root_graph_item_; | std::unique_ptr<GraphItem> root_graph_item_; | ||||
| std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_; | std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_; | ||||
| std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | ||||
| std::map<int64_t, std::vector<std::pair<int, Tensor>>> host_tensors_; | |||||
| bool is_new_model_desc_ = false; // support aipp | bool is_new_model_desc_ = false; // support aipp | ||||
| bool is_single_op_ = false; | bool is_single_op_ = false; | ||||
| @@ -154,10 +161,10 @@ class HybridModel { | |||||
| uint32_t device_id_ = 0; | uint32_t device_id_ = 0; | ||||
| uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
| uint8_t *var_mem_base_ = nullptr; | uint8_t *var_mem_base_ = nullptr; | ||||
| std::unique_ptr<TensorBuffer> weight_buffer_; | |||||
| std::map<string, std::unique_ptr<TensorBuffer>> weight_buffer_map_; | std::map<string, std::unique_ptr<TensorBuffer>> weight_buffer_map_; | ||||
| RuntimeParam root_runtime_param_; | RuntimeParam root_runtime_param_; | ||||
| string om_name_; | string om_name_; | ||||
| std::unique_ptr<TensorBuffer> global_step_; | |||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -145,6 +145,9 @@ Status HybridModelBuilder::Build() { | |||||
| GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName()); | GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName()); | GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName()); | GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(OptimizeDependenciesForConstantInputs(), | |||||
| "[%s] Failed to optimize dependencies for constant inputs", | |||||
| GetGraphName()); | |||||
| GELOGI("[%s] Done building hybrid model successfully.", GetGraphName()); | GELOGI("[%s] Done building hybrid model successfully.", GetGraphName()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -315,6 +318,18 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s | |||||
| } | } | ||||
| } | } | ||||
| for (const auto &src_node : ge_node->GetInControlNodes()) { | |||||
| auto src_node_item = MutableNodeItem(src_node); | |||||
| if ((src_node_item != nullptr) && (is_hccl_op || src_node_item->IsHcclOp())) { | |||||
| GELOGD("[%s](%s) Add input control dependent node [%s](%s)", | |||||
| ge_node->GetName().c_str(), | |||||
| ge_node->GetType().c_str(), | |||||
| src_node->GetName().c_str(), | |||||
| src_node->GetType().c_str()); | |||||
| dependent_for_execution.emplace(src_node); | |||||
| } | |||||
| } | |||||
| // cond or branch need to be prepared before the execution of IF or CASE | // cond or branch need to be prepared before the execution of IF or CASE | ||||
| if (node_item.node_type == IF || node_item.node_type == STATELESSIF || node_item.node_type == CASE) { | if (node_item.node_type == IF || node_item.node_type == STATELESSIF || node_item.node_type == CASE) { | ||||
| auto src_node = NodeUtils::GetInDataNodeByIndex(*ge_node, 0); // cond input | auto src_node = NodeUtils::GetInDataNodeByIndex(*ge_node, 0); // cond input | ||||
| @@ -346,6 +361,7 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s | |||||
| auto src_node_item = MutableNodeItem(src_node); | auto src_node_item = MutableNodeItem(src_node); | ||||
| src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx()); | src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx()); | ||||
| dependent_for_shape_inference.emplace(src_node); | dependent_for_shape_inference.emplace(src_node); | ||||
| host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item); | |||||
| GELOGD("[%s] Dependent added from output of [%s:%d]", | GELOGD("[%s] Dependent added from output of [%s:%d]", | ||||
| node_item.NodeName().c_str(), | node_item.NodeName().c_str(), | ||||
| src_node_item->NodeName().c_str(), | src_node_item->NodeName().c_str(), | ||||
| @@ -1494,7 +1510,7 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { | |||||
| src_node->GetName().c_str(), | src_node->GetName().c_str(), | ||||
| src_op_type.c_str()); | src_op_type.c_str()); | ||||
| if (src_op_type != CONSTANTOP && src_op_type != VARIABLE) { | |||||
| if (src_op_type != CONSTANTOP && src_op_type != CONSTANT && src_op_type != VARIABLE) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -1503,6 +1519,9 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { | |||||
| GELOGD("Got parent output index = %u", parent_index); | GELOGD("Got parent output index = %u", parent_index); | ||||
| GE_CHECK_LE(parent_index, INT32_MAX); | GE_CHECK_LE(parent_index, INT32_MAX); | ||||
| node_item.ref_outputs.emplace(static_cast<int>(parent_index), src_node); | node_item.ref_outputs.emplace(static_cast<int>(parent_index), src_node); | ||||
| if (src_op_type == CONSTANTOP || src_op_type == CONSTANT) { | |||||
| known_subgraph_constant_output_refs_[&node_item].emplace(parent_index, src_node); | |||||
| } | |||||
| } | } | ||||
| // Data nodes marked with REF_VAR_SRC_VAR_NAME | // Data nodes marked with REF_VAR_SRC_VAR_NAME | ||||
| @@ -1568,6 +1587,10 @@ Status HybridModelBuilder::InitModelMem() { | |||||
| } | } | ||||
| runtime_param_.var_base = hybrid_model_.var_mem_base_; | runtime_param_.var_base = hybrid_model_.var_mem_base_; | ||||
| auto allocator = NpuMemoryAllocator::GetAllocator(); | |||||
| GE_CHECK_NOTNULL(allocator); | |||||
| hybrid_model_.global_step_ = TensorBuffer::Create(allocator, sizeof(int64_t)); | |||||
| GE_CHECK_NOTNULL(hybrid_model_.global_step_); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -2044,8 +2067,9 @@ Status HybridModelBuilder::CollectParallelGroups(NodeItem *node_item) { | |||||
| const auto &node = node_item->node; | const auto &node = node_item->node; | ||||
| auto executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node); | auto executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node); | ||||
| if (executor_type == NodeExecutorManager::ExecutorType::HCCL) { | if (executor_type == NodeExecutorManager::ExecutorType::HCCL) { | ||||
| std::string parallel_group; | |||||
| if (AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group)) { | |||||
| int64_t parallel_group_val = -1; | |||||
| if (AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group_val)) { | |||||
| std::string parallel_group = std::to_string(parallel_group_val); | |||||
| GELOGD("[%s] Got parallel group = [%s]", node_item->NodeName().c_str(), parallel_group.c_str()); | GELOGD("[%s] Got parallel group = [%s]", node_item->NodeName().c_str(), parallel_group.c_str()); | ||||
| parallel_group_to_nodes_[parallel_group].emplace(node_item); | parallel_group_to_nodes_[parallel_group].emplace(node_item); | ||||
| std::set<std::string> group{parallel_group}; | std::set<std::string> group{parallel_group}; | ||||
| @@ -2061,8 +2085,9 @@ Status HybridModelBuilder::CollectParallelGroups(NodeItem *node_item) { | |||||
| auto subgraph = hybrid_model_.root_graph_->GetSubgraph(subgraph_name); | auto subgraph = hybrid_model_.root_graph_->GetSubgraph(subgraph_name); | ||||
| GE_CHECK_NOTNULL(subgraph); | GE_CHECK_NOTNULL(subgraph); | ||||
| for (const auto &sub_node : subgraph->GetAllNodes()) { | for (const auto &sub_node : subgraph->GetAllNodes()) { | ||||
| std::string parallel_group; | |||||
| if (AttrUtils::GetStr(sub_node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group)) { | |||||
| int64_t parallel_group_val = -1; | |||||
| if (AttrUtils::GetInt(sub_node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group_val)) { | |||||
| std::string parallel_group = std::to_string(parallel_group_val); | |||||
| GELOGD("[%s::%s] Got parallel group = %s", | GELOGD("[%s::%s] Got parallel group = %s", | ||||
| subgraph_name.c_str(), | subgraph_name.c_str(), | ||||
| sub_node->GetName().c_str(), | sub_node->GetName().c_str(), | ||||
| @@ -2127,5 +2152,88 @@ Status HybridModelBuilder::ParseDependentByParallelGroup() { | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HybridModelBuilder::OptimizeDependenciesForConstantInputs() { | |||||
| std::map<NodePtr, std::set<uint32_t>> converted; | |||||
| for (auto &it : host_input_value_dependencies_) { | |||||
| auto node_item = it.first; | |||||
| std::map<NodeItem *, int> ref_counts; | |||||
| bool changed = false; | |||||
| for (auto output_idx_and_node : it.second) { | |||||
| auto output_idx = output_idx_and_node.first; | |||||
| auto src_node_item = output_idx_and_node.second; | |||||
| ++ref_counts[src_node_item]; | |||||
| NodePtr constant_node; | |||||
| if (src_node_item->node_type == CONSTANT || src_node_item->node_type == CONSTANTOP) { | |||||
| constant_node = src_node_item->node; | |||||
| GELOGD("src node [%s] is a constant", src_node_item->NodeName().c_str()); | |||||
| } else { | |||||
| auto iter = known_subgraph_constant_output_refs_.find(src_node_item); | |||||
| if (iter != known_subgraph_constant_output_refs_.end()) { | |||||
| constant_node = iter->second[output_idx]; | |||||
| if (constant_node != nullptr) { | |||||
| GELOGD("Output[%u] of subgraph [%s] is a constant", output_idx, src_node_item->NodeName().c_str()); | |||||
| } | |||||
| } | |||||
| } | |||||
| if (constant_node == nullptr) { | |||||
| GELOGD("Output[%u] of [%s] is not a constant", output_idx, src_node_item->NodeName().c_str()); | |||||
| continue; | |||||
| } | |||||
| if (converted[constant_node].count(output_idx) == 0) { | |||||
| GE_CHK_STATUS_RET(Convert2HostTensor(constant_node, src_node_item->node_id, output_idx), | |||||
| "[%s] Failed to convert constant to host tensor", constant_node->GetName().c_str()); | |||||
| converted[constant_node].emplace(output_idx); | |||||
| } | |||||
| src_node_item->to_const_output_id_list.erase(output_idx); | |||||
| --ref_counts[src_node_item]; | |||||
| changed = true; | |||||
| } | |||||
| if (changed) { | |||||
| std::vector<NodePtr> depends_to_keep; | |||||
| for (auto &ref_count_it : ref_counts) { | |||||
| if (ref_count_it.second == 0) { | |||||
| GELOGD("[%s] no longer depends on [%s] for shape inference", | |||||
| node_item->NodeName().c_str(), | |||||
| ref_count_it.first->NodeName().c_str()); | |||||
| } else { | |||||
| depends_to_keep.emplace_back(ref_count_it.first->node); | |||||
| } | |||||
| } | |||||
| node_item->dependents_for_shape_inference.swap(depends_to_keep); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HybridModelBuilder::Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx) { | |||||
| auto tensor_value = hybrid_model_.GetTensor(node); | |||||
| GE_CHECK_NOTNULL(tensor_value); | |||||
| auto tensor_desc = node->GetOpDesc()->MutableOutputDesc(0); | |||||
| GE_CHECK_NOTNULL(tensor_desc); | |||||
| Tensor tensor(TensorAdapter::GeTensorDesc2TensorDesc(*tensor_desc)); | |||||
| int64_t tensor_size = -1; | |||||
| GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorSizeInBytes(*tensor_desc, tensor_size), | |||||
| "[%s] Failed to get tensor size", node->GetName().c_str()); | |||||
| if (tensor_size > 0) { | |||||
| auto copy_size = static_cast<size_t>(tensor_size); | |||||
| GE_CHECK_GE(tensor_value->GetSize(), copy_size); | |||||
| std::vector<uint8_t> buffer(copy_size); | |||||
| GE_CHK_RT_RET(rtMemcpy(buffer.data(), | |||||
| copy_size, | |||||
| tensor_value->GetData(), | |||||
| copy_size, | |||||
| RT_MEMCPY_DEVICE_TO_HOST)); | |||||
| tensor.SetData(std::move(buffer)); | |||||
| GELOGD("[%s] Copy constant tensor to host successfully, size = %zu", node->GetName().c_str(), copy_size); | |||||
| } | |||||
| hybrid_model_.host_tensors_[node_id].emplace_back(output_idx, std::move(tensor)); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -91,6 +91,8 @@ class HybridModelBuilder { | |||||
| Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); | Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); | ||||
| Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); | Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); | ||||
| Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list); | Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list); | ||||
| Status OptimizeDependenciesForConstantInputs(); | |||||
| Status Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx); | |||||
| const char* GetGraphName() const { | const char* GetGraphName() const { | ||||
| return hybrid_model_.model_name_.c_str(); | return hybrid_model_.model_name_.c_str(); | ||||
| @@ -110,6 +112,12 @@ class HybridModelBuilder { | |||||
| RuntimeParam &runtime_param_; | RuntimeParam &runtime_param_; | ||||
| VarManager *var_manager_ = nullptr; | VarManager *var_manager_ = nullptr; | ||||
| // map<known_node_item, map<output_idx, constant_node>> | |||||
| std::map<NodeItem *, std::map<uint32_t, NodePtr>> known_subgraph_constant_output_refs_; | |||||
| // map<dst_node_item, vector<output_idx, src_node_item>> | |||||
| std::map<NodeItem *, std::vector<std::pair<uint32_t, NodeItem *>>> host_input_value_dependencies_; | |||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -71,22 +71,22 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) | |||||
| } | } | ||||
| Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | ||||
| auto op_desc_ptr = std::make_shared<OpDesc>(op_desc); | |||||
| GE_CHECK_NOTNULL(op_desc_ptr); | |||||
| auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | |||||
| if (tbe_kernel == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | |||||
| rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | ||||
| if (rt_ret != RT_ERROR_NONE || is_single_op_) { | if (rt_ret != RT_ERROR_NONE || is_single_op_) { | ||||
| auto op_desc_ptr = MakeShared<OpDesc>(op_desc); | |||||
| GE_CHECK_NOTNULL(op_desc_ptr); | |||||
| auto tbe_kernel = op_desc_ptr->TryGetExtAttr(GetKeyForTbeKernel(), TBEKernelPtr()); | |||||
| if (tbe_kernel == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | |||||
| void *bin_handle = nullptr; | void *bin_handle = nullptr; | ||||
| if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | ||||
| GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str()); | GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str()); | ||||
| rtDevBinary_t binary; | rtDevBinary_t binary; | ||||
| std::string json_string; | std::string json_string; | ||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), | |||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForTvmMagic(), json_string), | |||||
| GELOGI("Get original type of session_graph_id.")); | GELOGI("Get original type of session_graph_id.")); | ||||
| if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { | if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { | ||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; | binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; | ||||
| @@ -104,7 +104,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
| GELOGI("TBE: binary.length: %lu", binary.length); | GELOGI("TBE: binary.length: %lu", binary.length); | ||||
| GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); | GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); | ||||
| std::string meta_data; | std::string meta_data; | ||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_METADATA, meta_data), | |||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForTvmMetaData(), meta_data), | |||||
| GELOGI("Get original type of json_string")); | GELOGI("Get original type of json_string")); | ||||
| GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); | GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); | ||||
| GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | ||||
| @@ -114,7 +114,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
| kernel_store.ReferTBEHandle(stub_name_.c_str()); | kernel_store.ReferTBEHandle(stub_name_.c_str()); | ||||
| } | } | ||||
| std::string kernel_name; | std::string kernel_name; | ||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, op_desc_ptr->GetName() + "_kernelname", kernel_name), | |||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForKernelName(op_desc), kernel_name), | |||||
| GELOGI("Get original type of kernel_name")); | GELOGI("Get original type of kernel_name")); | ||||
| GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); | GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); | ||||
| GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0)); | GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0)); | ||||
| @@ -349,9 +349,6 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) | |||||
| GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), | GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), | ||||
| "Failed calc tiling data of node %s.", | "Failed calc tiling data of node %s.", | ||||
| node->GetName().c_str()); | node->GetName().c_str()); | ||||
| if (is_single_op_) { | |||||
| tiling_info.clear_atomic = false; | |||||
| } | |||||
| GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); | GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -468,6 +465,22 @@ std::string AiCoreOpTask::GetKeyForOpParamSize() const { | |||||
| return kAttrOpParamSize; | return kAttrOpParamSize; | ||||
| } | } | ||||
| std::string AiCoreOpTask::GetKeyForTbeKernel() const { | |||||
| return OP_EXTATTR_NAME_TBE_KERNEL; | |||||
| } | |||||
| std::string AiCoreOpTask::GetKeyForTvmMagic() const { | |||||
| return TVM_ATTR_NAME_MAGIC; | |||||
| } | |||||
| std::string AiCoreOpTask::GetKeyForTvmMetaData() const { | |||||
| return TVM_ATTR_NAME_METADATA; | |||||
| } | |||||
| std::string AiCoreOpTask::GetKeyForKernelName(const OpDesc &op_desc) const { | |||||
| return op_desc.GetName() + "_kernelname"; | |||||
| } | |||||
| Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | ||||
| GE_CHK_STATUS_RET_NOLOG(AiCoreOpTask::Init(op_desc, task_def)); | GE_CHK_STATUS_RET_NOLOG(AiCoreOpTask::Init(op_desc, task_def)); | ||||
| return InitAtomicAddrCleanIndices(op_desc); | return InitAtomicAddrCleanIndices(op_desc); | ||||
| @@ -524,6 +537,22 @@ std::string AtomicAddrCleanOpTask::GetKeyForOpParamSize() const { | |||||
| return kAttrAtomicOpParamSize; | return kAttrAtomicOpParamSize; | ||||
| } | } | ||||
| std::string AtomicAddrCleanOpTask::GetKeyForTbeKernel() const { | |||||
| return EXT_ATTR_ATOMIC_TBE_KERNEL; | |||||
| } | |||||
| std::string AtomicAddrCleanOpTask::GetKeyForTvmMagic() const { | |||||
| return ATOMIC_ATTR_TVM_MAGIC; | |||||
| } | |||||
| std::string AtomicAddrCleanOpTask::GetKeyForTvmMetaData() const { | |||||
| return ATOMIC_ATTR_TVM_METADATA; | |||||
| } | |||||
| std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) const { | |||||
| return op_desc.GetName() + "_atomic_kernelname"; | |||||
| } | |||||
| Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { | Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { | ||||
| GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); | GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); | ||||
| GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info), | GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info), | ||||
| @@ -81,6 +81,10 @@ class AiCoreOpTask { | |||||
| protected: | protected: | ||||
| Status UpdateTilingInfo(TaskContext &context); | Status UpdateTilingInfo(TaskContext &context); | ||||
| virtual std::string GetKeyForOpParamSize() const; | virtual std::string GetKeyForOpParamSize() const; | ||||
| virtual std::string GetKeyForTbeKernel() const; | |||||
| virtual std::string GetKeyForTvmMagic() const; | |||||
| virtual std::string GetKeyForTvmMetaData() const; | |||||
| virtual std::string GetKeyForKernelName(const OpDesc &op_desc) const; | |||||
| virtual Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info); | virtual Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info); | ||||
| std::unique_ptr<TensorBuffer> tiling_buffer_ = nullptr; | std::unique_ptr<TensorBuffer> tiling_buffer_ = nullptr; | ||||
| @@ -119,6 +123,10 @@ class AtomicAddrCleanOpTask : public AiCoreOpTask { | |||||
| protected: | protected: | ||||
| std::string GetKeyForOpParamSize() const override; | std::string GetKeyForOpParamSize() const override; | ||||
| std::string GetKeyForTbeKernel() const override; | |||||
| std::string GetKeyForTvmMagic() const override; | |||||
| std::string GetKeyForTvmMetaData() const override; | |||||
| std::string GetKeyForKernelName(const OpDesc &op_desc) const override; | |||||
| Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info) override; | Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info) override; | ||||
| private: | private: | ||||
| @@ -70,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<AiCoreNodeTask> &node_task, | |||||
| auto atomic_task = | auto atomic_task = | ||||
| std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask()); | std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask()); | ||||
| GE_CHECK_NOTNULL(atomic_task); | GE_CHECK_NOTNULL(atomic_task); | ||||
| atomic_task->SetSingleOp(is_single_op); | |||||
| GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), | GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), | ||||
| "[%s] Failed to init task for AtomicAddrClean", | "[%s] Failed to init task for AtomicAddrClean", | ||||
| op_desc_->GetName().c_str()); | op_desc_->GetName().c_str()); | ||||
| @@ -18,6 +18,7 @@ | |||||
| #include "cce/aicpu_engine_struct.h" | #include "cce/aicpu_engine_struct.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/fmk_error_codes.h" | #include "framework/common/fmk_error_codes.h" | ||||
| #include "common/dump/dump_manager.h" | |||||
| #include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
| #include "graph/attr_value.h" | #include "graph/attr_value.h" | ||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| @@ -110,15 +111,6 @@ Status KnownNodeTask::Init(TaskContext &context) { | |||||
| GELOGI("KnownNodeTask::Init mem base is %p, size %lu.", | GELOGI("KnownNodeTask::Init mem base is %p, size %lu.", | ||||
| davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size); | davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size); | ||||
| } | } | ||||
| if (!load_flag_) { | |||||
| auto dump_properties = context.GetDumpProperties(); | |||||
| if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { | |||||
| davinci_model_->SetDumpProperties(dump_properties); | |||||
| void *global_step = context.GetExecutionContext()->global_step; | |||||
| davinci_model_->SetKnownShapeGlobalStep(global_step); | |||||
| } | |||||
| load_flag_ = true; | |||||
| } | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(), | GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(), | ||||
| davinci_model_->Id(), davinci_model_->SubModelId()), | davinci_model_->Id(), davinci_model_->SubModelId()), | ||||
| "KnownNodeTask::Init destroy aicpu kernel failed."); | "KnownNodeTask::Init destroy aicpu kernel failed."); | ||||
| @@ -126,20 +118,35 @@ Status KnownNodeTask::Init(TaskContext &context) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status KnownNodeTask::InitDavinciModel() { | |||||
| GELOGD("[Init][Model] start"); | |||||
| Status KnownNodeTask::InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer) { | |||||
| GELOGD("[Init][DavinciModel] start"); | |||||
| davinci_model_->InitRuntimeParams(); | davinci_model_->InitRuntimeParams(); | ||||
| GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed"); | GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed"); | ||||
| int32_t device_id = 0; | int32_t device_id = 0; | ||||
| GE_CHK_RT_RET(rtGetDevice(&device_id)); | GE_CHK_RT_RET(rtGetDevice(&device_id)); | ||||
| davinci_model_->SetDeviceId(static_cast<uint32_t>(device_id)); | davinci_model_->SetDeviceId(static_cast<uint32_t>(device_id)); | ||||
| GE_CHK_STATUS_RET(DoInitDavinciModel(), "[Init][Model] Failed to init davinci model."); | |||||
| auto dump_properties = DumpManager::GetInstance().GetDumpProperties(model.GetSessionId()); | |||||
| if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { | |||||
| davinci_model_->SetDumpProperties(dump_properties); | |||||
| void *global_step = model.GetGlobalStep(); | |||||
| davinci_model_->SetKnownShapeGlobalStep(global_step); | |||||
| } | |||||
| void *weight = nullptr; | |||||
| size_t weight_size = 0; | |||||
| if (weight_buffer != nullptr) { | |||||
| weight = weight_buffer->GetData(); | |||||
| weight_size = weight_buffer->GetSize(); | |||||
| } | |||||
| GELOGD("Start to init davinci model, weight size = %zu", weight_size); | |||||
| GE_CHK_STATUS_RET(DoInitDavinciModel(weight, weight_size), "[Init][Model] Failed to init davinci model."); | |||||
| GELOGD("[Init][Model] success"); | GELOGD("[Init][Model] success"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status KnownNodeTask::DoInitDavinciModel() { | |||||
| return davinci_model_->Init(); | |||||
| Status KnownNodeTask::DoInitDavinciModel(void *weight, size_t weight_size) { | |||||
| return davinci_model_->Init(nullptr, 0, weight, weight_size); | |||||
| } | } | ||||
| Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { | Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { | ||||
| @@ -165,12 +172,17 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node | |||||
| const GeModelPtr ge_model = model.GetGeModel(node); | const GeModelPtr ge_model = model.GetGeModel(node); | ||||
| GE_CHECK_NOTNULL(ge_model); | GE_CHECK_NOTNULL(ge_model); | ||||
| AscendString graph_name; | |||||
| GE_CHK_GRAPH_STATUS_RET(ge_model->GetGraph().GetName(graph_name), "Failed to get graph name"); | |||||
| auto weight_buffer = model.GetModelWeight(graph_name.GetString()); | |||||
| std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr); | std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr); | ||||
| GE_CHECK_NOTNULL(davinci_model); | GE_CHECK_NOTNULL(davinci_model); | ||||
| // set known node flag as true | // set known node flag as true | ||||
| davinci_model->SetKnownNode(true); | davinci_model->SetKnownNode(true); | ||||
| davinci_model->SetId(model.GetModelId()); | davinci_model->SetId(model.GetModelId()); | ||||
| davinci_model->SetDumpModelName(model.GetModelName()); | |||||
| davinci_model->SetOmName(model.GetOmName()); | davinci_model->SetOmName(model.GetOmName()); | ||||
| // set model id as root node's node id | // set model id as root node's node id | ||||
| davinci_model->SetSubModelId(node->GetOpDesc()->GetId()); | davinci_model->SetSubModelId(node->GetOpDesc()->GetId()); | ||||
| @@ -180,7 +192,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node | |||||
| auto known_node_task = MakeShared<KnownNodeTask>(davinci_model); | auto known_node_task = MakeShared<KnownNodeTask>(davinci_model); | ||||
| GE_CHECK_NOTNULL(known_node_task); | GE_CHECK_NOTNULL(known_node_task); | ||||
| GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel()); | |||||
| GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel(model, weight_buffer)); | |||||
| GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str()); | GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str()); | ||||
| task = std::move(known_node_task); | task = std::move(known_node_task); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -36,13 +36,12 @@ class KnownNodeTask : public NodeTask { | |||||
| Status UpdateArgs(TaskContext &context) override; | Status UpdateArgs(TaskContext &context) override; | ||||
| Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; | Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; | ||||
| Status Init(TaskContext &context) override; | Status Init(TaskContext &context) override; | ||||
| Status InitDavinciModel(); | |||||
| Status InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer); | |||||
| protected: | protected: | ||||
| virtual Status DoInitDavinciModel(); | |||||
| virtual Status DoInitDavinciModel(void *weight, size_t weight_size); | |||||
| private: | private: | ||||
| std::shared_ptr<DavinciModel> davinci_model_ = nullptr; | std::shared_ptr<DavinciModel> davinci_model_ = nullptr; | ||||
| bool load_flag_ = false; | |||||
| }; | }; | ||||
| class KnownNodeExecutor : public NodeExecutor { | class KnownNodeExecutor : public NodeExecutor { | ||||
| @@ -127,7 +127,7 @@ void SingleOpModel::ParseOpModelParams(ModelHelper &model_helper, SingleOpModelP | |||||
| ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_CORE_TYPE, value); | ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_CORE_TYPE, value); | ||||
| param.core_type = ret ? value : 0; | param.core_type = ret ? value : 0; | ||||
| GELOGI("ParseOpModelParams(), total_memory_size:%lu, zero_copy_size:%lu, weight_size:%lu. core_type = %lu", | |||||
| GELOGI("ParseOpModelParams(), total_memory_size:%lu, zero_copy_size:%lu, weight_size:%lu, core_type = %lu", | |||||
| param.memory_size, param.zero_copy_mem_size, param.weight_size, param.core_type); | param.memory_size, param.zero_copy_mem_size, param.weight_size, param.core_type); | ||||
| } | } | ||||
| @@ -454,7 +454,7 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | ||||
| if (kernel_type == ccKernelType::TE) { | if (kernel_type == ccKernelType::TE) { | ||||
| GELOGD("Building TBE task"); | |||||
| GELOGD("Building TBE task."); | |||||
| TbeOpTask *tbe_task = nullptr; | TbeOpTask *tbe_task = nullptr; | ||||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | ||||
| tbe_task->SetModelArgs(model_name_, model_id_); | tbe_task->SetModelArgs(model_name_, model_id_); | ||||
| @@ -482,7 +482,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
| auto tasks = ge_model->GetModelTaskDefPtr()->task(); | auto tasks = ge_model->GetModelTaskDefPtr()->task(); | ||||
| for (int i = 0; i < tasks.size(); ++i) { | for (int i = 0; i < tasks.size(); ++i) { | ||||
| const TaskDef &task_def = tasks[i]; | const TaskDef &task_def = tasks[i]; | ||||
| GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(), | |||||
| GELOGI("[%s] Task[%d], type = [%u], DebugString = [%s]", model_name_.c_str(), i, task_def.type(), | |||||
| task_def.DebugString().c_str()); | task_def.DebugString().c_str()); | ||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
| if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { | if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { | ||||
| @@ -121,7 +121,7 @@ Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id | |||||
| } | } | ||||
| GE_CHECK_NOTNULL(op_desc_); | GE_CHECK_NOTNULL(op_desc_); | ||||
| string op_name = op_desc_->GetName(); | string op_name = op_desc_->GetName(); | ||||
| GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); | |||||
| GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u].", op_name.c_str(), task_id, stream_id); | |||||
| model_id = model_id_; | model_id = model_id_; | ||||
| task_desc_info.model_name = model_name_; | task_desc_info.model_name = model_name_; | ||||
| task_desc_info.block_dim = block_dim_; | task_desc_info.block_dim = block_dim_; | ||||
| @@ -459,10 +459,14 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||||
| continue; | continue; | ||||
| } | } | ||||
| GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), ACL_ERROR_GE_PARAM_INVALID, | GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), ACL_ERROR_GE_PARAM_INVALID, | ||||
| "Input_desc size is %zu, but get non_const_index is %zu", | |||||
| input_desc.size(), non_const_index); | |||||
| "Input_desc size is %zu, but get non_const_index is %zu", input_desc.size(), | |||||
| non_const_index); | |||||
| GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, input_desc[non_const_index]), | GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, input_desc[non_const_index]), | ||||
| "Input[%zu] update input shape failed.", input_index); | "Input[%zu] update input shape failed.", input_index); | ||||
| if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) { | |||||
| GE_CHK_STATUS_RET(op_desc_->UpdateInputDesc(input_index, input_desc[non_const_index]), | |||||
| "AicpuTask Update [%zu]th input desc failed", input_index); | |||||
| } | |||||
| non_const_index++; | non_const_index++; | ||||
| } | } | ||||
| @@ -470,6 +474,10 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||||
| for (size_t j = 0; j < num_outputs_; ++j) { | for (size_t j = 0; j < num_outputs_; ++j) { | ||||
| GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]), | GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]), | ||||
| "Output[%zu] UpdateOutputShapeAndType failed.", j); | "Output[%zu] UpdateOutputShapeAndType failed.", j); | ||||
| if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) { | |||||
| GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(j, output_desc[j]), "AicpuTask Update [%zu]th output desc failed", | |||||
| j); | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| @@ -98,6 +98,7 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||||
| Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | ||||
| const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | ||||
| bool is_offline = true); | bool is_offline = true); | ||||
| void RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs); | |||||
| Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); | Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); | ||||
| using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>; | using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>; | ||||
| @@ -1 +1 @@ | |||||
| Subproject commit 8cf3c51d53a9f4ebd6d601a2383f62788e3b8176 | |||||
| Subproject commit 7aa912ab473b780c3d2f9c907760e4cb32dc0fb6 | |||||
| @@ -1 +1 @@ | |||||
| Subproject commit d851e1d467768b6cefd8f5f44745be1c5312121a | |||||
| Subproject commit d4587c1c33d2d50ef157bbc0449484a196e91429 | |||||
| @@ -166,6 +166,7 @@ set(COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" | "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" | ||||
| "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" | "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" | ||||
| "${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc" | "${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc" | ||||
| "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" | |||||
| "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" | "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" | ||||
| "${GE_CODE_DIR}/ge/model/ge_root_model.cc" | "${GE_CODE_DIR}/ge/model/ge_root_model.cc" | ||||
| "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" | "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" | ||||
| @@ -742,6 +743,7 @@ set(MULTI_PARTS_TEST_FILES | |||||
| "graph/transop_util_unittest.cc" | "graph/transop_util_unittest.cc" | ||||
| "common/datatype_transfer_unittest.cc" | "common/datatype_transfer_unittest.cc" | ||||
| "common/dump_manager_unittest.cc" | "common/dump_manager_unittest.cc" | ||||
| "common/dump_op_unittest.cc" | |||||
| "common/opdebug_register_unittest.cc" | "common/opdebug_register_unittest.cc" | ||||
| "common/format_transfer_unittest.cc" | "common/format_transfer_unittest.cc" | ||||
| "common/format_transfer_transpose_unittest.cc" | "common/format_transfer_transpose_unittest.cc" | ||||
| @@ -0,0 +1,61 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #define protected public | |||||
| #define private public | |||||
| #include "common/dump/dump_op.h" | |||||
| #include "common/debug/log.h" | |||||
| #include "common/ge_inner_error_codes.h" | |||||
| #include "common/dump/dump_properties.h" | |||||
| #undef private | |||||
| #undef protected | |||||
| namespace ge { | |||||
| class UTEST_dump_op : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| }; | |||||
| TEST_F(UTEST_dump_op, launch_dump_op_success) { | |||||
| DumpOp dump_op; | |||||
| DumpProperties dump_properties; | |||||
| OpDescPtr op_desc = std::make_shared<OpDesc>("GatherV2", "GatherV2"); | |||||
| std::set<std::string> temp; | |||||
| dump_properties.model_dump_properties_map_.emplace("model1", temp); | |||||
| dump_properties.enable_dump_ = "1"; | |||||
| dump_op.SetDynamicModelInfo("model1", "model2", 1); | |||||
| dump_op.SetDumpInfo(dump_properties, op_desc, {}, {}, nullptr); | |||||
| auto ret = dump_op.LaunchDumpOp(); | |||||
| EXPECT_EQ(ret, ge::SUCCESS); | |||||
| } | |||||
| TEST_F(UTEST_dump_op, launch_dump_op_success_2) { | |||||
| DumpOp dump_op; | |||||
| DumpProperties dump_properties; | |||||
| OpDescPtr op_desc = std::make_shared<OpDesc>("GatherV2", "GatherV2"); | |||||
| std::set<std::string> temp; | |||||
| dump_properties.model_dump_properties_map_.emplace("model1", temp); | |||||
| dump_properties.enable_dump_ = "1"; | |||||
| dump_op.SetDynamicModelInfo("modle2", "model2", 1); | |||||
| dump_op.SetDumpInfo(dump_properties, op_desc, {}, {}, nullptr); | |||||
| auto ret = dump_op.LaunchDumpOp(); | |||||
| EXPECT_EQ(ret, ge::SUCCESS); | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -48,18 +48,49 @@ public: | |||||
| return node; | return node; | ||||
| } | } | ||||
| int CountOfAtomicCleanNode() { | |||||
| int node_num = 0; | |||||
| for (NodePtr &node : graph_->GetDirectNode()) { | |||||
| if (node->GetType() == ATOMICADDRCLEAN) { | |||||
| ++node_num; | |||||
| } | |||||
| } | |||||
| return node_num; | |||||
| } | |||||
| ComputeGraphPtr graph_; | ComputeGraphPtr graph_; | ||||
| }; | }; | ||||
| // node1 -> node2 -> node3 | |||||
| /* | |||||
| * Data Data Atomic_clean | |||||
| * | | / | | |||||
| * relu relu | | |||||
| * | ==> | | | |||||
| * relu(atomic) relu(atomic) | |||||
| * | | | |||||
| * netoutput netoutput | |||||
| */ | |||||
| TEST_F(UtestGraphPassesAtomicAddrCleanPass, pass_run_success) { | TEST_F(UtestGraphPassesAtomicAddrCleanPass, pass_run_success) { | ||||
| auto node1 = NewNode("node1", DATA, 0, 1); | auto node1 = NewNode("node1", DATA, 0, 1); | ||||
| auto node2 = NewNode("node2", RELU, 1, 1); | auto node2 = NewNode("node2", RELU, 1, 1); | ||||
| auto node3 = NewNode("node3", NETOUTPUT, 1, 0); | |||||
| auto node3 = NewNode("node3", RELU, 1, 1); | |||||
| auto op_desc = node3->GetOpDesc(); | |||||
| vector<int64_t> atomic_input_index = {123, 456}; | |||||
| AttrUtils::SetListInt(op_desc, "atomic_input_index", atomic_input_index); | |||||
| auto node4 = NewNode("node4", NETOUTPUT, 1, 0); | |||||
| GraphUtils::AddEdge(node1->GetOutDataAnchor(0), node2->GetInDataAnchor(0)); | GraphUtils::AddEdge(node1->GetOutDataAnchor(0), node2->GetInDataAnchor(0)); | ||||
| GraphUtils::AddEdge(node2->GetOutDataAnchor(0), node3->GetInDataAnchor(0)); | GraphUtils::AddEdge(node2->GetOutDataAnchor(0), node3->GetInDataAnchor(0)); | ||||
| GraphUtils::AddEdge(node3->GetOutDataAnchor(0), node4->GetInDataAnchor(0)); | |||||
| AtomicAddrCleanPass atomi_addr_clean_pass; | AtomicAddrCleanPass atomi_addr_clean_pass; | ||||
| Status ret = atomi_addr_clean_pass.Run(graph_); | Status ret = atomi_addr_clean_pass.Run(graph_); | ||||
| EXPECT_EQ(ret, SUCCESS); | EXPECT_EQ(ret, SUCCESS); | ||||
| EXPECT_EQ(1, CountOfAtomicCleanNode()); | |||||
| auto atomic_clean = graph_->FindNode("atomic_addr_clean"); | |||||
| EXPECT_NE(atomic_clean, nullptr); | |||||
| auto out_ctrl_nodes = atomic_clean->GetOutControlNodes(); | |||||
| EXPECT_EQ(out_ctrl_nodes.size(), 2); | |||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||