| @@ -195,6 +195,7 @@ set(TRAIN_SRC_LIST | |||||
| "graph/passes/atomic_addr_clean_pass.cc" | "graph/passes/atomic_addr_clean_pass.cc" | ||||
| "graph/passes/mark_same_addr_pass.cc" | "graph/passes/mark_same_addr_pass.cc" | ||||
| "graph/passes/mark_graph_unknown_status_pass.cc" | "graph/passes/mark_graph_unknown_status_pass.cc" | ||||
| "graph/passes/mark_node_unknown_shape_pass.cc" | |||||
| "graph/passes/mark_agnostic_pass.cc" | "graph/passes/mark_agnostic_pass.cc" | ||||
| "graph/partition/dynamic_shape_partition.cc" | "graph/partition/dynamic_shape_partition.cc" | ||||
| "graph/partition/stage_partition.cc" | "graph/partition/stage_partition.cc" | ||||
| @@ -509,6 +510,7 @@ set(INFER_SRC_LIST | |||||
| "graph/passes/atomic_addr_clean_pass.cc" | "graph/passes/atomic_addr_clean_pass.cc" | ||||
| "graph/passes/mark_same_addr_pass.cc" | "graph/passes/mark_same_addr_pass.cc" | ||||
| "graph/passes/mark_graph_unknown_status_pass.cc" | "graph/passes/mark_graph_unknown_status_pass.cc" | ||||
| "graph/passes/mark_node_unknown_shape_pass.cc" | |||||
| "graph/passes/mark_agnostic_pass.cc" | "graph/passes/mark_agnostic_pass.cc" | ||||
| "graph/common/omg_util.cc" | "graph/common/omg_util.cc" | ||||
| "graph/common/bcast.cc" | "graph/common/bcast.cc" | ||||
| @@ -114,6 +114,7 @@ OMG_HOST_SRC_FILES := \ | |||||
| graph/passes/atomic_addr_clean_pass.cc \ | graph/passes/atomic_addr_clean_pass.cc \ | ||||
| graph/passes/mark_same_addr_pass.cc \ | graph/passes/mark_same_addr_pass.cc \ | ||||
| graph/passes/mark_graph_unknown_status_pass.cc \ | graph/passes/mark_graph_unknown_status_pass.cc \ | ||||
| graph/passes/mark_node_unknown_shape_pass.cc \ | |||||
| graph/passes/mark_agnostic_pass.cc \ | graph/passes/mark_agnostic_pass.cc \ | ||||
| graph/common/omg_util.cc \ | graph/common/omg_util.cc \ | ||||
| graph/common/bcast.cc \ | graph/common/bcast.cc \ | ||||
| @@ -114,6 +114,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
| graph/passes/atomic_addr_clean_pass.cc \ | graph/passes/atomic_addr_clean_pass.cc \ | ||||
| graph/passes/mark_same_addr_pass.cc \ | graph/passes/mark_same_addr_pass.cc \ | ||||
| graph/passes/mark_graph_unknown_status_pass.cc \ | graph/passes/mark_graph_unknown_status_pass.cc \ | ||||
| graph/passes/mark_node_unknown_shape_pass.cc \ | |||||
| graph/passes/mark_agnostic_pass.cc \ | graph/passes/mark_agnostic_pass.cc \ | ||||
| graph/partition/dynamic_shape_partition.cc \ | graph/partition/dynamic_shape_partition.cc \ | ||||
| graph/partition/stage_partition.cc \ | graph/partition/stage_partition.cc \ | ||||
| @@ -53,6 +53,7 @@ constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | |||||
| const int64_t kDynamicDimValue = -2; | const int64_t kDynamicDimValue = -2; | ||||
| const int kDefaultDeviceId = 0; | const int kDefaultDeviceId = 0; | ||||
| const int kDefaultJobId = 0; | const int kDefaultJobId = 0; | ||||
| const int32_t kFuzzBuildPattern = 1; | |||||
| std::map<ge::OpEngineType, std::string> engine_type_map{ | std::map<ge::OpEngineType, std::string> engine_type_map{ | ||||
| {ge::ENGINE_SYS, kEngineNameDefault}, | {ge::ENGINE_SYS, kEngineNameDefault}, | ||||
| @@ -296,13 +297,44 @@ static Status ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTenso | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| static Status GetFuzzBuildAttrs(const OpDescPtr &op_desc, const GeRootModelPtr &ge_root_model, | |||||
| GeAttrValue::LIST_NAMED_ATTRS &fuzz_build_attrs) { | |||||
| GELOGD("Start get fuzz build attrs of %s.", op_desc->GetName().c_str()); | |||||
| GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | |||||
| for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
| GELOGD("Delete fuzz build attr of %s after build.", node->GetName().c_str()); | |||||
| node->GetOpDesc()->DelAttr(ATTR_NAME_FUZZ_BUILD); | |||||
| } | |||||
| (void)AttrUtils::GetListNamedAttrs(op_desc, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs); | |||||
| if (!fuzz_build_attrs.empty()) { | |||||
| GELOGD("%s has split, get ATTR_NAME_FUZZ_BUILD_RES_ATTRS directly.", op_desc->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } else { | |||||
| GELOGW("%s build with fuzz build pattern, but not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", op_desc->GetName().c_str()); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| static bool HasShapeRange(const vector<GeTensor> &inputs) { | |||||
| for (const auto &input : inputs) { | |||||
| vector<pair<int64_t, int64_t>> shape_range; | |||||
| (void)input.GetTensorDesc().GetShapeRange(shape_range); | |||||
| if (!shape_range.empty()) { | |||||
| GELOGD("Has set shape range."); | |||||
| return true; | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| class GeGenerator::Impl { | class GeGenerator::Impl { | ||||
| public: | public: | ||||
| Impl(OmgContext &omg_context) : omg_context_(omg_context) {} | Impl(OmgContext &omg_context) : omg_context_(omg_context) {} | ||||
| ~Impl() = default; | ~Impl() = default; | ||||
| Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GeRootModelPtr &ge_models); | Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GeRootModelPtr &ge_models); | ||||
| Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model); | Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model); | ||||
| Status SaveRootModel(const string &file_name_prefix, GeRootModelPtr &model, ModelBufferData &model_buff); | Status SaveRootModel(const string &file_name_prefix, GeRootModelPtr &model, ModelBufferData &model_buff); | ||||
| @@ -742,7 +774,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> | |||||
| Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | ||||
| const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | ||||
| bool is_offline) { | |||||
| bool is_offline, int32_t compile_flag) { | |||||
| GELOGD("Inputs size is %zu, outputs size is %zu.", inputs.size(), outputs.size()); | |||||
| GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | ||||
| impl_->is_offline_ = is_offline; | impl_->is_offline_ = is_offline; | ||||
| if (!is_offline) { | if (!is_offline) { | ||||
| @@ -764,6 +797,16 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc); | OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc); | ||||
| GE_CHECK_NOTNULL(op_desc_tmp); | GE_CHECK_NOTNULL(op_desc_tmp); | ||||
| bool fuzz_compile_flag = false; | |||||
| if (!HasShapeRange(inputs) && compile_flag == kFuzzBuildPattern) { | |||||
| fuzz_compile_flag = true; | |||||
| } | |||||
| if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, fuzz_compile_flag)) { | |||||
| GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD] Failed to set attr for %s.", op_desc->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| impl_->omg_context_.fuzz_compile_flag = fuzz_compile_flag; | |||||
| // 1. Create ComputeGraph. | // 1. Create ComputeGraph. | ||||
| string name = ge::CurrentTimeInStr() + "_" + model_file_name; | string name = ge::CurrentTimeInStr() + "_" + model_file_name; | ||||
| Graph graph; | Graph graph; | ||||
| @@ -810,6 +853,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic)); | GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic)); | ||||
| GE_CHK_STATUS_RET_NOLOG( | GE_CHK_STATUS_RET_NOLOG( | ||||
| impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic)); | impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic)); | ||||
| } else if (fuzz_compile_flag) { | |||||
| GELOGD("Get fuzz build result of %s.", op_desc->GetName().c_str()); | |||||
| (void)AttrUtils::SetInt(ge_model, ATTR_NAME_BUILD_MODE, fuzz_compile_flag); | |||||
| GeAttrValue::LIST_NAMED_ATTRS fuzz_build_attrs; | |||||
| if (GetFuzzBuildAttrs(op_desc, ge_root_model, fuzz_build_attrs) != SUCCESS) { | |||||
| GELOGE(FAILED, "[Get][FuzzRet]Failed to get fuzz build result of %s.", op_desc->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| if (!fuzz_build_attrs.empty()) { | |||||
| GE_CHK_BOOL_EXEC(AttrUtils::SetListNamedAttrs(ge_model, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs), | |||||
| return FAILED, "Set ATTR_NAME_FUZZ_BUILD_RES_ATTRS failed."); | |||||
| } | |||||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | |||||
| } else { | } else { | ||||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | ||||
| } | } | ||||
| @@ -825,15 +881,17 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| * @param [in] vector<GeTensor> &inputs: Operator input data description information. | * @param [in] vector<GeTensor> &inputs: Operator input data description information. | ||||
| * @param [in] vector<GeTensor> &outputs: Operator output data description information. | * @param [in] vector<GeTensor> &outputs: Operator output data description information. | ||||
| * @param [in] const string &model_file_name: Offline model filename. | * @param [in] const string &model_file_name: Offline model filename. | ||||
| * @param [in] compile_flag: op build flag from atc | |||||
| * @return SUCCESS handle successfully / others handle failed | * @return SUCCESS handle successfully / others handle failed | ||||
| */ | */ | ||||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
| const vector<GeTensor> &outputs, const string &model_file_name) { | |||||
| const vector<GeTensor> &outputs, const string &model_file_name, | |||||
| int32_t compile_flag) { | |||||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | ||||
| GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | ||||
| ModelBufferData model_buff; | ModelBufferData model_buff; | ||||
| OpEngineType engine_type = ENGINE_SYS; | OpEngineType engine_type = ENGINE_SYS; | ||||
| Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); | |||||
| Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true, compile_flag); | |||||
| GELOGI("Finish build single offline model, status: %u", status); | GELOGI("Finish build single offline model, status: %u", status); | ||||
| return status; | return status; | ||||
| } | } | ||||
| @@ -850,7 +908,6 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor | |||||
| * @return SUCCESS handle successfully / others handle failed | * @return SUCCESS handle successfully / others handle failed | ||||
| */ | */ | ||||
| // old process will be deleted | |||||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
| const vector<GeTensor> &outputs, OpEngineType engine_type, | const vector<GeTensor> &outputs, OpEngineType engine_type, | ||||
| ModelBufferData &model_buff) { | ModelBufferData &model_buff) { | ||||
| @@ -864,7 +921,12 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor | |||||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
| const vector<GeTensor> &outputs, OpEngineType engine_type, int32_t compile_flag, | const vector<GeTensor> &outputs, OpEngineType engine_type, int32_t compile_flag, | ||||
| ModelBufferData &model_buff) { | ModelBufferData &model_buff) { | ||||
| return SUCCESS; | |||||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | |||||
| GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | |||||
| Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false, | |||||
| compile_flag); | |||||
| GELOGI("Finish build single online model, status: %u", status); | |||||
| return status; | |||||
| } | } | ||||
| Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
| @@ -61,6 +61,7 @@ | |||||
| #include "graph/passes/iterator_op_pass.h" | #include "graph/passes/iterator_op_pass.h" | ||||
| #include "graph/passes/link_gen_mask_nodes_pass.h" | #include "graph/passes/link_gen_mask_nodes_pass.h" | ||||
| #include "graph/passes/mark_graph_unknown_status_pass.h" | #include "graph/passes/mark_graph_unknown_status_pass.h" | ||||
| #include "graph/passes/mark_node_unknown_shape_pass.h" | |||||
| #include "graph/passes/merge_pass.h" | #include "graph/passes/merge_pass.h" | ||||
| #include "graph/passes/merge_input_memcpy_pass.h" | #include "graph/passes/merge_input_memcpy_pass.h" | ||||
| #include "graph/passes/merge_to_stream_merge_pass.h" | #include "graph/passes/merge_to_stream_merge_pass.h" | ||||
| @@ -864,6 +865,8 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
| } | } | ||||
| ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kPrepareOptimize); | ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kPrepareOptimize); | ||||
| // set fuzz compile flag after origin graph optimize | |||||
| GE_CHK_STATUS_RET(SetFuzzCompileFlag(compute_graph), "Set fuzz compile flag failed."); | |||||
| ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id); | ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); | GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); | ||||
| @@ -878,7 +881,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
| options_.build_step == BUILD_STEP_AFTER_BUILDER || | options_.build_step == BUILD_STEP_AFTER_BUILDER || | ||||
| options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB)); | options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB)); | ||||
| if (run_after_optimize_subgraph) { | if (run_after_optimize_subgraph) { | ||||
| Status ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id); | |||||
| ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Run PreRunAfterOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); | GELOGE(ret, "Run PreRunAfterOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); | ||||
| return ret; | return ret; | ||||
| @@ -896,6 +899,22 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphManager::SetFuzzCompileFlag(ComputeGraphPtr &compute_graph) { | |||||
| if (!GetLocalOmgContext().fuzz_compile_flag) { | |||||
| return SUCCESS; | |||||
| } | |||||
| for (const auto &node : compute_graph->GetAllNodes()) { | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| GELOGD("Fuzz compile flag is %d.", GetLocalOmgContext().fuzz_compile_flag); | |||||
| if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, GetLocalOmgContext().fuzz_compile_flag)) { | |||||
| GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD]Failed to set fuzz build attr to %s.", op_desc->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) { | Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) { | ||||
| PassManager pass_manager; | PassManager pass_manager; | ||||
| GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass)); | GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass)); | ||||
| @@ -2487,6 +2506,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||||
| new (std::nothrow) VariableRefDeleteOpPass)) | new (std::nothrow) VariableRefDeleteOpPass)) | ||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::CompileNodesPass", | GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::CompileNodesPass", | ||||
| new (std::nothrow) CompileNodesPass)) | new (std::nothrow) CompileNodesPass)) | ||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass( | |||||
| "OptimizeStage2::AfterMergePasses::MarkNodeUnknownShapePass", new(std::nothrow) MarkNodeUnknownShapePass)) | |||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass( | GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass( | ||||
| "OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new(std::nothrow) MarkGraphUnknownStatusPass)) | "OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new(std::nothrow) MarkGraphUnknownStatusPass)) | ||||
| GE_CHK_STATUS_RET( | GE_CHK_STATUS_RET( | ||||
| @@ -358,6 +358,7 @@ class GraphManager { | |||||
| ComputeGraphPtr &compute_graph, | ComputeGraphPtr &compute_graph, | ||||
| GeRootModelPtr &ge_root_model, | GeRootModelPtr &ge_root_model, | ||||
| uint64_t session_id); | uint64_t session_id); | ||||
| Status SetFuzzCompileFlag(ComputeGraphPtr &compute_graph); | |||||
| Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph, | Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph, | ||||
| Graph2SubGraphInfoList &sub_graph_map, | Graph2SubGraphInfoList &sub_graph_map, | ||||
| @@ -0,0 +1,99 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "graph/passes/mark_node_unknown_shape_pass.h" | |||||
| #include "graph/utils/node_utils.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "graph/common/local_context.h" | |||||
| namespace ge { | |||||
| namespace { | |||||
| const char *const kEngineNameAiCore = "AIcoreEngine"; | |||||
| const char *const kNeedRefreshShape = "_need_generate"; | |||||
| const char *const kOriginalNode = "_original_node"; | |||||
| const int32_t kDynamicState = -2; | |||||
| } | |||||
| Status MarkNodeUnknownShapePass::Run(ComputeGraphPtr graph) { | |||||
| GE_CHECK_NOTNULL(graph); | |||||
| if (!GetLocalOmgContext().fuzz_compile_flag) { | |||||
| return SUCCESS; | |||||
| } | |||||
| if (IsAllAicoreSupportDyn(graph)) { | |||||
| if (UpdateNodeShapeToUnknown(graph) != SUCCESS) { | |||||
| GELOGE(FAILED, "[Update][Node_Shape]Failed to update node shape to unknown."); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| bool MarkNodeUnknownShapePass::IsAllAicoreSupportDyn(ComputeGraphPtr &graph) { | |||||
| bool is_all_aicore_support_dyn = false; | |||||
| for (const auto &node : graph->GetAllNodes()) { | |||||
| if (node->GetOpDesc() == nullptr) { | |||||
| continue; | |||||
| } | |||||
| if (node->GetOpDesc()->GetOpKernelLibName() != kEngineNameAiCore) { | |||||
| GELOGD("Kernel of %s is %s.", node->GetName().c_str(), node->GetOpDesc()->GetOpKernelLibName().c_str()); | |||||
| continue; | |||||
| } | |||||
| NodePtr original_node = nullptr; | |||||
| original_node = node->GetOpDesc()->TryGetExtAttr(kOriginalNode, original_node); | |||||
| if ((original_node == nullptr && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS)) || | |||||
| (original_node != nullptr && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS) && | |||||
| !AttrUtils::HasAttr(original_node->GetOpDesc(), kNeedRefreshShape))) { | |||||
| GELOGD("%s has set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str()); | |||||
| is_all_aicore_support_dyn = true; | |||||
| } else { | |||||
| GELOGD("%s has not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str()); | |||||
| is_all_aicore_support_dyn = false; | |||||
| break; | |||||
| } | |||||
| } | |||||
| return is_all_aicore_support_dyn; | |||||
| } | |||||
| Status MarkNodeUnknownShapePass::UpdateNodeShapeToUnknown(ComputeGraphPtr &graph) { | |||||
| GELOGD("Need to update node shape to dynamic when get fuzz build result."); | |||||
| for (const auto &node : graph->GetAllNodes()) { | |||||
| if (NodeUtils::IsConst(*node) || node->GetType() == VARIABLE) { | |||||
| continue; | |||||
| } | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||||
| auto src_node = NodeUtils::GetInDataNodeByIndex(*node, static_cast<int>(i)); | |||||
| if (src_node != nullptr && (NodeUtils::IsConst(*src_node) || src_node->GetType() == VARIABLE)) { | |||||
| continue; | |||||
| } | |||||
| GELOGD("Update input shape for %s.", node->GetName().c_str()); | |||||
| auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | |||||
| if (input_desc != nullptr) { | |||||
| input_desc->SetShape(GeShape({kDynamicState})); | |||||
| } | |||||
| } | |||||
| for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) { | |||||
| if (output_desc != nullptr) { | |||||
| GELOGD("Update output shape for %s.", node->GetName().c_str()); | |||||
| output_desc->SetShape(GeShape({kDynamicState})); | |||||
| } | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,32 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ | |||||
| #define GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ | |||||
| #include "graph/graph.h" | |||||
| #include "inc/graph_pass.h" | |||||
| namespace ge { | |||||
| class MarkNodeUnknownShapePass : public GraphPass { | |||||
| public: | |||||
| Status Run(ComputeGraphPtr graph); | |||||
| private: | |||||
| bool IsAllAicoreSupportDyn(ComputeGraphPtr &graph); | |||||
| Status UpdateNodeShapeToUnknown(ComputeGraphPtr &graph); | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ | |||||
| @@ -55,9 +55,17 @@ Status InsertReshapeIfNeed(const NodePtr &node) { | |||||
| GE_CHECK_NOTNULL(dst_node->GetOpDesc()); | GE_CHECK_NOTNULL(dst_node->GetOpDesc()); | ||||
| auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); | auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); | ||||
| GE_CHECK_NOTNULL(dst_tensor); | GE_CHECK_NOTNULL(dst_tensor); | ||||
| bool is_need_insert_reshape = src_tensor->GetShape().GetDims() != UNKNOWN_RANK && | |||||
| dst_tensor->GetShape().GetDims() != UNKNOWN_RANK && | |||||
| src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims(); | |||||
| bool is_dynamic = false; | |||||
| const auto &src_tensor_dims = src_tensor->GetShape().GetDims(); | |||||
| const auto &dst_tensor_dims = dst_tensor->GetShape().GetDims(); | |||||
| if ((std::any_of(src_tensor_dims.begin(), src_tensor_dims.end(), [](int64_t val) { return val < 0 ; })) | |||||
| || (std::any_of(dst_tensor_dims.begin(), dst_tensor_dims.end(), [](int64_t val) { return val < 0; }))) { | |||||
| GELOGD("No need to insert reshape node between %s nad %s.", node->GetName().c_str(), | |||||
| dst_node->GetName().c_str()); | |||||
| is_dynamic = true; | |||||
| } | |||||
| bool is_need_insert_reshape = src_tensor_dims != dst_tensor_dims && | |||||
| !is_dynamic; | |||||
| if (is_need_insert_reshape) { | if (is_need_insert_reshape) { | ||||
| auto reshape = CreateReshape(src_tensor, dst_tensor, node->GetOwnerComputeGraph()); | auto reshape = CreateReshape(src_tensor, dst_tensor, node->GetOwnerComputeGraph()); | ||||
| GE_CHECK_NOTNULL(reshape); | GE_CHECK_NOTNULL(reshape); | ||||
| @@ -54,6 +54,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { | |||||
| "[%s] check input node shape by shape range failed.", | "[%s] check input node shape by shape range failed.", | ||||
| root_graph_item->GetName().c_str()); | root_graph_item->GetName().c_str()); | ||||
| } | } | ||||
| if (context_.global_step != nullptr) { | if (context_.global_step != nullptr) { | ||||
| GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration, | GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration, | ||||
| sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream)); | sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream)); | ||||
| @@ -100,8 +101,10 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, | |||||
| GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id)); | GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id)); | ||||
| } | } | ||||
| HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | |||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | |||||
| if (!model_->IsSingleOp()) { | |||||
| HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | |||||
| RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | |||||
| } | |||||
| args.outputs.clear(); | args.outputs.clear(); | ||||
| HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); | HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); | ||||
| @@ -168,7 +168,7 @@ Status NodeItem::InitInputsAndOutputs() { | |||||
| Status NodeItem::ResolveDynamicState() { | Status NodeItem::ResolveDynamicState() { | ||||
| (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); | (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); | ||||
| GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); | |||||
| GELOGD("Node name is %s, dynamic state is %d.", this->node_name.c_str(), is_dynamic); | |||||
| if (!is_dynamic) { | if (!is_dynamic) { | ||||
| GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic), | GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic), | ||||
| "[%s] Failed to get shape status.", | "[%s] Failed to get shape status.", | ||||
| @@ -22,6 +22,7 @@ | |||||
| #include "hybrid/node_executor/aicore/aicore_task_builder.h" | #include "hybrid/node_executor/aicore/aicore_task_builder.h" | ||||
| #include "graph/load/model_manager/tbe_handle_store.h" | #include "graph/load/model_manager/tbe_handle_store.h" | ||||
| #include "graph/types.h" | #include "graph/types.h" | ||||
| #include "single_op/task/build_task_utils.h" | |||||
| using optiling::OpRunInfo; | using optiling::OpRunInfo; | ||||
| @@ -31,6 +32,7 @@ namespace { | |||||
| constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | ||||
| constexpr char const *kAttrOpParamSize = "op_para_size"; | constexpr char const *kAttrOpParamSize = "op_para_size"; | ||||
| constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | ||||
| std::atomic<std::uint64_t> log_id(0); | |||||
| } // namespace | } // namespace | ||||
| TbeHandleHolder::TbeHandleHolder(void *bin_handle) | TbeHandleHolder::TbeHandleHolder(void *bin_handle) | ||||
| @@ -48,6 +50,12 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) { | |||||
| } | } | ||||
| Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | ||||
| log_name_ = op_desc.GetName() + "_tvmbin"; | |||||
| log_id_ = log_id++; | |||||
| auto op_desc_ptr = MakeShared<OpDesc>(op_desc); | |||||
| GE_CHECK_NOTNULL(op_desc_ptr); | |||||
| auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_ptr); | |||||
| GELOGI("[TASK_INFO] %lu/%s %s.", log_id_, log_name_.c_str(), task_info.c_str()); | |||||
| GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); | GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); | ||||
| GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); | GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); | ||||
| @@ -67,6 +75,7 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) | |||||
| output_indices_to_skip_.push_back(i); | output_indices_to_skip_.push_back(i); | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("[TASK_INFO] %lu/%s.", log_id_, log_name_.c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -114,6 +114,8 @@ class AiCoreOpTask { | |||||
| uint32_t tiling_key_ = 0; | uint32_t tiling_key_ = 0; | ||||
| void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
| bool is_dynamic_ = false; | bool is_dynamic_ = false; | ||||
| uint64_t log_id_ = 0; | |||||
| std::string log_name_; | |||||
| }; | }; | ||||
| class AtomicAddrCleanOpTask : public AiCoreOpTask { | class AtomicAddrCleanOpTask : public AiCoreOpTask { | ||||
| @@ -216,6 +216,10 @@ DEFINE_string(op_bank_path, "", "Optional; op bank path"); | |||||
| DEFINE_string(display_model_info, "0", "Optional; display model info"); | DEFINE_string(display_model_info, "0", "Optional; display model info"); | ||||
| DEFINE_string(performance_mode, "", "Optional; express high compile performance or high execute performance." | |||||
| "normal: no need to compile, used saved .o files directly;" | |||||
| "high: need to recompile, high execute performance mode."); | |||||
| class GFlagUtils { | class GFlagUtils { | ||||
| public: | public: | ||||
| /** | /** | ||||
| @@ -330,7 +334,8 @@ class GFlagUtils { | |||||
| "Default value: $HOME/atc_data\n" | "Default value: $HOME/atc_data\n" | ||||
| " --op_compiler_cache_mode Set the operator compilation cache mode." | " --op_compiler_cache_mode Set the operator compilation cache mode." | ||||
| "Options are disable(default), enable and force(force to refresh the cache)\n" | "Options are disable(default), enable and force(force to refresh the cache)\n" | ||||
| " --display_model_info enable for display model info; 0(default): close display, 1: open display"); | |||||
| " --display_model_info enable for display model info; 0(default): close display, 1: open display.\n" | |||||
| " --performance_mode Set high performance mode of compile or execute."); | |||||
| gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); | gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); | ||||
| // Using gflags to analyze input parameters | // Using gflags to analyze input parameters | ||||
| @@ -1078,6 +1083,7 @@ static void SetEnvForSingleOp(std::map<string, string> &options) { | |||||
| options.emplace(ge::OP_COMPILER_CACHE_MODE, FLAGS_op_compiler_cache_mode); | options.emplace(ge::OP_COMPILER_CACHE_MODE, FLAGS_op_compiler_cache_mode); | ||||
| options.emplace(ge::MDL_BANK_PATH_FLAG, FLAGS_mdl_bank_path); | options.emplace(ge::MDL_BANK_PATH_FLAG, FLAGS_mdl_bank_path); | ||||
| options.emplace(ge::OP_BANK_PATH_FLAG, FLAGS_op_bank_path); | options.emplace(ge::OP_BANK_PATH_FLAG, FLAGS_op_bank_path); | ||||
| options.emplace(ge::PERFORMANCE_MODE, FLAGS_performance_mode); | |||||
| } | } | ||||
| domi::Status GenerateSingleOp(const std::string& json_file_path) { | domi::Status GenerateSingleOp(const std::string& json_file_path) { | ||||
| @@ -1124,7 +1130,7 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) { | |||||
| output_path = FLAGS_output + "/"; | output_path = FLAGS_output + "/"; | ||||
| } | } | ||||
| output_path += param.file_name; | output_path += param.file_name; | ||||
| ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path); | |||||
| ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path, param.compile_flag); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index); | DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index); | ||||
| ret = domi::FAILED; | ret = domi::FAILED; | ||||
| @@ -1229,6 +1235,8 @@ domi::Status GenerateOmModel() { | |||||
| options.insert(std::pair<string, string>(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path)); | options.insert(std::pair<string, string>(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path)); | ||||
| options.insert(std::pair<string, string>(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info)); | options.insert(std::pair<string, string>(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info)); | ||||
| options.insert(std::pair<string, string>(string(ge::PERFORMANCE_MODE), FLAGS_performance_mode)); | |||||
| // set enable scope fusion passes | // set enable scope fusion passes | ||||
| SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes); | SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes); | ||||
| // print atc option map | // print atc option map | ||||
| @@ -53,6 +53,7 @@ constexpr char const *kKeyOriginFormat = "origin_format"; | |||||
| constexpr char const *kFileSuffix = ".om"; | constexpr char const *kFileSuffix = ".om"; | ||||
| constexpr char const *kKeyDynamicInput = "dynamic_input"; | constexpr char const *kKeyDynamicInput = "dynamic_input"; | ||||
| constexpr char const *kKeyDynamicOutput = "dynamic_output"; | constexpr char const *kKeyDynamicOutput = "dynamic_output"; | ||||
| constexpr char const *kKeyCompileFlag = "compile_flag"; | |||||
| constexpr int kDumpJsonIndent = 2; | constexpr int kDumpJsonIndent = 2; | ||||
| constexpr int kShapeRangePairSize = 2; | constexpr int kShapeRangePairSize = 2; | ||||
| constexpr int kShapeRangeLow = 0; | constexpr int kShapeRangeLow = 0; | ||||
| @@ -265,7 +266,10 @@ void from_json(const Json &j, SingleOpAttr &attr) { | |||||
| } | } | ||||
| void from_json(const Json &j, SingleOpDesc &desc) { | void from_json(const Json &j, SingleOpDesc &desc) { | ||||
| desc.op = j.at(kKeyOp).get<string>(); | |||||
| auto op = j.find(kKeyOp); | |||||
| if (op != j.end()) { | |||||
| desc.op = j.at(kKeyOp).get<string>(); | |||||
| } | |||||
| auto input_desc = j.find(kKeyInputDesc); | auto input_desc = j.find(kKeyInputDesc); | ||||
| if (input_desc != j.end()) { | if (input_desc != j.end()) { | ||||
| @@ -281,6 +285,11 @@ void from_json(const Json &j, SingleOpDesc &desc) { | |||||
| if (attr_field != j.end()) { | if (attr_field != j.end()) { | ||||
| desc.attrs = attr_field->get<vector<SingleOpAttr>>(); | desc.attrs = attr_field->get<vector<SingleOpAttr>>(); | ||||
| } | } | ||||
| auto compile_flag = j.find(kKeyCompileFlag); | |||||
| if (compile_flag != j.end()) { | |||||
| desc.compile_flag = compile_flag->get<int32_t>(); | |||||
| } | |||||
| } | } | ||||
| Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) { | Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) { | ||||
| @@ -583,10 +592,16 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| int32_t compile_flag = 0; | |||||
| for (const Json &single_op_json : single_op_list_json) { | for (const Json &single_op_json : single_op_list_json) { | ||||
| SingleOpDesc single_op_desc; | SingleOpDesc single_op_desc; | ||||
| GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str()); | GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str()); | ||||
| single_op_desc = single_op_json; | single_op_desc = single_op_json; | ||||
| GELOGD("Compile flag is %d.", single_op_desc.compile_flag); | |||||
| if (single_op_desc.compile_flag == 1) { | |||||
| compile_flag = single_op_desc.compile_flag; | |||||
| continue; | |||||
| } | |||||
| if (UpdateDynamicTensorName(single_op_desc.input_desc) != SUCCESS) { | if (UpdateDynamicTensorName(single_op_desc.input_desc) != SUCCESS) { | ||||
| GELOGE(FAILED, "[Update][DynamicTensorName] failed for invalid input param!"); | GELOGE(FAILED, "[Update][DynamicTensorName] failed for invalid input param!"); | ||||
| REPORT_CALL_ERROR("E19999", "UpdateDynamicTensorName failed for invalid input param."); | REPORT_CALL_ERROR("E19999", "UpdateDynamicTensorName failed for invalid input param."); | ||||
| @@ -604,6 +619,7 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| param.compile_flag = compile_flag; | |||||
| op_list.emplace_back(param); | op_list.emplace_back(param); | ||||
| GELOGI("Parse the index[%d] of op success", index); | GELOGI("Parse the index[%d] of op success", index); | ||||
| @@ -55,6 +55,7 @@ struct SingleOpDesc { | |||||
| std::vector<SingleOpTensorDesc> input_desc; | std::vector<SingleOpTensorDesc> input_desc; | ||||
| std::vector<SingleOpTensorDesc> output_desc; | std::vector<SingleOpTensorDesc> output_desc; | ||||
| std::vector<SingleOpAttr> attrs; | std::vector<SingleOpAttr> attrs; | ||||
| int32_t compile_flag = 0; | |||||
| }; | }; | ||||
| struct SingleOpBuildParam { | struct SingleOpBuildParam { | ||||
| @@ -62,6 +63,7 @@ struct SingleOpBuildParam { | |||||
| std::vector<ge::GeTensor> inputs; | std::vector<ge::GeTensor> inputs; | ||||
| std::vector<ge::GeTensor> outputs; | std::vector<ge::GeTensor> outputs; | ||||
| std::string file_name; | std::string file_name; | ||||
| int32_t compile_flag = 0; | |||||
| }; | }; | ||||
| void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc); | void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc); | ||||
| @@ -34,6 +34,9 @@ const size_t kDataMemAlignSize = 32; | |||||
| const size_t kDataMemAlignUnit = 2; | const size_t kDataMemAlignUnit = 2; | ||||
| const string kShapeTypeDynamic = "dynamic"; | const string kShapeTypeDynamic = "dynamic"; | ||||
| const string kShapeTypeStatic = "static"; | const string kShapeTypeStatic = "static"; | ||||
| const int64_t kHostMemType = 1; | |||||
| const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024; | |||||
| const uint32_t kAlignBytes = 512; | |||||
| size_t GetAlignedSize(size_t size) { | size_t GetAlignedSize(size_t size) { | ||||
| size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; | size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; | ||||
| @@ -65,6 +68,72 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { | |||||
| profiling_manager.ReportProfilingData(model_id, task_desc_info); | profiling_manager.ReportProfilingData(model_id, task_desc_info); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status CalInputsHostMemSize(const std::vector<DataBuffer> &inputs, | |||||
| std::vector<std::pair<size_t, uint64_t>> &inputs_size) { | |||||
| int64_t total_size = 0; | |||||
| size_t index = 0; | |||||
| for (auto &input_buffer : inputs) { | |||||
| int64_t input_size = 0; | |||||
| if (input_buffer.placement == kHostMemType) { | |||||
| GE_CHECK_LE(input_buffer.length, INT64_MAX); | |||||
| input_size = input_buffer.length; | |||||
| // input_size pad to 512 | |||||
| GE_CHK_STATUS_RET(CheckInt64AddOverflow(input_size, (kAlignBytes - 1)), "Padding size is beyond the INT64_MAX."); | |||||
| input_size = ((input_size + kAlignBytes - 1) / kAlignBytes) * kAlignBytes; | |||||
| inputs_size.emplace_back(index, input_size); | |||||
| GE_CHK_STATUS_RET(CheckInt64AddOverflow(total_size, input_size), "Total size is beyond the INT64_MAX."); | |||||
| total_size += input_size; | |||||
| GELOGD("The %zu input mem type is host, tensor size is %ld.", index, input_size); | |||||
| } | |||||
| index++; | |||||
| } | |||||
| if (total_size > kFuzzDeviceBufferSize) { | |||||
| GELOGE(FAILED, "[Check][Size]Total size is %ld, larger than 1M.", total_size); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status UpdateInputsBufferAddr(StreamResource *stream_resource, rtStream_t stream, | |||||
| const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||||
| std::vector<DataBuffer> &update_buffers) { | |||||
| GE_CHECK_NOTNULL(stream_resource); | |||||
| if (stream_resource->Init() != SUCCESS) { | |||||
| GELOGE(FAILED, "[Malloc][Memory]Failed to malloc device buffer."); | |||||
| return FAILED; | |||||
| } | |||||
| auto dst_addr = reinterpret_cast<uint8_t *>(stream_resource->GetDeviceBufferAddr()); | |||||
| // copy host mem from input_buffer to device mem of dst_addr | |||||
| for (const auto &input_size : inputs_size) { | |||||
| size_t index = input_size.first; | |||||
| auto size = input_size.second; | |||||
| GELOGD("Do H2D for %zu input, dst size is %zu, src length is %lu.", index, size, update_buffers[index].length); | |||||
| GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, size, update_buffers[index].data, update_buffers[index].length, | |||||
| RT_MEMCPY_HOST_TO_DEVICE_EX, stream)); | |||||
| update_buffers[index].data = dst_addr; | |||||
| dst_addr = reinterpret_cast<uint8_t *>(dst_addr + size); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status InitHybridModelArgs(const std::vector<DataBuffer> &input_buffers, | |||||
| const std::vector<DataBuffer> &output_buffers, | |||||
| const std::vector<GeTensorDesc> &inputs_desc, | |||||
| hybrid::HybridModelExecutor::ExecuteArgs &args) { | |||||
| for (auto &input : input_buffers) { | |||||
| args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length)); | |||||
| } | |||||
| for (auto &output : output_buffers) { | |||||
| args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length)); | |||||
| } | |||||
| for (auto &tensor_desc : inputs_desc) { | |||||
| auto desc = MakeShared<GeTensorDesc>(tensor_desc); | |||||
| GE_CHECK_NOTNULL(desc); | |||||
| args.input_desc.emplace_back(desc); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| SingleOp::SingleOp(StreamResource *stream_resource, std::mutex *stream_mutex, rtStream_t stream) | SingleOp::SingleOp(StreamResource *stream_resource, std::mutex *stream_mutex, rtStream_t stream) | ||||
| @@ -168,13 +237,28 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(const std::vector<DataBuffer> &inputs, | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(const std::vector<DataBuffer> &inputs, | ||||
| const std::vector<DataBuffer> &outputs) { | const std::vector<DataBuffer> &outputs) { | ||||
| GELOGD("Start SingleOp::ExecuteAsync."); | |||||
| Status ret = ValidateArgs(inputs, outputs); | Status ret = ValidateArgs(inputs, outputs); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| GE_CHECK_NOTNULL(stream_resource_); | GE_CHECK_NOTNULL(stream_resource_); | ||||
| vector<pair<size_t, uint64_t>> inputs_size; | |||||
| GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(inputs, inputs_size)); | |||||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | std::lock_guard<std::mutex> lk(*stream_mutex_); | ||||
| vector<DataBuffer> update_buffers = inputs; | |||||
| if (!inputs_size.empty()) { | |||||
| GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource_, stream_, inputs_size, update_buffers)); | |||||
| } | |||||
| if (hybrid_model_executor_ != nullptr) { | |||||
| GELOGD("Execute multi-task single op by hybrid model executor"); | |||||
| hybrid::HybridModelExecutor::ExecuteArgs args; | |||||
| GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, outputs, inputs_desc_, args)); | |||||
| return hybrid_model_executor_->Execute(args); | |||||
| } | |||||
| auto current_mem_base = stream_resource_->GetMemoryBase(); | auto current_mem_base = stream_resource_->GetMemoryBase(); | ||||
| if (running_param_->mem_base != current_mem_base) { | if (running_param_->mem_base != current_mem_base) { | ||||
| running_param_->mem_base = const_cast<uint8_t *>(current_mem_base); | running_param_->mem_base = const_cast<uint8_t *>(current_mem_base); | ||||
| @@ -185,7 +269,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c | |||||
| task->GetOpdesc()->GetName().c_str()); | task->GetOpdesc()->GetName().c_str()); | ||||
| } | } | ||||
| } | } | ||||
| ret = UpdateArgs(inputs, outputs); | |||||
| ret = UpdateArgs(update_buffers, outputs); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -252,33 +336,64 @@ Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc, | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DynamicSingleOp::SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||||
| const vector<GeTensorDesc> &input_desc, | |||||
| const std::vector<DataBuffer> &input_buffers) { | |||||
| auto op_desc = op_task_->GetOpdesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| GELOGD("Start update inputs tensor value of %s.", op_desc->GetName().c_str()); | |||||
| for (const auto &input_size : inputs_size) { | |||||
| size_t index = input_size.first; | |||||
| auto ge_tensor_desc = input_desc.at(index); | |||||
| // reconstruct GeTensor by DataBuffer | |||||
| GeTensorPtr ge_tensor = MakeShared<GeTensor>(ge_tensor_desc); | |||||
| GE_CHECK_NOTNULL(ge_tensor); | |||||
| GELOGD("The %zu tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.", | |||||
| index, ge_tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length); | |||||
| if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(input_buffers[index].data), | |||||
| static_cast<size_t>(input_buffers[index].length)) != SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| auto tensor_desc = op_desc->MutableInputDesc(index); | |||||
| GE_CHECK_NOTNULL(tensor_desc); | |||||
| if (!AttrUtils::SetTensor(tensor_desc, ATTR_NAME_VALUE, ge_tensor)) { | |||||
| GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE to %s.", op_desc->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | ||||
| const vector<DataBuffer> &input_buffers, | const vector<DataBuffer> &input_buffers, | ||||
| vector<GeTensorDesc> &output_desc, | vector<GeTensorDesc> &output_desc, | ||||
| vector<DataBuffer> &output_buffers) { | vector<DataBuffer> &output_buffers) { | ||||
| GELOGD("Start DynamicSingleOp::ExecuteAsync."); | |||||
| GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | ||||
| vector<pair<size_t, uint64_t>> inputs_size; | |||||
| GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(input_buffers, inputs_size)); | |||||
| vector<DataBuffer> update_buffers = input_buffers; | |||||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | |||||
| if (!inputs_size.empty()) { | |||||
| StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); | |||||
| GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers)); | |||||
| } | |||||
| if (hybrid_model_executor_ != nullptr) { | if (hybrid_model_executor_ != nullptr) { | ||||
| GELOGD("Execute multi-task dynamic single op by hybrid model executor"); | GELOGD("Execute multi-task dynamic single op by hybrid model executor"); | ||||
| hybrid::HybridModelExecutor::ExecuteArgs args; | hybrid::HybridModelExecutor::ExecuteArgs args; | ||||
| for (auto &input : input_buffers) { | |||||
| args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length)); | |||||
| } | |||||
| for (auto &output : output_buffers) { | |||||
| args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length)); | |||||
| } | |||||
| for (auto &tensor_desc : input_desc) { | |||||
| auto desc = MakeShared<GeTensorDesc>(tensor_desc); | |||||
| GE_CHECK_NOTNULL(desc); | |||||
| args.input_desc.emplace_back(desc); | |||||
| } | |||||
| GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, output_buffers, input_desc, args)); | |||||
| return hybrid_model_executor_->Execute(args); | return hybrid_model_executor_->Execute(args); | ||||
| } | } | ||||
| std::lock_guard<std::mutex> lk(*stream_mutex_); | |||||
| GE_CHECK_NOTNULL(op_task_); | GE_CHECK_NOTNULL(op_task_); | ||||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | |||||
| if (!inputs_size.empty()) { | |||||
| GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(inputs_size, input_desc, input_buffers)); | |||||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, update_buffers, output_desc, output_buffers, stream_)); | |||||
| } else { | |||||
| GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | |||||
| } | |||||
| GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | ||||
| GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -59,6 +59,9 @@ class SingleOp { | |||||
| std::vector<OpTask *> tasks_; | std::vector<OpTask *> tasks_; | ||||
| std::vector<std::vector<uintptr_t *>> arg_table_; | std::vector<std::vector<uintptr_t *>> arg_table_; | ||||
| std::unique_ptr<SingleOpModelParam> running_param_; | std::unique_ptr<SingleOpModelParam> running_param_; | ||||
| std::unique_ptr<hybrid::HybridModel> hybrid_model_; | |||||
| std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | |||||
| std::vector<GeTensorDesc> inputs_desc_; | |||||
| }; | }; | ||||
| class DynamicSingleOp { | class DynamicSingleOp { | ||||
| @@ -76,7 +79,8 @@ class DynamicSingleOp { | |||||
| const std::vector<DataBuffer> &inputs, | const std::vector<DataBuffer> &inputs, | ||||
| std::vector<GeTensorDesc> &output_desc, | std::vector<GeTensorDesc> &output_desc, | ||||
| std::vector<DataBuffer> &outputs) const; | std::vector<DataBuffer> &outputs) const; | ||||
| Status SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||||
| const vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers); | |||||
| std::unique_ptr<OpTask> op_task_; | std::unique_ptr<OpTask> op_task_; | ||||
| std::unique_ptr<hybrid::HybridModel> hybrid_model_; | std::unique_ptr<hybrid::HybridModel> hybrid_model_; | ||||
| std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | ||||
| @@ -85,6 +89,7 @@ class DynamicSingleOp { | |||||
| rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
| size_t num_inputs_ = 0; | size_t num_inputs_ = 0; | ||||
| size_t num_outputs_ = 0; | size_t num_outputs_ = 0; | ||||
| ComputeGraphPtr compute_graph_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_SINGLE_OP_SINGLE_OP_H_ | #endif // GE_SINGLE_OP_SINGLE_OP_H_ | ||||
| @@ -43,6 +43,8 @@ using std::vector; | |||||
| namespace ge { | namespace ge { | ||||
| namespace { | namespace { | ||||
| const size_t kDataOutputNum = 1; | const size_t kDataOutputNum = 1; | ||||
| const uint32_t kOutputIndexOfData = 0; | |||||
| constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | |||||
| Status IfInferDepend(GeModelPtr &ge_model, bool &flag) { | Status IfInferDepend(GeModelPtr &ge_model, bool &flag) { | ||||
| auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | ||||
| @@ -51,7 +53,9 @@ Status IfInferDepend(GeModelPtr &ge_model, bool &flag) { | |||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| const auto &depends = op_desc->GetOpInferDepends(); | const auto &depends = op_desc->GetOpInferDepends(); | ||||
| if (!depends.empty()) { | |||||
| bool support_dynamic_shape = false; | |||||
| (void)AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, support_dynamic_shape); | |||||
| if (!depends.empty() && support_dynamic_shape) { | |||||
| flag = true; | flag = true; | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -462,6 +466,31 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa | |||||
| *task = aicpucc_task.release(); | *task = aicpucc_task.release(); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
// Build a HybridModel + HybridModelExecutor for a static single op whose model
// has inference-depend inputs (see BuildOp, which routes here when IfInferDepend
// sets the flag). On success the single op executes through the hybrid executor
// instead of the regular task list.
// @param resource   stream resource providing the execution stream
// @param ge_model   loaded GE model; its graph becomes the hybrid root graph
// @param single_op  out: receives inputs_desc_, hybrid_model_ and hybrid_model_executor_
// @return SUCCESS or the first failing init status
Status SingleOpModel::InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model,
                                              SingleOp &single_op) {
  // Record each Data op's output-0 descriptor as the op's input descriptors,
  // used later to build executor args.
  for (const auto &op_desc : data_ops_) {
    auto output_tensor_desc = op_desc->GetOutputDesc(kOutputIndexOfData);
    GeTensorDesc tensor_desc(output_tensor_desc);
    single_op.inputs_desc_.emplace_back(tensor_desc);
    GELOGD("Init inputs desc from %s.", op_desc->GetName().c_str());
  }
  GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
  auto root_model = model_helper_.GetGeRootModel();
  GE_CHECK_NOTNULL(root_model);
  // The root graph and subgraph mapping must be set before HybridModel::Init.
  root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph()));
  root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model);
  single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model));
  GE_CHECK_NOTNULL(single_op.hybrid_model_);
  GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "[Init][HybridModel]Failed.");
  // Executor is bound to the current device and the resource's stream.
  int32_t device_id = 0;
  GE_CHK_RT_RET(rtGetDevice(&device_id));
  single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(),
                                                                                      device_id,
                                                                                      resource.GetStream()));
  GE_CHECK_NOTNULL(single_op.hybrid_model_executor_);
  GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed.");
  return SUCCESS;
}
| Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { | Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { | ||||
| GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs()); | GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs()); | ||||
| @@ -469,10 +498,20 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { | |||||
| single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params_)); | single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params_)); | ||||
| GE_CHECK_NOTNULL(single_op.running_param_); | GE_CHECK_NOTNULL(single_op.running_param_); | ||||
| GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op)); | GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op)); | ||||
| auto ge_model = model_helper_.GetGeModel(); | |||||
| GE_CHECK_NOTNULL(ge_model); | |||||
| bool infer_depend_flag = false; | |||||
| GE_CHK_STATUS_RET(IfInferDepend(ge_model, infer_depend_flag), "[Check][InferDepend] failed."); | |||||
| if (infer_depend_flag) { | |||||
| // construct single_op, do single op with HybridModelExecutor | |||||
| GELOGD("Init hybrid model params of single op, and will do execute with hybrid model executor."); | |||||
| return InitHybridModelExecutor(resource, ge_model, single_op); | |||||
| } | |||||
| return BuildTaskList(&resource, single_op); | return BuildTaskList(&resource, single_op); | ||||
| } | } | ||||
| Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { | |||||
| Status SingleOpModel::BuildModelTaskKernel(StreamResource *stream_resource, const TaskDef &task_def, | |||||
| DynamicSingleOp &single_op) { | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | ||||
| task_def.kernel_with_handle().context(); | task_def.kernel_with_handle().context(); | ||||
| @@ -483,6 +522,10 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||||
| TbeOpTask *tbe_task = nullptr; | TbeOpTask *tbe_task = nullptr; | ||||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | ||||
| tbe_task->SetModelArgs(model_name_, model_id_); | tbe_task->SetModelArgs(model_name_, model_id_); | ||||
| if (tbe_task->tiling_buffer_ != nullptr) { | |||||
| GELOGD("tiling buffer is not nullptr."); | |||||
| tbe_task->stream_resource_ = stream_resource; | |||||
| } | |||||
| single_op.op_task_.reset(tbe_task); | single_op.op_task_.reset(tbe_task); | ||||
| } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | ||||
| GELOGD("Building AICPU_CC task"); | GELOGD("Building AICPU_CC task"); | ||||
| @@ -504,10 +547,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
| Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &single_op) { | |||||
| auto ge_model = model_helper_.GetGeModel(); | auto ge_model = model_helper_.GetGeModel(); | ||||
| GE_CHECK_NOTNULL(ge_model); | GE_CHECK_NOTNULL(ge_model); | ||||
| auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | |||||
| GE_CHECK_NOTNULL(compute_graph); | |||||
| single_op.compute_graph_ = compute_graph; | |||||
| auto tasks = ge_model->GetModelTaskDefPtr()->task(); | auto tasks = ge_model->GetModelTaskDefPtr()->task(); | ||||
| for (int i = 0; i < tasks.size(); ++i) { | for (int i = 0; i < tasks.size(); ++i) { | ||||
| const TaskDef &task_def = tasks[i]; | const TaskDef &task_def = tasks[i]; | ||||
| @@ -521,7 +567,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
| "BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks."); | "BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks."); | ||||
| return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op)); | |||||
| GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(stream_resource, task_def, single_op)); | |||||
| } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | ||||
| if (single_op.op_task_ != nullptr) { | if (single_op.op_task_ != nullptr) { | ||||
| GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks."); | GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks."); | ||||
| @@ -561,6 +607,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||||
| single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | ||||
| GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | ||||
| model_params_.memory_size = UINT_MAX; | model_params_.memory_size = UINT_MAX; | ||||
| model_params_.graph_is_dynamic = true; | |||||
| auto ge_model = model_helper_.GetGeModel(); | auto ge_model = model_helper_.GetGeModel(); | ||||
| GE_CHECK_NOTNULL(ge_model); | GE_CHECK_NOTNULL(ge_model); | ||||
| @@ -585,6 +632,6 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||||
| GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed."); | GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| return BuildTaskListForDynamicOp(single_op); | |||||
| return BuildTaskListForDynamicOp(&resource, single_op); | |||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -40,6 +40,7 @@ struct SingleOpModelParam { | |||||
| std::map<uintptr_t, int> addr_mapping_; | std::map<uintptr_t, int> addr_mapping_; | ||||
| int64_t core_type = 0; | int64_t core_type = 0; | ||||
| bool graph_is_dynamic = false; | |||||
| }; | }; | ||||
| class SingleOpModel { | class SingleOpModel { | ||||
| @@ -65,15 +66,17 @@ class SingleOpModel { | |||||
| void ParseOutputNode(const OpDescPtr &op_desc); | void ParseOutputNode(const OpDescPtr &op_desc); | ||||
| Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | ||||
| Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); | |||||
| Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op); | |||||
| Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); | Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); | ||||
| Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | ||||
| bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | ||||
| Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | ||||
| Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); | |||||
| Status BuildModelTaskKernel(StreamResource *stream_resource, const domi::TaskDef &task_def, | |||||
| DynamicSingleOp &single_op); | |||||
| static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); | static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); | ||||
| void ParseArgTable(OpTask *task, SingleOp &op); | void ParseArgTable(OpTask *task, SingleOp &op); | ||||
| Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op); | |||||
| std::string model_name_; | std::string model_name_; | ||||
| uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
| @@ -22,6 +22,11 @@ | |||||
| #include "single_op/single_op_model.h" | #include "single_op/single_op_model.h" | ||||
| namespace ge { | namespace ge { | ||||
| namespace { | |||||
| // limit available device mem size 1M | |||||
| const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024; | |||||
| } | |||||
| StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) { | StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) { | ||||
| } | } | ||||
| @@ -39,6 +44,17 @@ StreamResource::~StreamResource() { | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed.")); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed.")); | ||||
| } | } | ||||
| } | } | ||||
| if (device_buffer_ != nullptr) { | |||||
| auto rt_ret = rtFree(device_buffer_); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed.")); | |||||
| } | |||||
| } | |||||
| Status StreamResource::Init() { | |||||
| auto rt_ret = rtMalloc(&device_buffer_, kFuzzDeviceBufferSize, RT_MEMORY_HBM); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Malloc][Rt] failed.")); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| SingleOp *StreamResource::GetOperator(const uint64_t key) { | SingleOp *StreamResource::GetOperator(const uint64_t key) { | ||||
| @@ -40,6 +40,7 @@ class StreamResource { | |||||
| rtStream_t GetStream() const; | rtStream_t GetStream() const; | ||||
| void SetStream(rtStream_t stream); | void SetStream(rtStream_t stream); | ||||
| Status Init(); | |||||
| SingleOp *GetOperator(const uint64_t key); | SingleOp *GetOperator(const uint64_t key); | ||||
| DynamicSingleOp *GetDynamicOperator(const uint64_t key); | DynamicSingleOp *GetDynamicOperator(const uint64_t key); | ||||
| @@ -49,6 +50,9 @@ class StreamResource { | |||||
| uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true); | uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true); | ||||
| uint8_t *MallocWeight(const std::string &purpose, size_t size); | uint8_t *MallocWeight(const std::string &purpose, size_t size); | ||||
| const uint8_t *GetMemoryBase() const; | const uint8_t *GetMemoryBase() const; | ||||
| void *GetDeviceBufferAddr() const { | |||||
| return device_buffer_; | |||||
| } | |||||
| private: | private: | ||||
| uint8_t *DoMallocMemory(const std::string &purpose, | uint8_t *DoMallocMemory(const std::string &purpose, | ||||
| @@ -65,6 +69,7 @@ class StreamResource { | |||||
| rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
| std::mutex mu_; | std::mutex mu_; | ||||
| std::mutex stream_mu_; | std::mutex stream_mu_; | ||||
| void *device_buffer_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -137,7 +137,7 @@ Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | |||||
| Status OpTask::UpdateRunInfo() { | |||||
| return UNSUPPORTED; | return UNSUPPORTED; | ||||
| } | } | ||||
| @@ -200,14 +200,14 @@ void TbeOpTask::SetHandle(void *handle) { | |||||
| Status TbeOpTask::LaunchKernel(rtStream_t stream) { | Status TbeOpTask::LaunchKernel(rtStream_t stream) { | ||||
| GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | ||||
| auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | |||||
| auto ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), sm_desc, stream); | |||||
| auto ret = DoLaunchKernel(stream); | |||||
| int retry_times = 0; | int retry_times = 0; | ||||
| while (ret != RT_ERROR_NONE && retry_times < kLaunchRetryTimes) { | while (ret != RT_ERROR_NONE && retry_times < kLaunchRetryTimes) { | ||||
| retry_times++; | retry_times++; | ||||
| GELOGW("Retry after %d ms, retry_times: %d", kSleepTime, retry_times); | GELOGW("Retry after %d ms, retry_times: %d", kSleepTime, retry_times); | ||||
| std::this_thread::sleep_for(std::chrono::milliseconds(kSleepTime)); | std::this_thread::sleep_for(std::chrono::milliseconds(kSleepTime)); | ||||
| ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, sm_desc, stream); | |||||
| ret = DoLaunchKernel(stream); | |||||
| } | } | ||||
| if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
| @@ -220,8 +220,7 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | |||||
| GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); | |||||
| Status TbeOpTask::UpdateRunInfo() { | |||||
| // invoke OpParaCalculate | // invoke OpParaCalculate | ||||
| GELOGD("Start to invoke OpParaCalculate."); | GELOGD("Start to invoke OpParaCalculate."); | ||||
| optiling::OpRunInfo run_info; | optiling::OpRunInfo run_info; | ||||
| @@ -235,10 +234,9 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve | |||||
| block_dim_ = run_info.block_dim; | block_dim_ = run_info.block_dim; | ||||
| tiling_data_ = run_info.tiling_data.str(); | tiling_data_ = run_info.tiling_data.str(); | ||||
| tiling_key_ = run_info.tiling_key; | tiling_key_ = run_info.tiling_key; | ||||
| run_info_workspaces_ = run_info.workspaces; | |||||
| GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, | GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, | ||||
| tiling_data_.size(), tiling_key_); | tiling_data_.size(), tiling_key_); | ||||
| GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "[Allocate][Workspaces] failed."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -288,14 +286,33 @@ Status TbeOpTask::UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, cons | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size) { | |||||
| Status TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) { | |||||
| if (tiling_buffer != nullptr) { | |||||
| uintptr_t *arg_base = nullptr; | |||||
| size_t arg_num = 0; | |||||
| GetIoAddr(arg_base, arg_num); | |||||
| GE_CHECK_NOTNULL(node); | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
| uint32_t inputs_num = node->GetOpDesc()->GetInputsSize(); | |||||
| uint32_t outputs_num = node->GetOpDesc()->GetOutputsSize(); | |||||
| uint32_t workspace_nums = node->GetOpDesc()->GetWorkspace().size(); | |||||
| uint32_t tiling_index = inputs_num + outputs_num + workspace_nums; | |||||
| if (arg_num == 0 || arg_num < tiling_index) { | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Size]Tiling index %u, arg number %zu is invalid.", | |||||
| tiling_index, arg_num); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| arg_base[tiling_index] = reinterpret_cast<uintptr_t>(tiling_buffer); | |||||
| } | |||||
| node_ = node; | node_ = node; | ||||
| tiling_buffer_ = tiling_buffer; | tiling_buffer_ = tiling_buffer; | ||||
| max_tiling_size_ = max_tiling_size; | max_tiling_size_ = max_tiling_size; | ||||
| return SUCCESS; | |||||
| } | } | ||||
| Status TbeOpTask::AllocateWorkspaces(const vector<int64_t> &workspace_sizes) { | Status TbeOpTask::AllocateWorkspaces(const vector<int64_t> &workspace_sizes) { | ||||
| static const std::string kPurpose("malloc workspace memory for dynamic op."); | static const std::string kPurpose("malloc workspace memory for dynamic op."); | ||||
| workspaces_.clear(); | |||||
| if (workspace_sizes.empty()) { | if (workspace_sizes.empty()) { | ||||
| GELOGD("No need to allocate workspace."); | GELOGD("No need to allocate workspace."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -333,8 +350,10 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
| vector<GeTensorDesc> &output_desc, | vector<GeTensorDesc> &output_desc, | ||||
| vector<DataBuffer> &output_buffers, | vector<DataBuffer> &output_buffers, | ||||
| rtStream_t stream) { | rtStream_t stream) { | ||||
| GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc)); | |||||
| GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); | GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); | ||||
| GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); | |||||
| GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo()); | |||||
| GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed."); | |||||
| std::vector<void *> args; | std::vector<void *> args; | ||||
| for (auto &buffer : input_buffers) { | for (auto &buffer : input_buffers) { | ||||
| args.emplace_back(buffer.data); | args.emplace_back(buffer.data); | ||||
| @@ -354,6 +373,15 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
| args.emplace_back(tiling_buffer_); | args.emplace_back(tiling_buffer_); | ||||
| } | } | ||||
| GELOGD("Dst size is %zu, src size is %zu.", arg_size_, args.size() * sizeof(void *)); | |||||
| // node with workspace: build can not get size of workspace, need to update arg_size_ when execute | |||||
| if (arg_size_ < (args.size() * sizeof(void *))) { | |||||
| size_t temp_size = args.size() * sizeof(void *); | |||||
| GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size); | |||||
| args_.reset(new(std::nothrow) uint8_t[temp_size]()); | |||||
| GE_CHECK_NOTNULL(args_); | |||||
| arg_size_ = temp_size; | |||||
| } | |||||
| if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { | if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { | ||||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str()); | GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str()); | ||||
| REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str()); | REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str()); | ||||
| @@ -361,17 +389,22 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
| } | } | ||||
| GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | ||||
| GE_CHK_STATUS_RET(DoLaunchKernel(stream), "Failed to do launch kernel."); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status TbeOpTask::DoLaunchKernel(rtStream_t stream) { | |||||
| auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | |||||
| if (handle_ == nullptr) { | if (handle_ == nullptr) { | ||||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); | |||||
| GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); | |||||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), | |||||
| sm_desc, stream)); | |||||
| } else { | } else { | ||||
| std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_); | std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_); | ||||
| std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_); | std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_); | ||||
| GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr, | |||||
| stream, kernel_info.c_str())); | |||||
| GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str()); | |||||
| GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), | |||||
| static_cast<uint32_t>(arg_size_), sm_desc, stream, kernel_info.c_str())); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -30,6 +30,7 @@ | |||||
| #include "cce/aicpu_engine_struct.h" | #include "cce/aicpu_engine_struct.h" | ||||
| #include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | #include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | ||||
| #include "init/gelib.h" | #include "init/gelib.h" | ||||
| #include "register/op_tiling.h" | |||||
| namespace ge { | namespace ge { | ||||
| class StreamResource; | class StreamResource; | ||||
| @@ -39,8 +40,7 @@ class OpTask { | |||||
| OpTask() = default; | OpTask() = default; | ||||
| virtual ~OpTask() = default; | virtual ~OpTask() = default; | ||||
| virtual Status LaunchKernel(rtStream_t stream) = 0; | virtual Status LaunchKernel(rtStream_t stream) = 0; | ||||
| virtual Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc, | |||||
| const vector<GeTensorDesc> &output_desc); | |||||
| virtual Status UpdateRunInfo(); | |||||
| virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | ||||
| void SetModelArgs(std::string model_name, uint32_t model_id); | void SetModelArgs(std::string model_name, uint32_t model_id); | ||||
| Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | ||||
| @@ -81,22 +81,23 @@ class TbeOpTask : public OpTask { | |||||
| void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | ||||
| const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); | const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); | ||||
| Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc, | |||||
| const vector<GeTensorDesc> &output_desc) override; | |||||
| Status UpdateRunInfo() override; | |||||
| const void *GetArgs() const; | const void *GetArgs() const; | ||||
| size_t GetArgSize() const; | size_t GetArgSize() const; | ||||
| const std::string &GetStubName() const; | const std::string &GetStubName() const; | ||||
| void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | |||||
| Status EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size); | |||||
| const std::string &GetTaskType() const override; | const std::string &GetTaskType() const override; | ||||
| void SetHandle(void *handle); | void SetHandle(void *handle); | ||||
| private: | private: | ||||
| friend class SingleOpModel; | friend class SingleOpModel; | ||||
| friend class TbeTaskBuilder; | |||||
| static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); | static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); | ||||
| Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | ||||
| const vector<GeTensorDesc> &output_desc); | const vector<GeTensorDesc> &output_desc); | ||||
| Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes); | Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes); | ||||
| Status DoLaunchKernel(rtStream_t stream); | |||||
| const void *stub_func_ = nullptr; | const void *stub_func_ = nullptr; | ||||
| std::unique_ptr<uint8_t[]> args_; | std::unique_ptr<uint8_t[]> args_; | ||||
| @@ -108,6 +109,7 @@ class TbeOpTask : public OpTask { | |||||
| void *tiling_buffer_ = nullptr; | void *tiling_buffer_ = nullptr; | ||||
| uint32_t max_tiling_size_ = 0; | uint32_t max_tiling_size_ = 0; | ||||
| std::string tiling_data_; | std::string tiling_data_; | ||||
| std::vector<int64_t> run_info_workspaces_; | |||||
| std::vector<void *> workspaces_; | std::vector<void *> workspaces_; | ||||
| NodePtr node_; | NodePtr node_; | ||||
| @@ -308,92 +308,65 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m | |||||
| } | } | ||||
| Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc) { | Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc) { | ||||
| size_t arg_size = kernel_def_.args_size(); | |||||
| auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||||
| GE_CHECK_NOTNULL(args); | |||||
| auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy failed, size = %zu, ret = %d", | |||||
| arg_size, static_cast<int>(rt_ret)); | |||||
| REPORT_INNER_ERROR("E19999", "rtMemcpy failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret)); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | |||||
| bool is_task_all_kernel = (task_type == RT_MODEL_TASK_ALL_KERNEL); | |||||
| size_t arg_size = 0; | |||||
| std::unique_ptr<uint8_t[]> args = nullptr; | |||||
| if (is_task_all_kernel) { | |||||
| GELOGD("SetKernelArgs of %s in branch of RT_MODEL_TASK_ALL_KERNEL.", op_desc->GetName().c_str()); | |||||
| arg_size = kernel_def_with_handle_.args_size(); | |||||
| args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||||
| GE_CHECK_NOTNULL(args); | |||||
| GE_CHK_RT_RET(rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, | |||||
| RT_MEMCPY_HOST_TO_HOST)) | |||||
| } else { | |||||
| GELOGD("SetKernelArgs of %s in branch of RT_MODEL_TASK_KERNEL.", op_desc->GetName().c_str()); | |||||
| arg_size = kernel_def_.args_size(); | |||||
| args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||||
| GE_CHECK_NOTNULL(args); | |||||
| GE_CHK_RT_RET(rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST)) | |||||
| } | } | ||||
| const domi::KernelContext &context = kernel_def_.context(); | |||||
| const domi::KernelContext &context = task_type == RT_MODEL_TASK_ALL_KERNEL ? | |||||
| kernel_def_with_handle_.context() : kernel_def_.context(); | |||||
| const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | ||||
| uint16_t offset = *args_offset_tmp; | uint16_t offset = *args_offset_tmp; | ||||
| bool is_dynamic = false; | |||||
| (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); | |||||
| if (is_dynamic) { | |||||
| GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); | |||||
| } else { | |||||
| // copy args | |||||
| std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||||
| void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||||
| uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||||
| rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret)); | |||||
| REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret = %d", static_cast<int>(rt_ret)); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| } | |||||
| task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); | |||||
| // copy args | |||||
| std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||||
| void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||||
| uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||||
| GE_CHK_RT_RET(rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST)); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, | |||||
| const OpDescPtr &op_desc) { | |||||
| size_t arg_size = kernel_def_with_handle_.args_size(); | |||||
| auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||||
| GE_CHECK_NOTNULL(args); | |||||
| auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "[Update][Kernel_def:args]rtMemcpy failed, size = %zu, ret = %d", | |||||
| arg_size, static_cast<int>(rt_ret)); | |||||
| REPORT_INNER_ERROR("E19999", "rtMemcpy failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret)); | |||||
| return rt_ret; | |||||
| if (is_task_all_kernel) { | |||||
| task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, | |||||
| kernel_def_with_handle_); | |||||
| } else { | |||||
| task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); | |||||
| } | } | ||||
| const domi::KernelContext &context = kernel_def_with_handle_.context(); | |||||
| const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | |||||
| uint16_t offset = *args_offset_tmp; | |||||
| bool is_dynamic = false; | bool is_dynamic = false; | ||||
| (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); | (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); | ||||
| if (is_dynamic) { | if (is_dynamic) { | ||||
| GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); | GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); | ||||
| } else { | |||||
| // copy args | |||||
| std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||||
| void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||||
| uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||||
| rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret)); | |||||
| REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret = %d", static_cast<int>(rt_ret)); | |||||
| return rt_ret; | |||||
| if (!param.graph_is_dynamic && task.tiling_buffer_ != nullptr) { | |||||
| GELOGD("Need to update run info when graph is static with dynamic node: %s.", op_desc->GetName().c_str()); | |||||
| task.UpdateRunInfo(); | |||||
| GE_CHK_RT_RET(rtMemcpy(task.tiling_buffer_, task.max_tiling_size_, task.tiling_data_.data(), | |||||
| task.tiling_data_.size(), RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| } | } | ||||
| } | } | ||||
| task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, | |||||
| kernel_def_with_handle_); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { | Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { | ||||
| GELOGD("Build tbe task begin"); | GELOGD("Build tbe task begin"); | ||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | |||||
| auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) : | |||||
| SetKernelArgs(task, param, op_desc_); | |||||
| auto ret = SetKernelArgs(task, param, op_desc_); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | |||||
| ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) : | ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) : | ||||
| RegisterKernel(task, param); | RegisterKernel(task, param); | ||||
| task.SetHandle(handle_); | task.SetHandle(handle_); | ||||
| @@ -437,7 +410,7 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { | |||||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | ||||
| } | } | ||||
| task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size)); | |||||
| task.EnableDynamicSupport(node_, tiling_buffer, static_cast<uint32_t>(max_size)); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -97,7 +97,6 @@ class TbeTaskBuilder { | |||||
| private: | private: | ||||
| Status InitTilingInfo(TbeOpTask &task); | Status InitTilingInfo(TbeOpTask &task); | ||||
| Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | ||||
| Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | |||||
| Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; | Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; | ||||
| Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); | Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); | ||||
| @@ -65,10 +65,12 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||||
| /// @param [in] inputs: input tensors. | /// @param [in] inputs: input tensors. | ||||
| /// @param [in] outputs: output tensors. | /// @param [in] outputs: output tensors. | ||||
| /// @param [in] model_file_name: name of model file. | /// @param [in] model_file_name: name of model file. | ||||
| /// @param [in] compile_flag: op build flag, accurate build is 0, fuzz build is 1 | |||||
| /// @return SUCCESS or FAILED | /// @return SUCCESS or FAILED | ||||
| /// | /// | ||||
| Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | ||||
| const std::vector<GeTensor> &outputs, const std::string &model_file_name); | |||||
| const std::vector<GeTensor> &outputs, const std::string &model_file_name, | |||||
| int32_t compile_flag = 0); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief: Build single Op into model buff. | /// @brief: Build single Op into model buff. | ||||
| @@ -100,7 +102,7 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||||
| ge::ModelBufferData &model, bool is_offline = true); | ge::ModelBufferData &model, bool is_offline = true); | ||||
| Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | ||||
| const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | ||||
| bool is_offline = true); | |||||
| bool is_offline = true, int32_t compile_flag = 0); | |||||
| bool CheckNoAicore(const ComputeGraphPtr &graph); | bool CheckNoAicore(const ComputeGraphPtr &graph); | ||||
| void RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs); | void RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs); | ||||
| Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); | Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); | ||||
| @@ -123,6 +123,7 @@ struct OmgContext { | |||||
| bool need_multi_batch = false; | bool need_multi_batch = false; | ||||
| std::vector<NodePtr> data_nodes; | std::vector<NodePtr> data_nodes; | ||||
| std::vector<NodePtr> getnext_nosink_nodes; | std::vector<NodePtr> getnext_nosink_nodes; | ||||
| bool fuzz_compile_flag = false; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -278,6 +278,7 @@ set(COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/parallel_group_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/parallel_group_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/buffer_pool_memory_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/buffer_pool_memory_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/mark_node_unknown_shape_pass.cc" | |||||
| "${GE_CODE_DIR}/ge/model/ge_model.cc" | "${GE_CODE_DIR}/ge/model/ge_model.cc" | ||||
| "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" | "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | ||||
| @@ -708,6 +709,8 @@ set(PASS_TEST_FILES | |||||
| "graph/passes/transpose_transdata_pass_unittest.cc" | "graph/passes/transpose_transdata_pass_unittest.cc" | ||||
| "graph/passes/parallel_group_pass_unittest.cc" | "graph/passes/parallel_group_pass_unittest.cc" | ||||
| "graph/passes/buffer_pool_memory_pass_unittest.cc" | "graph/passes/buffer_pool_memory_pass_unittest.cc" | ||||
| "graph/passes/mark_node_unknown_shape_pass_unittest.cc" | |||||
| "graph/passes/reshape_recovery_pass_unittest.cc" | |||||
| ) | ) | ||||
| set(KERNEL_TEST_FILES | set(KERNEL_TEST_FILES | ||||
| @@ -799,6 +802,7 @@ set(SINGLE_OP_TEST_FILES | |||||
| "single_op/single_op_manager_unittest.cc" | "single_op/single_op_manager_unittest.cc" | ||||
| "single_op/stream_resource_unittest.cc" | "single_op/stream_resource_unittest.cc" | ||||
| "single_op/single_op_task_unittest.cc" | "single_op/single_op_task_unittest.cc" | ||||
| "single_op/single_op_unittest.cc" | |||||
| ) | ) | ||||
| set(PROFILING_MNG_TEST_FILES | set(PROFILING_MNG_TEST_FILES | ||||
| @@ -45,6 +45,15 @@ ComputeGraphPtr MakeGraph() { | |||||
| builder.AddDataEdge(data, 0, addn1, 0); | builder.AddDataEdge(data, 0, addn1, 0); | ||||
| return builder.GetGraph(); | return builder.GetGraph(); | ||||
| } | } | ||||
| static GeAttrValue::NamedAttrs CreateNamedAttrs(const string &name, std::map<string, GeAttrValue> map) { | |||||
| GeAttrValue::NamedAttrs named_attrs; | |||||
| named_attrs.SetName(name); | |||||
| for (auto it : map) { | |||||
| named_attrs.SetAttr(it.first, it.second); | |||||
| } | |||||
| return named_attrs; | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| /* | /* | ||||
| @@ -85,25 +94,7 @@ TEST_F(UtestGeGenerator, test_build_single_op_online) { | |||||
| GeGenerator generator; | GeGenerator generator; | ||||
| generator.Initialize({}); | generator.Initialize({}); | ||||
| ModelBufferData model_buffer; | ModelBufferData model_buffer; | ||||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, model_buffer), FAILED); | |||||
| } | |||||
| TEST_F(UtestGeGenerator, test_singleop_fuzz_build) { | |||||
| GeTensorDesc tensor_desc; | |||||
| shared_ptr<OpDesc> op_desc = make_shared<OpDesc>("Add", "add"); | |||||
| op_desc->AddInputDesc(tensor_desc); | |||||
| op_desc->AddInputDesc(tensor_desc); | |||||
| op_desc->AddOutputDesc(tensor_desc); | |||||
| GeTensor tensor(tensor_desc); | |||||
| const vector<GeTensor> inputs = { tensor, tensor }; | |||||
| const vector<GeTensor> outputs = { tensor }; | |||||
| GeGenerator generator; | |||||
| generator.Initialize({}); | |||||
| ModelBufferData model_buffer; | |||||
| bool compile_flag = true; | |||||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, compile_flag, model_buffer), SUCCESS); | |||||
| EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, false, model_buffer), FAILED); | |||||
| } | } | ||||
| TEST_F(UtestGeGenerator, test_check_aicore) { | TEST_F(UtestGeGenerator, test_check_aicore) { | ||||
| @@ -0,0 +1,115 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #include <cstdint> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #define private public | |||||
| #include "graph/passes/mark_node_unknown_shape_pass.h" | |||||
| #include "common/ge_inner_error_codes.h" | |||||
| #include "inc/pass_manager.h" | |||||
| #include "graph/common/local_context.h" | |||||
| #undef private | |||||
| namespace ge { | |||||
| class UtestMarkNodeUnknownShapePass : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| public: | |||||
| NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { | |||||
| GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
| auto op_desc = std::make_shared<OpDesc>(name, type); | |||||
| for (auto i = 0; i < in_num; ++i) { | |||||
| op_desc->AddInputDesc(test_desc); | |||||
| } | |||||
| for (auto i = 0; i < out_num; ++i) { | |||||
| op_desc->AddOutputDesc(test_desc); | |||||
| } | |||||
| return graph->AddNode(op_desc); | |||||
| } | |||||
| /// netoutput1 | |||||
| /// | | |||||
| /// conv1 | |||||
| /// \ / | |||||
| /// data | |||||
| void make_graph(const ComputeGraphPtr &graph) { | |||||
| GetLocalOmgContext().fuzz_compile_flag = true; | |||||
| auto conv2d_node = MakeNode(graph, 2, 1, "conv1", "Conv2D"); | |||||
| { | |||||
| auto data1 = MakeNode(graph, 1, 1, "data", "Data"); | |||||
| GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT); | |||||
| data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); | |||||
| data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); | |||||
| GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); | |||||
| GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); | |||||
| } | |||||
| conv2d_node->GetOpDesc()->SetOpKernelLibName("AIcoreEngine"); | |||||
| AttrUtils::SetBool(conv2d_node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS, true); | |||||
| auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); | |||||
| GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | |||||
| } | |||||
| }; | |||||
| TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_GE_kernel) { | |||||
| OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||||
| ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||||
| op_desc->SetOpKernelLibName("GE"); | |||||
| graph->AddNode(op_desc); | |||||
| PassManager pass; | |||||
| pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||||
| EXPECT_EQ(pass.Run(graph), SUCCESS); | |||||
| } | |||||
| TEST_F(UtestMarkNodeUnknownShapePass, test_run_without_fuzz_attrs) { | |||||
| OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||||
| ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||||
| op_desc->SetOpKernelLibName("AIcoreEngine"); | |||||
| graph->AddNode(op_desc); | |||||
| GetLocalOmgContext().fuzz_compile_flag = true; | |||||
| PassManager pass; | |||||
| pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||||
| EXPECT_EQ(pass.Run(graph), SUCCESS); | |||||
| } | |||||
| TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_fuzz_attrs) { | |||||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph"); | |||||
| make_graph(graph); | |||||
| PassManager pass; | |||||
| pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||||
| EXPECT_EQ(pass.Run(graph), SUCCESS); | |||||
| EXPECT_EQ(graph->GetAllNodes().size(), 3); | |||||
| for (const auto &node : graph->GetAllNodes()) { | |||||
| if (node->GetName() == "conv1") { | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| EXPECT_NE(op_desc, nullptr); | |||||
| for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||||
| auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | |||||
| EXPECT_TRUE(input_desc->GetShape().GetDim(0) == -2); | |||||
| } | |||||
| for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) { | |||||
| EXPECT_NE(output_desc, nullptr); | |||||
| EXPECT_TRUE(output_desc->GetShape().GetDim(0) == -2); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,69 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "graph/passes/reshape_recovery_pass.h" | |||||
| #include <gtest/gtest.h> | |||||
| #include <set> | |||||
| #include <string> | |||||
| #include "graph_builder_utils.h" | |||||
| namespace ge { | |||||
// Fixture for ReshapeRecoveryPass tests; no per-test setup or teardown is needed.
class UtestReshapeRecoveryPass : public testing::Test {
 protected:
  void SetUp() {}
  void TearDown() {}
};
| namespace { | |||||
| /// netoutput1 | |||||
| /// | \ | |||||
| ///transdata1 \ | |||||
| /// | \ | |||||
| /// | transdata2 | |||||
| /// | / | |||||
| /// var1 const1 | |||||
| ut::GraphBuilder Graph1Builder() { | |||||
| ut::GraphBuilder builder = ut::GraphBuilder("g2"); | |||||
| auto var1 = builder.AddNode("var1", "Variable", 0, 1, FORMAT_ND, DT_FLOAT, {-1}); | |||||
| auto const1 = builder.AddNode("const1", "Const", 0, 1, FORMAT_ND, DT_FLOAT, {1, 1, 224, 224}); | |||||
| auto transdata2 = builder.AddNode("transdata2", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224}); | |||||
| auto transdata1 = builder.AddNode("transdata1", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224}); | |||||
| auto netoutput1 = builder.AddNode("netoutput1", "Netoutput", 2, 0); | |||||
| builder.AddDataEdge(var1, 0, transdata1, 0); | |||||
| builder.AddDataEdge(const1, 0, transdata2, 0); | |||||
| builder.AddDataEdge(transdata2, 0, netoutput1, 1); | |||||
| builder.AddDataEdge(transdata1, 0, netoutput1, 0); | |||||
| return builder; | |||||
| } | |||||
| } // namespace | |||||
| TEST_F(UtestReshapeRecoveryPass, reshape_recovery_with_dynamic_shape) { | |||||
| auto builder = Graph1Builder(); | |||||
| auto graph = builder.GetGraph(); | |||||
| ReshapeRecoveryPass reshape_recovery_pass; | |||||
| EXPECT_EQ(graph->GetDirectNodesSize(),5); | |||||
| Status ret = reshape_recovery_pass.Run(graph); | |||||
| EXPECT_EQ(ret, SUCCESS); | |||||
| EXPECT_EQ(graph->GetDirectNodesSize(),8); | |||||
| auto reshape1 = graph->FindNode("Reshape_ReshapeRecoveryPass_0"); | |||||
| EXPECT_NE(reshape1, nullptr); | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,163 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #include <vector> | |||||
| #include "runtime/rt.h" | |||||
| #define protected public | |||||
| #define private public | |||||
| #include "single_op/single_op.h" | |||||
| #include "single_op/single_op_manager.h" | |||||
| #undef private | |||||
| #undef protected | |||||
| using namespace std; | |||||
| using namespace ge; | |||||
// Fixture for SingleOp / DynamicSingleOp execution tests; no shared state,
// so SetUp/TearDown are intentionally empty.
class UtestSingleOp : public testing::Test {
 protected:
  void SetUp() {}
  void TearDown() {}
};
| TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async) { | |||||
| uintptr_t resource_id = 0; | |||||
| std::mutex stream_mu; | |||||
| rtStream_t stream = nullptr; | |||||
| rtStreamCreate(&stream, 0); | |||||
| DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream); | |||||
| vector<int64_t> dims_vec_0 = {2}; | |||||
| vector<GeTensorDesc> input_desc; | |||||
| GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); | |||||
| // input data from device | |||||
| AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 0); | |||||
| input_desc.emplace_back(tensor_desc_0); | |||||
| vector<DataBuffer> input_buffers; | |||||
| ge::DataBuffer data_buffer; | |||||
| data_buffer.data = new char[4]; | |||||
| data_buffer.length = 4; | |||||
| input_buffers.emplace_back(data_buffer); | |||||
| vector<GeTensorDesc> output_desc; | |||||
| vector<DataBuffer> output_buffers; | |||||
| // UpdateRunInfo failed | |||||
| EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), ACL_ERROR_GE_PARAM_INVALID); | |||||
| } | |||||
| TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async1) { | |||||
| uintptr_t resource_id = 0; | |||||
| std::mutex stream_mu; | |||||
| rtStream_t stream = nullptr; | |||||
| rtStreamCreate(&stream, 0); | |||||
| DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream); | |||||
| dynamic_single_op.num_inputs_ = 1; | |||||
| vector<int64_t> dims_vec_0 = {2}; | |||||
| vector<GeTensorDesc> input_desc; | |||||
| GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); | |||||
| // input data from host | |||||
| AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 1); | |||||
| input_desc.emplace_back(tensor_desc_0); | |||||
| int64_t input_size = 0; | |||||
| EXPECT_EQ(TensorUtils::GetTensorMemorySizeInBytes(tensor_desc_0, input_size), SUCCESS); | |||||
| EXPECT_EQ(input_size, 64); | |||||
| EXPECT_NE(SingleOpManager::GetInstance().GetResource(resource_id, stream), nullptr); | |||||
| vector<DataBuffer> input_buffers; | |||||
| ge::DataBuffer data_buffer; | |||||
| data_buffer.data = new char[4]; | |||||
| data_buffer.length = 4; | |||||
| input_buffers.emplace_back(data_buffer); | |||||
| vector<GeTensorDesc> output_desc; | |||||
| vector<DataBuffer> output_buffers; | |||||
| auto *tbe_task = new (std::nothrow) TbeOpTask(); | |||||
| ge::OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||||
| ge::ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||||
| ge::NodePtr node = graph->AddNode(op_desc); | |||||
| tbe_task->node_ = node; | |||||
| dynamic_single_op.op_task_.reset((OpTask *)(tbe_task)); | |||||
| OpDescPtr desc_ptr = MakeShared<OpDesc>("name1", "type1"); | |||||
| EXPECT_EQ(desc_ptr->AddInputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS); | |||||
| dynamic_single_op.op_task_->op_desc_ = desc_ptr; | |||||
| // UpdateRunInfo failed | |||||
| EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), PARAM_INVALID); | |||||
| } | |||||
| TEST_F(UtestSingleOp, test_singleop_execute_async1) { | |||||
| StreamResource *res = new (std::nothrow) StreamResource(1); | |||||
| std::mutex stream_mu; | |||||
| rtStream_t stream = nullptr; | |||||
| rtStreamCreate(&stream, 0); | |||||
| SingleOp single_op(res, &stream_mu, stream); | |||||
| vector<DataBuffer> input_buffers; | |||||
| ge::DataBuffer data_buffer; | |||||
| data_buffer.data = new char[4]; | |||||
| data_buffer.length = 4; | |||||
| data_buffer.placement = 1; | |||||
| input_buffers.emplace_back(data_buffer); | |||||
| vector<DataBuffer> output_buffers; | |||||
| single_op.input_sizes_.emplace_back(4); | |||||
| SingleOpModelParam model_params; | |||||
| single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params)); | |||||
| single_op.args_.resize(1); | |||||
| EXPECT_EQ(single_op.hybrid_model_executor_, nullptr); | |||||
| EXPECT_EQ(single_op.running_param_->mem_base, nullptr); | |||||
| EXPECT_EQ(single_op.tasks_.size(), 0); | |||||
| EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), SUCCESS); | |||||
| } | |||||
| TEST_F(UtestSingleOp, test_singleop_execute_async2) { | |||||
| StreamResource *res = new (std::nothrow) StreamResource(1); | |||||
| std::mutex stream_mu; | |||||
| rtStream_t stream = nullptr; | |||||
| rtStreamCreate(&stream, 0); | |||||
| SingleOp single_op(res, &stream_mu, stream); | |||||
| vector<DataBuffer> input_buffers; | |||||
| ge::DataBuffer data_buffer; | |||||
| data_buffer.data = new char[4]; | |||||
| data_buffer.length = 4; | |||||
| data_buffer.placement = 1; | |||||
| input_buffers.emplace_back(data_buffer); | |||||
| vector<DataBuffer> output_buffers; | |||||
| single_op.input_sizes_.emplace_back(4); | |||||
| SingleOpModelParam model_params; | |||||
| single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params)); | |||||
| single_op.args_.resize(1); | |||||
| GeTensorDesc tensor_desc(GeShape({1}), FORMAT_NHWC, DT_UINT64); | |||||
| single_op.inputs_desc_.emplace_back(tensor_desc); | |||||
| std::shared_ptr<ge::GeRootModel> root_model = ge::MakeShared<ge::GeRootModel>(); | |||||
| single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model)); | |||||
| single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), 0, stream)); | |||||
| EXPECT_EQ(single_op.running_param_->mem_base, nullptr); | |||||
| EXPECT_EQ(single_op.tasks_.size(), 0); | |||||
| EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), PARAM_INVALID); | |||||
| } | |||||