From 666a9179562a65cb503e29fd75feba417a791dd8 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 17:58:28 +0800 Subject: [PATCH 01/11] Aicpu check op type --- ge/graph/build/model_builder.cc | 44 +++++ ge/graph/build/model_builder.h | 2 + .../load/new_model_manager/davinci_model.cc | 2 + .../load/new_model_manager/model_manager.cc | 170 ++++++++++++++++++ .../load/new_model_manager/model_manager.h | 4 + ge/hybrid/model/hybrid_model_builder.cc | 26 +++ ge/hybrid/model/hybrid_model_builder.h | 1 + 7 files changed, 249 insertions(+) diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 37eb499a..3c36d4ee 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -581,9 +581,15 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { // Add TBE Kernels and custom aicpu op bin std::set tbe_name_set; std::set aicpu_name_set; + std::vector aicpu_optype_list; + std::vector aicpu_tf_optype_list; + std::set aicpu_optype_set; + std::set aicpu_tf_optype_set; for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); + // check aicpu op type + (void)CheckAicpuOp(node_op_desc, aicpu_optype_set, aicpu_tf_optype_set); TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); if (tbe_kernel == nullptr) { std::string kernel_name; @@ -605,6 +611,25 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { tbe_kernel_store_.AddTBEKernel(tbe_kernel); } + if (ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list)) { + GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size()); + aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end()); + } + + if (ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list)) { + GELOGI("Already have aicpu 
tf optype size: %zu", aicpu_tf_optype_list.size()); + aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end()); + } + + // reset list with set + aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end()); + aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end()); + GELOGI("Check Aicpu op types ComputeGraph: %s aicpu_optype_set: %zu, aicpu_optype_list: %zu, aicpu_tf_optype_set: %zu, aicpu_tf_optype_list:%zu.", + compute_graph_->GetName().c_str(), aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(), aicpu_tf_optype_list.size()); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(model, "needCheckCpu", aicpu_optype_list), return FAILED, "Set attr needCheckCpu fail."); + + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(model, "needCheckTf", aicpu_tf_optype_list), return FAILED, "Set attr needCheckTf fail."); + for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); @@ -796,4 +821,23 @@ Status ModelBuilder::CompileSingleOp() { GE_TIMESTAMP_CALLNUM_END(BatchCompileOp, "GraphBuild::CompileOp"); return ge::SUCCESS; } + +Status ModelBuilder::CheckAicpuOp(const OpDescPtr &op_desc, std::set &cpue_check_set, std::set &tf_engine_set) { + GE_CHECK_NOTNULL(op_desc); + std::string aicpu_optype; + bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype); + std::vector tf_optypes; + bool has_attr_check_tf = ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes); + if (has_attr_check_cpu && !aicpu_optype.empty()) { + GELOGI("Check Aicpu op type %s, op name: %s.", op_desc->GetType().c_str(), op_desc->GetName().c_str()); + cpue_check_set.insert(aicpu_optype); + } + + if (has_attr_check_tf && !tf_optypes.empty()) { + GELOGI("Check Tf op type %s, op name: %s, tf_optypes size: %zu.", op_desc->GetType().c_str(), op_desc->GetName().c_str(), 
tf_optypes.size()); + tf_engine_set.insert(tf_optypes.begin(), tf_optypes.end()); + } + + return SUCCESS; +} } // namespace ge diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index e75521c7..8efd76e9 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -83,6 +83,8 @@ class ModelBuilder { Status CompileSingleOp(); + Status CheckAicpuOp(const OpDescPtr &op_desc, std::set &cpue_check_set, std::set &tf_engine_set); + uint64_t session_id_; map mem_type_to_mem_offset_; diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index bc755e07..fceadc32 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -485,6 +485,8 @@ Status DavinciModel::DoTaskSink() { GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOp(ge_model_), "Check aicpu op type failed."); + GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed."); diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index b595ac39..7a7afe5d 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -18,6 +18,7 @@ #include +#include "aicpu/aicpu_schedule/aicpu_op_type_list.h" #include "common/dump/dump_manager.h" #include "common/l2_cache_optimize.h" #include "common/profiling/profiling_manager.h" @@ -30,6 +31,7 @@ #include "graph/load/new_model_manager/davinci_model_parser.h" #include "model/ge_root_model.h" #include "graph/common/local_context.h" +#include "graph/utils/attr_utils.h" #include "common/formats/utils/formats_trans_utils.h" #include "hybrid/hybrid_davinci_model.h" @@ -1529,4 +1531,172 @@ Status ModelManager::EnableExceptionDump(const std::map 
&options return SUCCESS; } +Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_optype_list, std::vector &aicpu_tf_optype_list) { + std::string kernel_name = "checkOpType"; + GELOGI("LaunchKernelCheckAicpuOpType in, kernel name %s", kernel_name.c_str()); + std::lock_guard lock(cust_aicpu_mutex_); + std::vector req_aicpu_op_info_list; + std::vector res_aicpu_op_info_list; + std::vector res_ret_code_list; + + if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) { + GELOGI("No need to check aicpu optype."); + return SUCCESS; + } + + vector allocated_mem; + rtError_t status; + rtStream_t stream = nullptr; + void *args = nullptr; + + void *d_req_op_list = nullptr; + void *d_res_op_list = nullptr; + void *d_ret_code_list = nullptr; + + size_t aicpu_op_nums = aicpu_optype_list.size(); + size_t tf_op_nums = aicpu_tf_optype_list.size(); + size_t op_nums = aicpu_op_nums + tf_op_nums; + // malloc sysOpInfoList in SysOpCheckInfo + status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(d_req_op_list); + + // malloc sysOpInfoList in SysOpCheckResp + status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(d_res_op_list); + + // malloc returnCodeList in SysOpCheckResp + status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(d_ret_code_list); + + for (const auto &op_type : aicpu_optype_list) { + SysOpInfo op_info; + // malloc op_type name in SysOpInfo + void *d_op_type_name = nullptr; + status = 
rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(d_op_type_name); + GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.length(), op_type.c_str(), op_type.length(), RT_MEMCPY_HOST_TO_DEVICE)); + op_info.opType = reinterpret_cast(reinterpret_cast(d_op_type_name)); + op_info.opLen = op_type.length(); + op_info.kernelsType = CPU_KERNEL; + req_aicpu_op_info_list.emplace_back(op_info); + //GE_CHK_RT(rtMemcpy((char *)d_req_op_list + i * sizeof(SysOpInfo), sizeof(SysOpInfo), &op_info, sizeof(SysOpInfo), RT_MEMCPY_HOST_TO_DEVICE)); + } + + for (const auto &op_type : aicpu_tf_optype_list) { + SysOpInfo op_info; + // malloc op_type name in SysOpInfo + void *d_op_type_name = nullptr; + status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(d_op_type_name); + GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.size(), op_type.c_str(), op_type.size(), RT_MEMCPY_HOST_TO_DEVICE)); + op_info.opType = reinterpret_cast(reinterpret_cast(d_op_type_name)); + op_info.opLen = op_type.size(); + op_info.kernelsType = TF_KERNEL; + req_aicpu_op_info_list.emplace_back(op_info); + //GE_CHK_RT(rtMemcpy((char *)d_req_op_list + i * sizeof(SysOpInfo), sizeof(SysOpInfo), &op_info, sizeof(SysOpInfo), RT_MEMCPY_HOST_TO_DEVICE)); + } + + GE_CHK_RT(rtMemcpy(d_req_op_list, sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(), sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE)); + + SysOpCheckInfo op_check_info_req; + SysOpCheckResp op_check_info_res; + op_check_info_req.opListNum = op_nums; + op_check_info_req.offSetLen = sizeof(SysOpCheckInfo); + op_check_info_req.sysOpInfoList = 
reinterpret_cast(reinterpret_cast(d_req_op_list)); + + op_check_info_res.opListNum = op_nums; + op_check_info_res.returnCodeList = reinterpret_cast(reinterpret_cast(d_ret_code_list)); + op_check_info_res.sysOpInfoList = reinterpret_cast(reinterpret_cast(d_res_op_list)); + + uint32_t args_size = sizeof(SysOpCheckInfo) + sizeof(SysOpCheckResp); + status = rtMalloc(&args, args_size, RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(args); + GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), &op_check_info_req, sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(reinterpret_cast(reinterpret_cast(reinterpret_cast(args)) + op_check_info_req.offSetLen), sizeof(SysOpCheckResp), &op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); + + GE_CHK_RT(rtStreamCreate(&stream, 0)); + GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream)); + + status = rtStreamSynchronize(stream); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + + // Check the response + void *d_op_check_info_res = reinterpret_cast(reinterpret_cast(reinterpret_cast(args)) + op_check_info_req.offSetLen); + op_check_info_res = {}; + GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST)); + std::function callback = [&]() { + for (auto mem : allocated_mem) { + GE_CHK_RT(rtFree(mem)); + } + GE_CHK_RT(rtStreamDestroy(stream)); + }; + + uint64_t res_op_nums = op_check_info_res.opListNum; + if (res_op_nums != 0) { + GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums, reinterpret_cast(reinterpret_cast(op_check_info_res.returnCodeList)), sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); + 
GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums, reinterpret_cast(reinterpret_cast(op_check_info_res.sysOpInfoList)), sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); + if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { + GELOGE(FAILED, "Number of retcode is not equal to number of op type."); + GE_MAKE_GUARD(release, callback); + } + std::string fail_reason = "Check aicpu op_type failed. details: "; + for (uint32_t i = 0; i < res_op_nums; i++) { + ReturnCode ret_code = res_ret_code_list.at(i); + SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); + std::vector op_name; + GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST)); + std::string kernel_type = (static_cast(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL"; + string op_name_str(op_name.data()); + fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " " + to_string(static_cast(ret_code)) + "<0: op_type, 1: format, 2: datatype> not support."; + } + GELOGE(FAILED, "%s", fail_reason.c_str()); + GE_MAKE_GUARD(release, callback); + return FAILED; + } + + GE_MAKE_GUARD(release, callback); + GELOGI("Cpu kernel launch check optype task success."); + return SUCCESS; +} + +Status ModelManager::CheckAicpuOp(GeModelPtr ge_model) { + std::vector aicpu_optype_list; + std::vector aicpu_tf_optype_list; + bool aicpu_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list); + bool tf_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list); + if (!aicpu_need_check && !tf_need_check) { + GELOGI("No need to check aicpu optype."); + return SUCCESS; + } + GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed."); + return SUCCESS; +} + } // namespace ge diff --git 
a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index fc98d9c2..4463b3ef 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -295,6 +295,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status LaunchKernelCustAicpuSo(const string &kernel_name); + ge::Status LaunchKernelCheckAicpuOp(std::vector &aicpu_optype_list, std::vector &aicpu_tf_optype_list); + + ge::Status CheckAicpuOp(GeModelPtr ge_model); + ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); ge::Status GenSessionId(uint64_t &session_id); diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index d519c35b..e1c35008 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -21,6 +21,7 @@ #include "graph/build/memory/var_mem_assign_util.h" #include "graph/debug/ge_attr_define.h" #include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/new_model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" @@ -921,6 +922,7 @@ Status HybridModelBuilder::InitWeights() { } Status HybridModelBuilder::LoadTasks() { + GE_CHK_STATUS_RET(CheckAicpuOp(), "Check Aicpu op failed."); for (auto &it : hybrid_model_.node_items_) { auto &node_item = it.second; auto &node_ptr = node_item->node; @@ -1557,5 +1559,29 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item, return SUCCESS; } + +Status HybridModelBuilder::CheckAicpuOp() { + std::vector aicpu_optype_list; + std::vector aicpu_tf_optype_list; + std::set aicpu_optype_set; + std::set aicpu_tf_optype_set; + const auto &root_graph = ge_root_model_->GetRootGraph(); + for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) { + auto &name = it.first; + auto 
&ge_model = it.second; + GE_CHECK_NOTNULL(ge_model); + if (ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list)) { + aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end()); + } + + if (ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list)) { + aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end()); + } + } + // reset list with set + aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end()); + aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end()); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed."); +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index b90ec982..a549a9f1 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -78,6 +78,7 @@ class HybridModelBuilder { Status ParseVarOutputs(NodeItem &node_item); Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item); Status RecoverGraphUnknownFlag(); + Status CheckAicpuOp(); const char* GetGraphName() const { return hybrid_model_.model_name_.c_str(); From b1c70681ec2711f6990fe134ac67187c716bf690 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 18:54:04 +0800 Subject: [PATCH 02/11] Update some diff from yellow zone --- ge/graph/build/model_builder.cc | 53 ++++++++++--------- ge/graph/build/model_builder.h | 4 +- .../load/new_model_manager/davinci_model.cc | 2 +- .../load/new_model_manager/model_manager.cc | 31 ++++++++--- .../load/new_model_manager/model_manager.h | 2 +- 5 files changed, 57 insertions(+), 35 deletions(-) diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 3c36d4ee..1eb758c3 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ 
-581,15 +581,11 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { // Add TBE Kernels and custom aicpu op bin std::set tbe_name_set; std::set aicpu_name_set; - std::vector aicpu_optype_list; - std::vector aicpu_tf_optype_list; - std::set aicpu_optype_set; - std::set aicpu_tf_optype_set; for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); // check aicpu op type - (void)CheckAicpuOp(node_op_desc, aicpu_optype_set, aicpu_tf_optype_set); + (void)CollectCheckAicpuAttr(node_op_desc, aicpu_optype_set, aicpu_tf_optype_set); TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); if (tbe_kernel == nullptr) { std::string kernel_name; @@ -611,24 +607,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { tbe_kernel_store_.AddTBEKernel(tbe_kernel); } - if (ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list)) { - GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size()); - aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end()); - } - - if (ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list)) { - GELOGI("Already have aicpu tf optype size: %zu", aicpu_tf_optype_list.size()); - aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end()); - } - - // reset list with set - aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end()); - aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end()); - GELOGI("Check Aicpu op types ComputeGraph: %s aicpu_optype_set: %zu, aicpu_optype_list: %zu, aicpu_tf_optype_set: %zu, aicpu_tf_optype_list:%zu.", - compute_graph_->GetName().c_str(), aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(), aicpu_tf_optype_list.size()); - 
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(model, "needCheckCpu", aicpu_optype_list), return FAILED, "Set attr needCheckCpu fail."); - - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(model, "needCheckTf", aicpu_tf_optype_list), return FAILED, "Set attr needCheckTf fail."); + (void)SetModelAicpuCheckAttr(model); for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = n->GetOpDesc(); @@ -822,7 +801,7 @@ Status ModelBuilder::CompileSingleOp() { return ge::SUCCESS; } -Status ModelBuilder::CheckAicpuOp(const OpDescPtr &op_desc, std::set &cpue_check_set, std::set &tf_engine_set) { +Status ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &cpue_check_set, std::set &tf_engine_set) { GE_CHECK_NOTNULL(op_desc); std::string aicpu_optype; bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype); @@ -840,4 +819,30 @@ Status ModelBuilder::CheckAicpuOp(const OpDescPtr &op_desc, std::set aicpu_optype_list; + std::vector aicpu_tf_optype_list; + std::set aicpu_optype_set; + std::set aicpu_tf_optype_set; + if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) { + GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size()); + aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end()); + } + + if (ge::AttrUtils::GetListStr(&model, "needCheckTf", aicpu_tf_optype_list)) { + GELOGI("Already have aicpu tf optype size: %zu", aicpu_tf_optype_list.size()); + aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end()); + } + + // reset list with set + aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end()); + aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end()); + GELOGI("Check Aicpu op types ComputeGraph: %s aicpu_optype_set: %zu, aicpu_optype_list: %zu, aicpu_tf_optype_set: %zu, aicpu_tf_optype_list:%zu.", + compute_graph_->GetName().c_str(), 
aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(), aicpu_tf_optype_list.size()); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return FAILED, "Set attr needCheckCpu fail."); + + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return FAILED, "Set attr needCheckTf fail."); + return SUCCESS; +} } // namespace ge diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index 8efd76e9..e4dfbf0f 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -83,7 +83,9 @@ class ModelBuilder { Status CompileSingleOp(); - Status CheckAicpuOp(const OpDescPtr &op_desc, std::set &cpue_check_set, std::set &tf_engine_set); + Status CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &cpue_check_set, std::set &tf_engine_set); + + Status SetModelCheckAicpuAttr(ge::Model &model); uint64_t session_id_; diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index fceadc32..d1ea833a 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -485,7 +485,7 @@ Status DavinciModel::DoTaskSink() { GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOp(ge_model_), "Check aicpu op type failed."); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed."); GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 7a7afe5d..5973b030 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -54,6 +54,7 @@ const char *const kDeleteCustOp = "deleteCustOp"; const int kTimeSpecNano = 
1000000000; const int kTimeSpecMiro = 1000000; const int kSessionMaxBias = 100; +const int kOpNameMaxSize = 100; struct CustAicpuSoBuf { uint64_t kernelSoBuf; uint32_t kernelSoBufLen; @@ -1540,7 +1541,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op std::vector res_ret_code_list; if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) { - GELOGI("No need to check aicpu optype."); + GELOGI("No need to check aicpu op type."); return SUCCESS; } @@ -1624,7 +1625,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op op_check_info_req.offSetLen = sizeof(SysOpCheckInfo); op_check_info_req.sysOpInfoList = reinterpret_cast(reinterpret_cast(d_req_op_list)); - op_check_info_res.opListNum = op_nums; + op_check_info_res.opListNum = 0; op_check_info_res.returnCodeList = reinterpret_cast(reinterpret_cast(d_ret_code_list)); op_check_info_res.sysOpInfoList = reinterpret_cast(reinterpret_cast(d_res_op_list)); @@ -1636,7 +1637,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op } allocated_mem.push_back(args); GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), &op_check_info_req, sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT(rtMemcpy(reinterpret_cast(reinterpret_cast(reinterpret_cast(args)) + op_check_info_req.offSetLen), sizeof(SysOpCheckResp), &op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(reinterpret_cast(reinterpret_cast(args) + op_check_info_req.offSetLen), sizeof(SysOpCheckResp), &op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT(rtStreamCreate(&stream, 0)); GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream)); @@ -1648,8 +1649,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op } // Check the response - void *d_op_check_info_res = reinterpret_cast(reinterpret_cast(reinterpret_cast(args)) + op_check_info_req.offSetLen); - op_check_info_res = {}; + void 
*d_op_check_info_res = reinterpret_cast(reinterpret_cast(args) + op_check_info_req.offSetLen); GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST)); std::function callback = [&]() { for (auto mem : allocated_mem) { @@ -1658,25 +1658,40 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op GE_CHK_RT(rtStreamDestroy(stream)); }; + if (op_check_info_res.isWithoutJson) { + GELOGI("No need to check aicpu in this scenoria."); + GE_MAKE_GUARD(release, callback); + return SUCCESS; + } uint64_t res_op_nums = op_check_info_res.opListNum; + GELOGI("Check aicpu type, is without json: %d, res op num: %lu.", op_check_info_res.isWithoutJson, res_op_nums); if (res_op_nums != 0) { + res_ret_code_list.clear(); + res_ret_code_list.resize(res_op_nums); + res_aicpu_op_info_list.clear(); + res_aicpu_op_info_list.resize(res_op_nums); GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums, reinterpret_cast(reinterpret_cast(op_check_info_res.returnCodeList)), sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums, reinterpret_cast(reinterpret_cast(op_check_info_res.sysOpInfoList)), sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { GELOGE(FAILED, "Number of retcode is not equal to number of op type."); GE_MAKE_GUARD(release, callback); + return FAILED; } - std::string fail_reason = "Check aicpu op_type failed. 
details: "; + std::string fail_reason; for (uint32_t i = 0; i < res_op_nums; i++) { ReturnCode ret_code = res_ret_code_list.at(i); SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); + GELOGI("Not surpport aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code); std::vector op_name; + op_name.clear(); + op_name.resize(kOpNameMaxSize); GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST)); std::string kernel_type = (static_cast(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL"; string op_name_str(op_name.data()); - fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " " + to_string(static_cast(ret_code)) + "<0: op_type, 1: format, 2: datatype> not support."; + fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " ret code:" + to_string(static_cast(ret_code)) + "<0: op_type, 1: format, 2: datatype> \n"; } - GELOGE(FAILED, "%s", fail_reason.c_str()); + fail_reason += "not support."; + GELOGE(FAILED, "Check aicpu op_type failed. 
details: %s", fail_reason.c_str()); GE_MAKE_GUARD(release, callback); return FAILED; } diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 4463b3ef..f1f404b5 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -297,7 +297,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status LaunchKernelCheckAicpuOp(std::vector &aicpu_optype_list, std::vector &aicpu_tf_optype_list); - ge::Status CheckAicpuOp(GeModelPtr ge_model); + ge::Status CheckAicpuOpList(GeModelPtr ge_model); ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); From 72e7927e3d817fea5b345281e466a67baa4f0915 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 19:03:15 +0800 Subject: [PATCH 03/11] update some code --- ge/graph/load/new_model_manager/model_manager.cc | 4 ++-- ge/hybrid/model/hybrid_model_builder.cc | 10 +++++----- ge/hybrid/model/hybrid_model_builder.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 5973b030..7d776eb7 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1681,14 +1681,14 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op for (uint32_t i = 0; i < res_op_nums; i++) { ReturnCode ret_code = res_ret_code_list.at(i); SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); - GELOGI("Not surpport aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code); + GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code); std::vector op_name; op_name.clear(); op_name.resize(kOpNameMaxSize); 
GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST)); std::string kernel_type = (static_cast(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL"; string op_name_str(op_name.data()); - fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " ret code:" + to_string(static_cast(ret_code)) + "<0: op_type, 1: format, 2: datatype> \n"; + fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " ret code:" + std::to_string(static_cast(ret_code)) + "<0: op_type, 1: format, 2: datatype> \n"; } fail_reason += "not support."; GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str()); diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index e1c35008..3ffb2dc6 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -922,7 +922,7 @@ Status HybridModelBuilder::InitWeights() { } Status HybridModelBuilder::LoadTasks() { - GE_CHK_STATUS_RET(CheckAicpuOp(), "Check Aicpu op failed."); + GE_CHK_STATUS_RET(CheckAicpuOpList(), "Check Aicpu op failed."); for (auto &it : hybrid_model_.node_items_) { auto &node_item = it.second; auto &node_ptr = node_item->node; @@ -1560,21 +1560,20 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item, return SUCCESS; } -Status HybridModelBuilder::CheckAicpuOp() { +Status HybridModelBuilder::CheckAicpuOpList() { std::vector aicpu_optype_list; std::vector aicpu_tf_optype_list; std::set aicpu_optype_set; std::set aicpu_tf_optype_set; - const auto &root_graph = ge_root_model_->GetRootGraph(); for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) { auto &name = it.first; auto &ge_model = it.second; GE_CHECK_NOTNULL(ge_model); - if (ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list)) { + if (ge::AttrUtils::GetListStr(*ge_model, "needCheckCpu", aicpu_optype_list)) { 
aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end()); } - if (ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list)) { + if (ge::AttrUtils::GetListStr(*ge_model, "needCheckTf", aicpu_tf_optype_list)) { aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end()); } } @@ -1582,6 +1581,7 @@ Status HybridModelBuilder::CheckAicpuOp() { aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end()); aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end()); GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed."); + return SUCCESS; } } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index a549a9f1..bb349d86 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -78,7 +78,7 @@ class HybridModelBuilder { Status ParseVarOutputs(NodeItem &node_item); Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item); Status RecoverGraphUnknownFlag(); - Status CheckAicpuOp(); + Status CheckAicpuOpList(); const char* GetGraphName() const { return hybrid_model_.model_name_.c_str(); From ee03aa5dec0711d13ce68bb7fb9473b4e05fbb1d Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 19:41:36 +0800 Subject: [PATCH 04/11] update aicpu model builder --- ge/graph/build/model_builder.cc | 8 ++++---- ge/graph/build/model_builder.h | 2 +- ge/graph/load/new_model_manager/model_manager.cc | 2 +- ge/hybrid/model/hybrid_model_builder.cc | 1 - 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 1eb758c3..b68b5f60 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -581,6 +581,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model 
&model, ge::GeModel &ge_model) { // Add TBE Kernels and custom aicpu op bin std::set tbe_name_set; std::set aicpu_name_set; + std::set aicpu_optype_set; + std::set aicpu_tf_optype_set; for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); @@ -607,7 +609,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { tbe_kernel_store_.AddTBEKernel(tbe_kernel); } - (void)SetModelAicpuCheckAttr(model); + (void)SetModelCheckAicpuAttr(model); for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = n->GetOpDesc(); @@ -820,11 +822,9 @@ Status ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &aicpu_optype_set, std::set &aicpu_tf_optype_set) { std::vector aicpu_optype_list; std::vector aicpu_tf_optype_list; - std::set aicpu_optype_set; - std::set aicpu_tf_optype_set; if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) { GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size()); aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end()); diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index e4dfbf0f..b133b98b 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -85,7 +85,7 @@ class ModelBuilder { Status CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &cpue_check_set, std::set &tf_engine_set); - Status SetModelCheckAicpuAttr(ge::Model &model); + Status SetModelCheckAicpuAttr(ge::Model &model, std::set &aicpu_optype_set, std::set &aicpu_tf_optype_set); uint64_t session_id_; diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 7d776eb7..fc0ebb8a 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ 
-1701,7 +1701,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op return SUCCESS; } -Status ModelManager::CheckAicpuOp(GeModelPtr ge_model) { +Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) { std::vector aicpu_optype_list; std::vector aicpu_tf_optype_list; bool aicpu_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list); diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index 3ffb2dc6..e0d0913e 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -1566,7 +1566,6 @@ Status HybridModelBuilder::CheckAicpuOpList() { std::set aicpu_optype_set; std::set aicpu_tf_optype_set; for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) { - auto &name = it.first; auto &ge_model = it.second; GE_CHECK_NOTNULL(ge_model); if (ge::AttrUtils::GetListStr(*ge_model, "needCheckCpu", aicpu_optype_list)) { From a77f2c39bf4f00484826e45eb1f7d9a8e5987c52 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 19:47:25 +0800 Subject: [PATCH 05/11] add info log to check requs --- ge/graph/load/new_model_manager/model_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index fc0ebb8a..30deb00b 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1616,7 +1616,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op req_aicpu_op_info_list.emplace_back(op_info); //GE_CHK_RT(rtMemcpy((char *)d_req_op_list + i * sizeof(SysOpInfo), sizeof(SysOpInfo), &op_info, sizeof(SysOpInfo), RT_MEMCPY_HOST_TO_DEVICE)); } - + GELOGI("Check aicpu op all attr size: %zu, real attr size: %zu.", op_nums, req_aicpu_op_info_list.size()); GE_CHK_RT(rtMemcpy(d_req_op_list, sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), 
req_aicpu_op_info_list.data(), sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE)); SysOpCheckInfo op_check_info_req; From 80795addb1155350fee77a89eb06d7abe99b3e8f Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sat, 12 Dec 2020 20:04:19 +0800 Subject: [PATCH 06/11] update parameter --- ge/graph/build/model_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index b68b5f60..b451b897 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -609,7 +609,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { tbe_kernel_store_.AddTBEKernel(tbe_kernel); } - (void)SetModelCheckAicpuAttr(model); + (void)SetModelCheckAicpuAttr(model, aicpu_optype_set, aicpu_tf_optype_set); for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = n->GetOpDesc(); From aecc1c9b79a0c97a6cd87496791bbe6d378b7d10 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sun, 13 Dec 2020 15:10:29 +0800 Subject: [PATCH 07/11] update print log --- ge/graph/load/new_model_manager/model_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 30deb00b..a6c70a78 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1707,7 +1707,7 @@ Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) { bool aicpu_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list); bool tf_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list); if (!aicpu_need_check && !tf_need_check) { - GELOGI("No need to check aicpu optype."); + GELOGI("Graph:%s No need to check aicpu optype.", ge_model->GetGraph().GetName().c_str()); return SUCCESS; } 
GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed."); From 78e31e9856b21a997f578ccff70f741aea2f0ddd Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Sun, 13 Dec 2020 16:31:54 +0800 Subject: [PATCH 08/11] update clang-formate --- ge/graph/build/model_builder.cc | 28 +++++++---- .../load/new_model_manager/model_manager.cc | 49 ++++++++++++------- ge/hybrid/model/hybrid_model_builder.cc | 3 +- 3 files changed, 51 insertions(+), 29 deletions(-) diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index b451b897..d639433e 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -803,26 +803,29 @@ Status ModelBuilder::CompileSingleOp() { return ge::SUCCESS; } -Status ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &cpue_check_set, std::set &tf_engine_set) { +Status ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &cpue_check_set, + std::set &tf_engine_set) { GE_CHECK_NOTNULL(op_desc); std::string aicpu_optype; bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype); std::vector tf_optypes; - bool has_attr_check_tf = ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes); + bool has_attr_check_tf = ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes); if (has_attr_check_cpu && !aicpu_optype.empty()) { GELOGI("Check Aicpu op type %s, op name: %s.", op_desc->GetType().c_str(), op_desc->GetName().c_str()); cpue_check_set.insert(aicpu_optype); } if (has_attr_check_tf && !tf_optypes.empty()) { - GELOGI("Check Tf op type %s, op name: %s, tf_optypes size: %zu.", op_desc->GetType().c_str(), op_desc->GetName().c_str(), tf_optypes.size()); + GELOGI("Check Tf op type %s, op name: %s, tf_optypes size: %zu.", op_desc->GetType().c_str(), + op_desc->GetName().c_str(), tf_optypes.size()); tf_engine_set.insert(tf_optypes.begin(), tf_optypes.end()); } - + return SUCCESS; } 
-Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set &aicpu_optype_set, std::set &aicpu_tf_optype_set) { +Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set &aicpu_optype_set, + std::set &aicpu_tf_optype_set) { std::vector aicpu_optype_list; std::vector aicpu_tf_optype_list; if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) { @@ -838,11 +841,16 @@ Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::setGetName().c_str(), aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(), aicpu_tf_optype_list.size()); - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return FAILED, "Set attr needCheckCpu fail."); - - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return FAILED, "Set attr needCheckTf fail."); + GELOGI( + "Check Aicpu op types ComputeGraph: %s aicpu_optype_set: %zu, aicpu_optype_list: %zu, aicpu_tf_optype_set: %zu, " + "aicpu_tf_optype_list:%zu.", + compute_graph_->GetName().c_str(), aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(), + aicpu_tf_optype_list.size()); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return FAILED, + "Set attr needCheckCpu fail."); + + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return FAILED, + "Set attr needCheckTf fail."); return SUCCESS; } } // namespace ge diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index a6c70a78..99b47878 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1532,7 +1532,8 @@ Status ModelManager::EnableExceptionDump(const std::map &options return SUCCESS; } -Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_optype_list, std::vector &aicpu_tf_optype_list) { +Status 
ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_optype_list, + std::vector &aicpu_tf_optype_list) { std::string kernel_name = "checkOpType"; GELOGI("LaunchKernelCheckAicpuOpType in, kernel name %s", kernel_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); @@ -1556,7 +1557,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op size_t aicpu_op_nums = aicpu_optype_list.size(); size_t tf_op_nums = aicpu_tf_optype_list.size(); - size_t op_nums = aicpu_op_nums + tf_op_nums; + size_t op_nums = aicpu_op_nums + tf_op_nums; // malloc sysOpInfoList in SysOpCheckInfo status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { @@ -1591,12 +1592,11 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); - GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.length(), op_type.c_str(), op_type.length(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.length(), op_type.c_str(), op_type.length(), RT_MEMCPY_HOST_TO_DEVICE)); op_info.opType = reinterpret_cast(reinterpret_cast(d_op_type_name)); op_info.opLen = op_type.length(); op_info.kernelsType = CPU_KERNEL; req_aicpu_op_info_list.emplace_back(op_info); - //GE_CHK_RT(rtMemcpy((char *)d_req_op_list + i * sizeof(SysOpInfo), sizeof(SysOpInfo), &op_info, sizeof(SysOpInfo), RT_MEMCPY_HOST_TO_DEVICE)); } for (const auto &op_type : aicpu_tf_optype_list) { @@ -1609,15 +1609,15 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); - GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.size(), op_type.c_str(), op_type.size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.size(), op_type.c_str(), op_type.size(), RT_MEMCPY_HOST_TO_DEVICE)); op_info.opType = reinterpret_cast(reinterpret_cast(d_op_type_name)); op_info.opLen = op_type.size(); 
op_info.kernelsType = TF_KERNEL; req_aicpu_op_info_list.emplace_back(op_info); - //GE_CHK_RT(rtMemcpy((char *)d_req_op_list + i * sizeof(SysOpInfo), sizeof(SysOpInfo), &op_info, sizeof(SysOpInfo), RT_MEMCPY_HOST_TO_DEVICE)); } GELOGI("Check aicpu op all attr size: %zu, real attr size: %zu.", op_nums, req_aicpu_op_info_list.size()); - GE_CHK_RT(rtMemcpy(d_req_op_list, sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(), sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(d_req_op_list, sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(), + sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE)); SysOpCheckInfo op_check_info_req; SysOpCheckResp op_check_info_res; @@ -1636,8 +1636,10 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(args); - GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), &op_check_info_req, sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT(rtMemcpy(reinterpret_cast(reinterpret_cast(args) + op_check_info_req.offSetLen), sizeof(SysOpCheckResp), &op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT( + rtMemcpy(args, sizeof(SysOpCheckInfo), &op_check_info_req, sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(reinterpret_cast(reinterpret_cast(args) + op_check_info_req.offSetLen), + sizeof(SysOpCheckResp), &op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT(rtStreamCreate(&stream, 0)); GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream)); @@ -1650,7 +1652,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op // Check the response void *d_op_check_info_res = reinterpret_cast(reinterpret_cast(args) + op_check_info_req.offSetLen); - GE_CHK_RT(rtMemcpy(&op_check_info_res, 
sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), + RT_MEMCPY_DEVICE_TO_HOST)); std::function callback = [&]() { for (auto mem : allocated_mem) { GE_CHK_RT(rtFree(mem)); @@ -1670,8 +1673,12 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op res_ret_code_list.resize(res_op_nums); res_aicpu_op_info_list.clear(); res_aicpu_op_info_list.resize(res_op_nums); - GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums, reinterpret_cast(reinterpret_cast(op_check_info_res.returnCodeList)), sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); - GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums, reinterpret_cast(reinterpret_cast(op_check_info_res.sysOpInfoList)), sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums, + reinterpret_cast(reinterpret_cast(op_check_info_res.returnCodeList)), + sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums, + reinterpret_cast(reinterpret_cast(op_check_info_res.sysOpInfoList)), + sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { GELOGE(FAILED, "Number of retcode is not equal to number of op type."); GE_MAKE_GUARD(release, callback); @@ -1681,14 +1688,19 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op for (uint32_t i = 0; i < res_op_nums; i++) { ReturnCode ret_code = res_ret_code_list.at(i); SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); - GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, 
ret_code); + GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, + aicpu_info.kernelsType, aicpu_info.opLen, ret_code); std::vector op_name; op_name.clear(); op_name.resize(kOpNameMaxSize); - GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST)); - std::string kernel_type = (static_cast(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL"; + GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast(aicpu_info.opType), + aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST)); + std::string kernel_type = + (static_cast(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL"; string op_name_str(op_name.data()); - fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " ret code:" + std::to_string(static_cast(ret_code)) + "<0: op_type, 1: format, 2: datatype> \n"; + fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + + " ret code:" + std::to_string(static_cast(ret_code)) + + "<0: op_type, 1: format, 2: datatype> \n"; } fail_reason += "not support."; GELOGE(FAILED, "Check aicpu op_type failed. 
details: %s", fail_reason.c_str()); @@ -1706,11 +1718,12 @@ Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) { std::vector aicpu_tf_optype_list; bool aicpu_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list); bool tf_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list); - if (!aicpu_need_check && !tf_need_check) { + if (!aicpu_need_check && !tf_need_check) { GELOGI("Graph:%s No need to check aicpu optype.", ge_model->GetGraph().GetName().c_str()); return SUCCESS; } - GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed."); + GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), + "Launch check aicpu op type failed."); return SUCCESS; } diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index e0d0913e..b9b8e6d0 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -1579,7 +1579,8 @@ Status HybridModelBuilder::CheckAicpuOpList() { // reset list with set aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end()); aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end()); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed."); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), + "Launch check aicpu op type failed."); return SUCCESS; } } // namespace hybrid From 0adb4ffc4b95940bce8076eb457130092e6d5b7e Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Mon, 14 Dec 2020 15:25:11 +0800 Subject: [PATCH 09/11] update codex warnings --- CMakeLists.txt | 4 ++-- ge/common/profiling/profiling_manager.cc | 3 ++- ge/common/profiling/profiling_manager.h | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git 
a/CMakeLists.txt b/CMakeLists.txt index 86d0184b..7416a130 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,7 +115,7 @@ if (ENABLE_OPEN_SRC) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) endif() elseif(PLATFORM STREQUAL "all") - find_module(msprofiler libmsprofiler.a ${ASCEND_DRIVER_COMMON_DIR}) + find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) @@ -123,7 +123,7 @@ if (ENABLE_OPEN_SRC) find_module(resource libresource.so ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) - find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_ACL_DIR}) + find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) else() diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 456cb0a4..0df6773a 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -802,7 +802,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP if (!fp_point_.empty() && !bp_point_.empty()) { fp_point = fp_point_; bp_point = bp_point_; - GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str()); + GELOGI("Bp Fp have been initialized in env or options. 
bp_point: %s, fp_point: %s", + bp_point.c_str(), fp_point.c_str()); return; } // ProfApi mode and training trace is set diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index 5fa4fac4..87822fb0 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -80,7 +80,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { bool ProfilingTrainingTraceOn() const { return is_training_trace_; } bool ProfilingModelLoadOn() const { return is_load_profiling_; } bool ProfilingModelExecuteOn() const; - bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // is_execute_profiling_ only used by ge option and env + // is_execute_profiling_ only used by ge option and env + bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } void ReportProfilingData(uint32_t model_id, const std::vector &task_desc_info, const std::vector &compute_graph_desc_info); void ProfilingTaskDescInfo(uint32_t model_id, const std::vector &task_desc_info, From 66ab5d7dae49fa45901e822d6baa963fa9194436 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Mon, 14 Dec 2020 16:24:22 +0800 Subject: [PATCH 10/11] update new line --- ge/graph/build/model_builder.h | 6 ++++-- ge/graph/load/new_model_manager/model_manager.h | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index b133b98b..715c16de 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -83,9 +83,11 @@ class ModelBuilder { Status CompileSingleOp(); - Status CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &cpue_check_set, std::set &tf_engine_set); + Status CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &cpue_check_set, + std::set &tf_engine_set); - Status SetModelCheckAicpuAttr(ge::Model &model, std::set &aicpu_optype_set, std::set &aicpu_tf_optype_set); + Status 
SetModelCheckAicpuAttr(ge::Model &model, std::set &aicpu_optype_set, + std::set &aicpu_tf_optype_set); uint64_t session_id_; diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index f1f404b5..dc685519 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -295,7 +295,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status LaunchKernelCustAicpuSo(const string &kernel_name); - ge::Status LaunchKernelCheckAicpuOp(std::vector &aicpu_optype_list, std::vector &aicpu_tf_optype_list); + ge::Status LaunchKernelCheckAicpuOp(std::vector &aicpu_optype_list, + std::vector &aicpu_tf_optype_list); ge::Status CheckAicpuOpList(GeModelPtr ge_model); From e929e6de314b575037d990ed900f803567809b3b Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Mon, 14 Dec 2020 20:04:43 +0800 Subject: [PATCH 11/11] update thirdparty includes --- CMakeLists.txt | 1 + .../aicpu/aicpu_schedule/aicpu_op_type_list.h | 60 ++++ .../inc/aicpu/common/aicpu_task_struct.h | 4 +- third_party/fwkacllib/inc/cce/aicpu_engine.h | 16 +- .../fwkacllib/inc/cce/aicpu_engine_struct.h | 8 +- .../fwkacllib/inc/cce/fwk_adpt_struct.h | 17 +- third_party/fwkacllib/inc/hccl/base.h | 30 +- third_party/fwkacllib/inc/hccl/hcom.h | 214 +++++------ .../fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h | 58 +-- .../fwkacllib/inc/mmpa/sub_inc/mmpa_win.h | 6 + third_party/fwkacllib/inc/runtime/base.h | 332 ++---------------- third_party/fwkacllib/inc/runtime/config.h | 23 +- third_party/fwkacllib/inc/runtime/dev.h | 4 +- third_party/fwkacllib/inc/runtime/rt.h | 2 +- third_party/fwkacllib/inc/tdt/status.h | 7 + third_party/fwkacllib/inc/tdt/tsd_client.h | 28 +- .../fwkacllib/inc/toolchain/prof_callback.h | 135 +++++++ .../fwkacllib/inc/toolchain/prof_reporter.h | 26 +- third_party/fwkacllib/inc/toolchain/slog.h | 25 ++ 19 files changed, 504 insertions(+), 492 deletions(-) create mode 100644 
third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h create mode 100644 third_party/fwkacllib/inc/toolchain/prof_callback.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 7416a130..3df71320 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,6 +105,7 @@ if (ENABLE_OPEN_SRC) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) + find_module(msprofiler_fwk libmsprofiler.a ${ASCEND_ACL_DIR}) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) if(PRODUCT STREQUAL "flr3") elseif(PRODUCT STREQUAL "flr1") diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h new file mode 100644 index 00000000..7e0f94a8 --- /dev/null +++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -0,0 +1,60 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef AICPU_OP_TYPE_LIST_H_ +#define AICPU_OP_TYPE_LIST_H_ + +enum OpKernelType { + TF_KERNEL, + CPU_KERNEL +}; + +enum ReturnCode { + OP_TYPE_NOT_SUPPORT, + FORMAT_NOT_SUPPORT, + DTYPE_NOT_SUPPORT +}; + +#pragma pack(push, 1) +//One byte alignment +struct SysOpInfo { + uint64_t opLen; + uint64_t opType; + OpKernelType kernelsType; +}; + +struct OpParamInfo { + uint64_t num; + uint64_t dtypeList; + uint64_t formatList; +}; + +struct SysOpCheckInfo { + uint64_t opListNum; + uint64_t offSetLen; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; + +struct SysOpCheckResp { + uint64_t opListNum; + bool isWithoutJson; + uint64_t returnCodeList; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; +#pragma pack(pop) +#endif // AICPU_OP_TYPE_LIST_H_ diff --git a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h index c3672663..72e21f6f 100644 --- a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h +++ b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h @@ -21,13 +21,15 @@ namespace aicpu { +#pragma pack(push, 1) struct AicpuParamHead { uint32_t length; // Total length: include cunstom message uint32_t ioAddrNum; // Input and output address number uint32_t extInfoLength; // extInfo struct Length uint64_t extInfoAddr; // extInfo address -} __attribute__ ((packed)); +}; +#pragma pack(pop) } // namespace aicpu diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine.h b/third_party/fwkacllib/inc/cce/aicpu_engine.h index 740f1200..b83731a8 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine.h @@ -13,10 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #ifndef AICPU_ENGINE_H__ #define AICPU_ENGINE_H__ +#include + #ifdef __cplusplus extern "C" { #endif @@ -36,12 +37,23 @@ typedef enum { /** * @ingroup aicpu engine * @brief aeCallInterface: - * a interface to call a function in a op kernfel lib + * a interface to call a function in a op kernfel lib * @param [in] addr void *, should be STR_KERNEL * format * @return aeStatus_t */ aeStatus_t aeCallInterface(void *addr); +/** + * @ingroup aicpu engine + * @brief aeBatchLoadKernelSo: + * a interface to load kernel so + * @param [in] loadSoNum load so number + * @param [in] soPaths load so paths + * @param [in] soNames load so names + * @return aeStatus_t + */ +aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]); + #ifdef __cplusplus } #endif diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h index a5f43be9..8c0c1847 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h @@ -33,18 +33,22 @@ typedef enum { FMK_KERNEL_TYPE_RESERVED } FwkkernelType_t; +#pragma pack(push, 1) typedef struct { uint32_t fwkKernelType; // FwkkernelType_t union { ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel; } fwkKernelBase; -} __attribute__((packed)) STR_FWK_OP_KERNEL; +} STR_FWK_OP_KERNEL; +#pragma pack(pop) +#pragma pack(push, 1) struct SessionInfo { uint64_t sessionId; uint64_t kernelId; bool sessFlag; -} __attribute__((packed)); +}; +#pragma pack(pop) #ifdef __cplusplus } diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index 79d94023..50b39d91 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -70,6 +70,7 @@ enum FWKExtUpdateAddrType { FWK_ADPT_UPDATE_INPUT_OUTPUT }; +#pragma pack(push, 1) // API Parameter Structure struct StrFWKKernel { FWKOperateType opType; @@ -89,31 
+90,39 @@ struct StrFWKKernel { uint64_t extInfoLen; // extend info total length uint64_t extInfoAddr; // extend info addr, ExtInfo structure -} __attribute__((packed)); +}; +#pragma pack(pop) typedef StrFWKKernel FWKOperateParam; // Extent info ShapeAndType const uint32_t kMaxShapeDims = 8; +#pragma pack(push, 1) struct ShapeAndType { int32_t type; int64_t dims[kMaxShapeDims]; -} __attribute__((packed)); +}; +#pragma pack(pop) // Extend info structure for extInfoAddr const uint32_t kExtInfoHeadSize = 8; + +#pragma pack(push, 1) struct ExtInfo { int32_t infoType; // extend type uint32_t infoLen; // length for infoMsg char infoMsg[0]; // extend value -} __attribute__((packed)); +}; +#pragma pack(pop) +#pragma pack(push, 1) struct ResultSummary { uint64_t shape_data_ptr; // shape data addr, need convert to void* uint64_t shape_data_size; // num of dims uint64_t raw_data_ptr; // raw data addr, need convert to void* uint64_t raw_data_size; // size of raw data -} __attribute__((packed)); +}; +#pragma pack(pop) } // end namespace FWKAdapter } // namespace aicpu diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index 8194097e..9facd20c 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -22,7 +22,8 @@ #ifndef HCCL_BASE_H_ #define HCCL_BASE_H_ - +#include +#include #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -95,6 +96,33 @@ typedef void *rtStream_t; */ typedef void *rtModel_t; +struct HcomOperation { + std::string hcclType; + void *inputPtr; + void *outputPtr; + u64 count; + HcclDataType dataType; + HcclReduceOp opType; + u32 root; + + HcomOperation() + { + inputPtr = nullptr; + outputPtr = nullptr; + count = 0; + dataType = HCCL_DATA_TYPE_RESERVED; + opType = HCCL_REDUCE_RESERVED; + root = 0; + } +}; + +struct HcomRemoteAccessAddrInfo { + u32 remotetRankID; + u64 remoteAddr; // host embedding table address + u64 localAddr; // device HBM address + u64 length; // Memory 
Length in Bytes +}; + #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index 90b96ac7..e491d43f 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -24,145 +24,96 @@ #include #include +#include +#include #ifdef __cplusplus extern "C" { #endif // __cplusplus -/** - * @brief Initialize HCOM. - * - * @param rank_table A string identifying the rank table file path, include file name. - * @param identify A string identifying the identify for the rank. - * @return HcclResult - * @see hcom_destroy() - */ -extern HcclResult hcom_init(const char *rank_table, const char *identify); -/** - * @brief Destroy HCOM - * - * @return HcclResult - * @see hcom_init() - */ -extern HcclResult hcom_destroy(void); - -/** - * @brief Bind the model. - * - * @param model A pointer identifying the model information. - * @param stream A pointer identifying the stream information. - * @return HcclResult - * @see hcom_unbind_model() - */ -extern HcclResult hcom_bind_model(rtModel_t model, rtStream_t stream); /** - * @brief Unbind the model. + * @brief Get the rank number in the group. * - * @param model An pointer identifying the model information. - * @return HcclResult - * @see hcom_unbind_model() + * @param group A string identifying the group name. + * @param rankSize A pointer identifying the rank number. + * @return HcclResult */ -extern HcclResult hcom_unbind_model(rtModel_t model); +HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); /** - * @brief All-gather operator. + * @brief Get the rank number in the group. * - * @param tag A string identifying the tag of the operator. - * @param inputPtr A pointer identifying the input data address of the operator. - * @param outputPtr A pointer identifying the output data address of the operator. - * @param inputCount An integer(u64) identifying the number of the input data. 
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. + * @param rankSize A pointer identifying the rank number. * @return HcclResult */ -extern HcclResult hcom_all_gather(const char *tag, void *inputPtr, void *outputPtr, u64 inputCount, - HcclDataType dataType, const char *group, rtStream_t stream); +HcclResult HcomGetRankSize(const char *group, u32 *rankSize); /** - * @brief All-reduce operator. + * @brief Get the rank number of this rank's server within the group. * - * @param tag A string identifying the tag of the operator. - * @param inputPtr A pointer identifying the input data address of the operator. - * @param outputPtr A pointer identifying the output data address of the operator. - * @param count An integer(u64) identifying the number of the output data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. + * @param localRankSize A pointer identifying the rank number. * @return HcclResult */ -extern HcclResult hcom_all_reduce(const char *tag, void *inputPtr, void *outputPtr, u64 count, - HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream); +HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); /** - * @brief Broadcast operator. + * @brief Get the rank number of this rank's server within the group. * - * @param tag A string identifying the tag of the operator. 
- * @param ptr A pointer identifying the data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param root An integer(u32) identifying the the root rank in the operator. - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. + * @param localRankSize A pointer identifying the rank number. * @return HcclResult */ -extern HcclResult hcom_broadcast(const char *tag, void *ptr, u64 count, HcclDataType dataType, u32 root, - const char *group, rtStream_t stream); +HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); /** - * @brief Reduce-scatter operator. + * @brief Get the rank id of this rank. * - * @param tag A string identifying the tag of the operator. - * @param inputPtr A pointer identifying the input data address of the operator. - * @param outputPtr A pointer identifying the output data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. + * @param rankId A pointer identifying the rank id. 
* @return HcclResult */ -extern HcclResult hcom_reduce_scatter(const char *tag, void *inputPtr, void *outputPtr, u64 count, - HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream); +HcclResult hcom_get_rank_id(const char *group, u32 *rankId); /** - * @brief Get the rank number in the group. + * @brief Get the rank id of this rank. * * @param group A string identifying the group name. - * @param rankSize A pointer identifying the rank number. + * @param rankId A pointer identifying the rank id. * @return HcclResult */ -HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); +HcclResult HcomGetRankId(const char *group, u32 *rankId); /** - * @brief Get the rank number of this rank's server within the group. + * @brief Get the local rank id of this rank's server within the group. * * @param group A string identifying the group name. - * @param localRankSize A pointer identifying the rank number. + * @param localRankId A pointer identifying the local rank id. * @return HcclResult */ -HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); +HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); /** - * @brief Get the rank id of this rank. + * @brief Get the local rank id of this rank's server within the group. * * @param group A string identifying the group name. - * @param rankId A pointer identifying the rank id. + * @param localRankId A pointer identifying the local rank id. * @return HcclResult */ -HcclResult hcom_get_rank_id(const char *group, u32 *rankId); +HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); /** - * @brief Get the local rank id of this rank's server within the group. + * @brief Get the world rank id according to the group rank id. * * @param group A string identifying the group name. - * @param localRankId A pointer identifying the local rank id. + * @param groupRank An integer(u32) identifying the group rank id. 
+ * @param worldRank A pointer identifying the world rank id. * @return HcclResult */ -HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); +HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); /** * @brief Get the world rank id according to the group rank id. @@ -172,7 +123,7 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); * @param worldRank A pointer identifying the world rank id. * @return HcclResult */ -HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); +HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); /** * @brief Get the group rank id according to the world rank id. @@ -184,6 +135,16 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, */ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); +/** + * @brief Get the group rank id according to the world rank id. + * + * @param worldRank An integer(u32) identifying the world rank id. + * @param group A string identifying the group name. + * @param groupRank A pointer identifying the group rank id. + * @return HcclResult + */ +HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); + /** * @brief Create group. * @@ -195,60 +156,40 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); /** - * @brief Destroy group + * @brief Create group. * * @param group A string identifying the group name. + * @param rankNum An integer(u32) identifying the number of ranks in the group. + * @param rankIds A list identifying the ranks in the group. * @return HcclResult */ -HcclResult hcom_destroy_group(const char *group); +HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); /** - * @brief Send operator. 
+ * @brief Destroy group * - * @param tag A string identifying the tag of the operator. - * @param inputPtr A pointer identifying the input data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param destRank An integer identifying the destination rank. - * @param srTag An integer identifying the send/recv message tag. - * The message will be send by the receive operator with the same "sr_tag". - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. * @return HcclResult */ -HcclResult hcom_send(const char *tag, void *inputPtr, u64 count, HcclDataType dataType, - u32 destRank, u32 srTag, const char *group, rtStream_t stream); +HcclResult hcom_destroy_group(const char *group); /** - * @brief Receive operator. + * @brief Destroy group * - * @param tag A string identifying the tag of the operator. - * @param outputPtr A pointer identifying the output data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param srcRank An integer identifying the source rank. - * @param srTag An integer identifying the send/recv message tag. - * The message will be send by the send operator with the same "sr_tag". - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. 
* @return HcclResult */ -HcclResult hcom_receive(const char *tag, void *outputPtr, u64 count, HcclDataType dataType, - u32 srcRank, u32 srTag, const char *group, rtStream_t stream); +HcclResult HcomDestroyGroup(const char *group); /** - * @brief Get the gradient split strategy with in the group. + * @brief Set the gradient split strategy with in the group, according to gradient index. * * @param group A string identifying the group name. - * @param feature A pointer identifying the feature of the model. - * @param maxSegmentNum An integer(u32) identifying the max segments of gradients. - * @param segmentNum A pointer identifying the segments number of gradients. - * @param segmentIdx A list identifying the index of end gradient in each segment. - * @return HcclResult + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param IdxList A list identifying the index of end gradient in each segment. + * @return HcclResult */ -HcclResult hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 maxSegmentNum, - u32 *segmentNum, u32 *segmentIdx, GradSplitForceMode force = FORCE_NONE, - OriginalGraphShapeType shapeType = KNOWN_SHAPE); +extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); /** * @brief Set the gradient split strategy with in the group, according to gradient index. @@ -258,7 +199,7 @@ HcclResult hcom_get_split_strategy(const char *group, const struct model_feature * @param IdxList A list identifying the index of end gradient in each segment. * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); +extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); /** * @brief Set the gradient split strategy with in the group, according to gradient data size. 
@@ -270,6 +211,16 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen */ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); +/** + * @brief Set the gradient split strategy within the group, according to gradient data size. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param sizeList A list identifying the percent of each segment. + * @return HcclResult + */ +extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); + /** * @brief Register memories and init resources for remote access. * @@ -279,6 +230,25 @@ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segment */ extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); +/** + * @brief Register memories and init resources for remote access. + * + * @param addrList memory addresses for remote access. + * @param count number of remote memory addresses. 
+ * @return HcclResult + */ +extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); + +HcclResult HcomExecInitialize(); + +HcclResult HcomExecFinalize(); + +HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); + +HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, + const std::vector& addrInfos, + std::function callback); + #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index ea51f497..ad48f70b 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -215,6 +215,10 @@ typedef struct { #define S_IWRITE S_IWUSR #endif +#define mm_no_argument no_argument +#define mm_required_argument required_argument +#define mm_optional_argument optional_argument + #define M_FILE_RDONLY O_RDONLY #define M_FILE_WRONLY O_WRONLY #define M_FILE_RDWR O_RDWR @@ -227,6 +231,7 @@ typedef struct { #define M_BINARY O_RDONLY #define M_TRUNC O_TRUNC #define M_IRWXU S_IRWXU +#define M_APPEND O_APPEND #define M_IN_CREATE IN_CREATE #define M_IN_CLOSE_WRITE IN_CLOSE_WRITE @@ -342,17 +347,17 @@ MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd); MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd, - VOID *sendMsg, - INT32 sendLen, - UINT32 sendFlag, - const mmSockAddr* addr, - INT32 tolen); + VOID *sendMsg, + INT32 sendLen, + UINT32 sendFlag, + const mmSockAddr* addr, + INT32 tolen); MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, - VOID *recvBuf, - mmSize recvLen, - UINT32 recvFlag, - mmSockAddr* addr, - mmSocklen_t *FromLen); + VOID *recvBuf, + mmSize recvLen, + UINT32 
recvFlag, + mmSockAddr* addr, + mmSocklen_t *FromLen); MMPA_FUNC_VISIBILITY INT32 mmSAStartup(); MMPA_FUNC_VISIBILITY INT32 mmSACleanup(); MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode); @@ -360,7 +365,10 @@ MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info); MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName); MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle); MMPA_FUNC_VISIBILITY CHAR *mmDlerror(); -MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period); +MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, + mmUserBlock_t *timerBlock, + UINT milliSecond, + UINT period); MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle); MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); @@ -408,8 +416,12 @@ MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); // Poll related interface MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); -MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, - pmmPollData polledData, mmPollBack pollBack); +MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, + INT32 fdCount, + INT32 timeout, + mmCompletionHandle handleIOCP, + pmmPollData polledData, + mmPollBack pollBack); MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(); MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); @@ -454,8 +466,11 @@ MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt); MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg(); MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg); MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, 
const char *opts); -MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, char *const *argv, const char *opts, const mmStructOption *longOpts, - INT32 *longIndex); +MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, + char *const *argv, + const char *opts, + const mmStructOption *longOpts, + INT32 *longIndex); MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag); MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length); @@ -521,11 +536,14 @@ MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count); MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count); MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); -MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile, - mmProcess *id); - -MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock, - const mmThreadAttr *threadAttr); +MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, + const mmArgvEnv *env, + const char *stdoutRedirectFile, + mmProcess *id); + +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, + const mmUserBlock_t *funcBlock, + const mmThreadAttr *threadAttr); MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index 5db6bbf8..cecdd4a7 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -237,6 +237,11 @@ typedef struct { } mmThreadAttr; typedef VOID (*mmPf)(VOID); + +#define mm_no_argument 0 +#define 
mm_required_argument 1 +#define mm_optional_argument 2 + #define M_FILE_RDONLY GENERIC_READ #define M_FILE_WRONLY GENERIC_WRITE #define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE) @@ -249,6 +254,7 @@ typedef VOID (*mmPf)(VOID); #define M_CREAT _O_CREAT #define M_BINARY _O_BINARY #define M_TRUNC _O_TRUNC +#define M_APPEND _O_APPEND #define M_IREAD _S_IREAD #define M_IRUSR _S_IREAD diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 4e735438..b9b2cbe5 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -18,6 +18,7 @@ #define __CCE_RUNTIME_BASE_H__ #include +#include "toolchain/prof_callback.h" #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { @@ -32,309 +33,8 @@ extern "C" { #endif #endif -/** - * @ingroup dvrt_base - * @brief runtime error numbers. - */ -typedef enum tagRtError { - RT_ERROR_NONE = 0x0, // success - - RT_ERROR_DEVICE_BASE = 0x07010000, - RT_ERROR_DEVICE_NULL, - RT_ERROR_DEVICE_NEW, - RT_ERROR_DEVICE_ID, - RT_ERROR_DEVICE_CHIPTYPE, - RT_ERROR_DEVICE_DEPLOY, - RT_ERROR_DEVICE_RETAIN, - RT_ERROR_DEVICE_PLATFORM, - RT_ERROR_DEVICE_LOADER, - RT_ERROR_DEVICE_LIMIT, - RT_ERROR_DEVICE_PROC_HANG_OUT, - RT_ERROR_DEVICE_POWER_UP_FAIL, - RT_ERROR_DEVICE_POWER_DOWN_FAIL, - RT_ERROR_DEVICE_INVALID, - - RT_ERROR_DRV_BASE = 0x07020000, - RT_ERROR_DRV_NULL, - RT_ERROR_DRV_NEW, - RT_ERROR_DRV_MEMORY, - RT_ERROR_DRV_INPUT, - RT_ERROR_DRV_PTRNULL, - RT_ERROR_DRV_OPEN_AICPU, - RT_ERROR_DRV_CLOSE_AICPU, - RT_ERROR_DRV_SYM_AICPU, - RT_ERROR_DRV_OPEN_TSD, - RT_ERROR_DRV_CLOSE_TSD, - RT_ERROR_DRV_SYM_TSD, - RT_ERROR_DRV_SOURCE, - RT_ERROR_DRV_REPORT, - RT_ERROR_DRV_COMMAND, - RT_ERROR_DRV_OCCUPY, - RT_ERROR_DRV_ERR, - - RT_ERROR_STREAM_BASE = 0x07030000, - RT_ERROR_STREAM_NULL, - RT_ERROR_STREAM_NEW, - RT_ERROR_STREAM_CONTEXT, - RT_ERROR_STREAM_INVALID, - RT_ERROR_STREAM_MODEL, - RT_ERROR_STREAM_FUSION, - RT_ERROR_STREAM_FULL, - RT_ERROR_STREAM_EMPTY, - 
RT_ERROR_STREAM_NOT_COMPLETE, - RT_ERROR_STREAM_SYNC, - RT_ERROR_STREAM_NO_CB_REG, - RT_ERROR_STREAM_DUPLICATE, - RT_ERROR_STREAM_NOT_EXIST, - RT_ERROR_SQ_NO_EXIST_SQ_TO_REUSE, - RT_ERROR_SQID_FULL, - - RT_ERROR_MODEL_BASE = 0x07040000, - RT_ERROR_MODEL_NULL, - RT_ERROR_MODEL_NEW, - RT_ERROR_MODEL_CONTEXT, - RT_ERROR_MODEL_ENDGRAPH, - RT_ERROR_MODEL_STREAM, - RT_ERROR_MODEL_EXCUTOR, - RT_ERROR_MODEL_SETUP, - RT_ERROR_MODEL_ID, - RT_ERROR_MODEL_EXE_FAILED, - RT_ERROR_END_OF_SEQUENCE, // end of sequence - RT_ERROR_MODEL_EXIT, - RT_ERROR_MODEL_EXIT_STREAM_UNBIND, - RT_ERROR_MODEL_EXIT_ID, - RT_ERROR_MODEL_ABORT_NORMAL, - - RT_ERROR_EVENT_BASE = 0x07050000, - RT_ERROR_EVENT_NULL, - RT_ERROR_EVENT_NEW, - RT_ERROR_EVENT_RECORDER_NULL, - RT_ERROR_EVENT_TIMESTAMP_INVALID, - RT_ERROR_EVENT_TIMESTAMP_REVERSAL, - RT_ERROR_EVENT_NOT_COMPLETE, - - RT_ERROR_NOTIFY_BASE = 0x07060000, - RT_ERROR_NOTIFY_NULL, - RT_ERROR_NOTIFY_NEW, - RT_ERROR_NOTIFY_TYPE, - RT_ERROR_NOTIFY_NOT_COMPLETE, - - RT_ERROR_CONTEXT_BASE = 0x07070000, - RT_ERROR_CONTEXT_NULL, - RT_ERROR_CONTEXT_NEW, - RT_ERROR_CONTEXT_DEL, - RT_ERROR_CONTEXT_DEFAULT_STREAM_NULL, - RT_ERROR_CONTEXT_ONLINE_STREAM_NULL, - - RT_ERROR_KERNEL_BASE = 0x07080000, - RT_ERROR_KERNEL_NULL, - RT_ERROR_KERNEL_NEW, - RT_ERROR_KERNEL_LOOKUP, - RT_ERROR_KERNEL_NAME, - RT_ERROR_KERNEL_TYPE, - RT_ERROR_KERNEL_OFFSET, - RT_ERROR_KERNEL_DUPLICATE, - RT_ERROR_KERNEL_UNREGISTERING, - - RT_ERROR_PROGRAM_BASE = 0x07090000, - RT_ERROR_PROGRAM_NULL, - RT_ERROR_PROGRAM_NEW, - RT_ERROR_PROGRAM_DATA, - RT_ERROR_PROGRAM_SIZE, - RT_ERROR_PROGRAM_MEM_TYPE, - RT_ERROR_PROGRAM_MACHINE_TYPE, - RT_ERROR_PROGRAM_USEOUT, - - RT_ERROR_MODULE_BASE = 0x070a0000, - RT_ERROR_MODULE_NULL, - RT_ERROR_MODULE_NEW, - - RT_ERROR_INSTANCE_BASE = 0x070b0000, - RT_ERROR_INSTANCE_NULL, - RT_ERROR_INSTANCE_NEW, - RT_ERROR_INSTANCE_VERSION, - - RT_ERROR_API_BASE = 0x070c0000, - RT_ERROR_API_NULL, - RT_ERROR_API_NEW, - - RT_ERROR_DATADUMP_BASE = 0x070d0000, - 
RT_ERROR_DATADUMP_NULL, - RT_ERROR_DATADUMP_NEW, - RT_ERROR_DATADUMP_TIME, - RT_ERROR_DATADUMP_FILE, - RT_ERROR_DATADUMP_ADDRESS, - RT_ERROR_DATADUMP_LOAD_FAILED, - RT_ERROR_DUMP_ADDR_SET_FAILED, - - RT_ERROR_PROF_BASE = 0x070e0000, - RT_ERROR_PROF_NULL, - RT_ERROR_PROF_NEW, - RT_ERROR_PROF_START, - RT_ERROR_PROF_DEVICE_MEM, - RT_ERROR_PROF_HOST_MEM, - RT_ERROR_PROF_SET_DIR, - RT_ERROR_PROF_OPER, - RT_ERROR_PROF_FULL, - RT_ERROR_PROF_NAME, - - RT_ERROR_PCTRACE_BASE = 0x070f0000, - RT_ERROR_PCTRACE_NULL, - RT_ERROR_PCTRACE_NEW, - RT_ERROR_PCTRACE_TIME, - RT_ERROR_PCTRACE_FILE, - - RT_ERROR_TASK_BASE = 0x07100000, - RT_ERROR_TASK_NULL, - RT_ERROR_TASK_NEW, - RT_ERROR_TASK_TYPE, - RT_ERROR_TASK_ALLOCATOR, - - RT_ERROR_COMMON_BASE = 0x07110000, - RT_ERROR_INVALID_VALUE, // RT_ERROR_INPUT_INVALID - RT_ERROR_MEMORY_ADDRESS_UNALIGNED, - RT_ERROR_SEC_HANDLE, - RT_ERROR_OS_HANDLE, - RT_ERROR_MUTEX_LOCK, - RT_ERROR_MUTEX_UNLOCK, - RT_ERROR_CALLOC, - RT_ERROR_POOL_RESOURCE, - RT_ERROR_TRANS_ARGS, - RT_ERROR_METADATA, - RT_ERROR_LOST_HEARTBEAT, - RT_ERROR_REPORT_TIMEOUT, - RT_ERROR_FEATURE_NOT_SUPPROT, - RT_ERROR_MEMORY_ALLOCATION, - RT_ERROR_MEMORY_FREE, - RT_ERROR_INVALID_MEMORY_TYPE, - - RT_ERROR_DEBUG_BASE = 0x07120000, - RT_ERROR_DEBUG_NULL, - RT_ERROR_DEBUG_NEW, - RT_ERROR_DEBUG_SIGNAL, - RT_ERROR_DEBUG_OPEN, - RT_ERROR_DEBUG_WRITE, - RT_ERROR_DEBUG_REGISTER_FAILED, - RT_ERROR_DEBUG_UNREGISTER_FAILED, - - RT_ERROR_ENGINE_BASE = 0x07130000, - RT_ERROR_ENGINE_NULL, - RT_ERROR_ENGINE_NEW, - RT_ERROR_ENGINE_THREAD, - - RT_ERROR_LABEL_BASE = 0x07140000, - RT_ERROR_LABEL_NULL, - RT_ERROR_LABEL_NEW, - RT_ERROR_LABEL_CONTEXT, - RT_ERROR_LABEL_STREAM, - RT_ERROR_LABEL_MODEL, - RT_ERROR_LABEL_ALLOCATOR, - RT_ERROR_LABEL_FREE, - RT_ERROR_LABEL_SET, - RT_ERROR_LABEL_ID, - - RT_ERROR_TSFW_BASE = 0x07150000, - RT_ERROR_TSFW_UNKNOWN, - RT_ERROR_TSFW_NULL_PTR, - RT_ERROR_TSFW_ILLEGAL_AI_CORE_ID, - RT_ERROR_TSFW_ILLEGAL_PARAM, - RT_ERROR_TSFW_TASK_CMD_QUEUE_FULL, - 
RT_ERROR_TSFW_TASK_CMD_QUEUE_EMPTY, - RT_ERROR_TSFW_TASK_REPORT_QUEUE_FULL, - RT_ERROR_TSFW_TASK_REPORT_QUEUE_EMPTY, - RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_OCCUPYED, - RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_FREED, - RT_ERROR_TSFW_L2_MEM_INSUFFICIENT_SPACE, - RT_ERROR_TSFW_L2_MALLOC_FAILED, - RT_ERROR_TSFW_DMA_CHANNEL_ALL_OCCUPYED, - RT_ERROR_TSFW_MEMCPY_OP_FAILED, - RT_ERROR_TSFW_BS_SLOT_ALL_OCCUPYED, - RT_ERROR_TSFW_TBS_SLOT_REPEAT_FREE, - RT_ERROR_TSFW_PRIORITY_TASK_LIST_FULL, - RT_ERROR_TSFW_PRIORITY_TASK_LIST_EMPTY, - RT_ERROR_TSFW_NO_STREAM_LIST_NEED_TO_BE_PROCESSED, - RT_ERROR_TSFW_REPEAT_MARK_STREAM_NEED_SERVICE, - RT_ERROR_TSFW_SYS_DMA_CHANNEL_ALL_OCCUPAPYED, - RT_ERROR_TSFW_NO_HBML2TASKNODE_FOUND, - RT_ERROR_TSFW_SQNODE_NODE_SLOT_ALL_OCCUPAPYED, - RT_ERROR_TSFW_CQNODE_NODE_SLOT_ALL_OCCUPAPYED, - RT_ERROR_TSFW_SQNODE_NOT_ENOUGH, - RT_ERROR_TSFW_SQNODE_SLOT_REPEAT_FREE, - RT_ERROR_TSFW_CQNODE_SLOT_REPEAT_FREE, - RT_ERROR_TSFW_CQ_REPORT_FAILED, - RT_ERROR_TSFW_SYS_DMA_RESET_SUCCESS, - RT_ERROR_TSFW_SYS_DMA_RESET_FAILED, - RT_ERROR_TSFW_SYS_DMA_TRNSFER_FAILED, - RT_ERROR_TSFW_SYS_DMA_MEMADDRALIGN_FAILED, - RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_FULL, - RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_EMPTY, - RT_ERROR_TSFW_TIMER_EVENT_FULL, - RT_ERROR_TSFW_TASK_L2_DESC_ENTRY_NOT_ENOUGH, - RT_ERROR_TSFW_AICORE_TIMEOUT, - RT_ERROR_TSFW_AICORE_EXCEPTION, - RT_ERROR_TSFW_AICORE_TRAP_EXCEPTION, - RT_ERROR_TSFW_AICPU_TIMEOUT, - RT_ERROR_TSFW_SDMA_L2_TO_DDR_MALLOC_FAIL, - RT_ERROR_TSFW_AICPU_EXCEPTION, - RT_ERROR_TSFW_AICPU_DATADUMP_RSP_ERR, - RT_ERROR_TSFW_AICPU_MODEL_RSP_ERR, - RT_ERROR_TSFW_REPEAT_ACTIVE_MODEL_STREAM, - RT_ERROR_TSFW_REPEAT_NOTIFY_WAIT, - RT_ERROR_TSFW_DEBUG_INVALID_SQCQ, - RT_ERROR_TSFW_DEBUG_WRONG_COMMAND_TYPE, - RT_ERROR_TSFW_DEBUG_CMD_PROCESS, - RT_ERROR_TSFW_DEBUG_INVALID_DEVICE_STATUS, - RT_ERROR_TSFW_DEBUG_NOT_IN_DEBUG_STATUS, - RT_ERROR_TSFW_DEBUG_INVALID_TASK_STATUS, - RT_ERROR_TSFW_DEBUG_TASK_EMPTY, - RT_ERROR_TSFW_DEBUG_TASK_FULL, - 
RT_ERROR_TSFW_DEBUG_TASK_NOT_EXIST, - RT_ERROR_TSFW_DEBUG_AI_CORE_FULL, - RT_ERROR_TSFW_DEBUG_AI_CORE_NOT_EXIST, - RT_ERROR_TSFW_DEBUG_AI_CORE_EXCEPTION, - RT_ERROR_TSFW_DEBUG_AI_CORE_TIMEOUT, - RT_ERROR_TSFW_DEBUG_BREAKPOINT_FULL, - RT_ERROR_TSFW_DEBUG_READ_ERROR, - RT_ERROR_TSFW_DEBUG_WRITE_FAIL, - RT_ERROR_TSFW_QUEUE_FULL, - RT_ERROR_TSFW_QUEUE_EMPTY, - RT_ERROR_TSFW_QUEUE_ALLOC_MEM_FAIL, - RT_ERROR_TSFW_QUEUE_DATA_SIZE_UNMATCH, - RT_ERROR_TSFW_PCIE_DMA_INVLD_CPY_TYPE, - RT_ERROR_TSFW_INVLD_CPY_DIR, - RT_ERROR_TSFW_PCIE_DMA_INVLD_CQ_DES, - RT_ERROR_TSFW_PCIE_DMA_CPY_ERR, - RT_ERROR_TSFW_PCIE_DMA_LNK_CHN_BUSY, - RT_ERROR_TSFW_PROFILE_BUFF_FULL, - RT_ERROR_TSFW_PROFILE_MODE_CONFLICT, - RT_ERROR_TSFW_PROFILE_OTHER_PID_ON, - RT_ERROR_TSFW_SCHD_AIC_TASK_PRELOAD_FAILED, - RT_ERROR_TSFW_TSCPU_CLOSE_FAILED, - RT_ERROR_TSFW_EXPECT_FAIL, - RT_ERROR_TSFW_REPEAT_MODEL_STREAM, - RT_ERROR_TSFW_STREAM_MODEL_UNBIND, - RT_ERROR_TSFW_MODEL_EXE_FAILED, - RT_ERROR_TSFW_IPC_SEND_FAILED, - RT_ERROR_TSFW_IPC_PROC_REG_FAILED, - RT_ERROR_TSFW_STREAM_FULL, - RT_ERROR_TSFW_END_OF_SEQUENCE, - RT_ERROR_TSFW_SWITCH_STREAM_LABEL, - RT_ERROR_TSFW_TRANS_SQE_FAIL, - RT_ERROR_TSFW_RESERVED, - - RT_ERROR_SUBSCRIBE_BASE = 0x07160000, - RT_ERROR_SUBSCRIBE_NULL, - RT_ERROR_SUBSCRIBE_NEW, - RT_ERROR_SUBSCRIBE_STREAM, - RT_ERROR_SUBSCRIBE_THREAD, - RT_ERROR_SUBSCRIBE_GROUP, - - RT_ERROR_GROUP_BASE = 0x07170000, - RT_ERROR_GROUP_NOT_SET, - RT_ERROR_GROUP_NOT_CREATE, - - RT_ERROR_RESERVED = 0x07ff0000, - }rtError_t; +typedef int32_t rtError_t; +static const int32_t RT_ERROR_NONE = 0; // success /** * @ingroup dvrt_base @@ -387,10 +87,20 @@ typedef struct rtExceptionInfo { uint32_t deviceid; } rtExceptionInfo; +typedef struct rtTaskFailInfo { + uint32_t taskid; + uint32_t streamid; + uint32_t tid; + uint32_t deviceid; + uint32_t retcode; +} rtTaskFailInfo; + typedef void (*rtErrorCallback)(rtExceptionType); typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); +typedef void 
(*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo); + typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); /** @@ -447,6 +157,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* */ RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); +/** + * @ingroup profiling_base + * @brief ts set profiling reporter callback. + */ +RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback); + /** * @ingroup dvrt_base * @brief Returns the last error from a runtime call. @@ -485,6 +201,16 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback); */ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback); +/** + * @ingroup dvrt_base + * @brief register callback for fail task + * @param [in] moduleName unique register name, can't be null + * @param [in] callback fail task callback function + * @param [out] NA + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback); + /** * @ingroup dvrt_base * @brief notify handle. 
diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index f1a70eaa..12a407d7 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -121,14 +121,6 @@ typedef struct tagRtMemoryConfig { typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; -/** - * @ingroup - * @brief get platform - * @param [in] platForm - * @return platForm - */ -RTS_API rtError_t rtGetPlatformConfig(rtPlatformConfig_t *platForm); - /** * @ingroup * @brief get AI core count @@ -169,13 +161,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate */ RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); -/** - * @ingroup - * @brief set platform in gen ctx - * @param [in] platForm - * @return RT_ERROR_NONE for ok, errno for failed - */ -RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); /** * @ingroup @@ -185,6 +170,14 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); */ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); +/** + * @ingroup + * @brief get runtime version. The version is returned as (1000 * major + 10 * minor). For example, RUNTIME 9.2 would be represented by 9020. 
+ * @param [out] runtimeVersion + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index b378e3b0..d1a91a9b 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -330,12 +330,12 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3 FEATURE_TYPE_MEMCPY = 0, FEATURE_TYPE_RSV, } rtFeatureType_t; - * @param [in] infoType info type + * @param [in] featureInfo info type typedef enum tagMemcpyInfo { MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, MEMCPY_INFO _RSV, } rtMemcpyInfo_t; - * @param [out] value the capability info + * @param [out] value the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT * @return RT_ERROR_NONE for ok */ RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h index d3d5956f..83cafa3c 100644 --- a/third_party/fwkacllib/inc/runtime/rt.h +++ b/third_party/fwkacllib/inc/runtime/rt.h @@ -28,4 +28,4 @@ #include "rt_model.h" #include "stream.h" -#endif // __CCE_RUNTIME_RT_H__ \ No newline at end of file +#endif // __CCE_RUNTIME_RT_H__ diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h index d30564b8..d5050f35 100644 --- a/third_party/fwkacllib/inc/tdt/status.h +++ b/third_party/fwkacllib/inc/tdt/status.h @@ -34,9 +34,16 @@ using TDT_StatusT = uint32_t; typedef uint32_t TDT_StatusT; #endif +#define LINUX 0 +#define WINDOWS 1 + #ifndef TDT_LIB_EXPORT +#if(TARGET_SYSTEM_NAME == WINDOWS) +#define TDT_LIB_EXPORT __declspec(dllexport) +#else #define TDT_LIB_EXPORT __attribute__((visibility("default"))) #endif +#endif /** * @ingroup tdt 
status. * diff --git a/third_party/fwkacllib/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/tdt/tsd_client.h index 6066a12e..665c8b82 100644 --- a/third_party/fwkacllib/inc/tdt/tsd_client.h +++ b/third_party/fwkacllib/inc/tdt/tsd_client.h @@ -23,6 +23,7 @@ #include #include "tdt/status.h" #include "tdt/data_common.h" +#include "toolchain/prof_callback.h" #ifdef __cplusplus extern "C" { @@ -37,7 +38,7 @@ extern "C" { * Used for the Framework process to communicate with the TSDDaemon process, * and notify TSD to complete the initialization of other processes * -* @param phyDeviceId [IN] type #unsigned int. Physical device ID +* @param logicDeviceId [IN] type #unsigned int. Logic device ID * @param rankSize [IN] type #unsigned int. The rankSize of the training. * The default value is 1. When rankSize is greater than 1, * HCCP will be pulled to perform set communication related operations. @@ -49,7 +50,7 @@ extern "C" { * @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t rankSize); +TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); /** * @ingroup Close @@ -67,7 +68,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t ra * @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); +TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId); /** * @ingroup UpdateProfilingMode @@ -85,7 +86,26 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); * @li tsd_client.h: Header file where the interface declaration is located. 
* @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t phyDeviceId, const uint32_t flag); +TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); + +/** +* @ingroup TsdSetMsprofReporterCallback +* @brief Sets the aicpu profiling callback function for inference scenarios +* +* @par Function +* Sets the profiling callback function of the aicpu_sd process in offline mode +* +* @param callback [IN] type #MsprofReporterCallback. Callback function +* @retval TDT_OK Success +* @retval OtherValues Failure +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tsd_client.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'TDT_StatusT' defined +* @li prof_callback.h: Header file where 'MsprofReporterCallback' defined +*/ +TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback); /** * @ingroup CreateCmdParameterObj diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h new file mode 100644 index 00000000..3fad74bc --- /dev/null +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -0,0 +1,135 @@ +/** + * Copyright 2020-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * @file prof_callback.h + * @brief declaration of profiling callbacks + */ + +#ifndef MSPROFILER_PROF_CALLBACK_H_ +#define MSPROFILER_PROF_CALLBACK_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + + +#include "stddef.h" +#include "stdint.h" + +/** + * @name MsprofErrorCode + * @brief error code + */ +enum MsprofErrorCode { + MSPROF_ERROR_NONE = 0, + MSPROF_ERROR_MEM_NOT_ENOUGH, + MSPROF_ERROR_GET_ENV, + MSPROF_ERROR_CONFIG_INVALID, + MSPROF_ERROR_ACL_JSON_OFF, + MSPROF_ERROR, +}; + +#define MSPROF_ENGINE_MAX_TAG_LEN (31) + +/** + * @name ReporterData + * @brief struct of data to report + */ +struct ReporterData { + char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; // the sub-type of the module, data with different tag will be written + int deviceId; // the index of device + size_t dataLen; // the length of send data + unsigned char *data; // the data content +}; + +/** + * @name MsprofReporterModuleId + * @brief module id of data to report + */ +enum MsprofReporterModuleId { + MSPROF_MODULE_DATA_PREPROCESS = 0, // DATA_PREPROCESS + MSPROF_MODULE_HCCL, // HCCL + MSPROF_MODULE_ACL, // AclModule + MSPROF_MODULE_FRAMEWORK, // Framework + MSPROF_MODULE_RUNTIME // runtime +}; + +/** + * @name MsprofReporterCallbackType + * @brief reporter callback request type + */ +enum MsprofReporterCallbackType { + MSPROF_REPORTER_REPORT = 0, // report data + MSPROF_REPORTER_INIT, // init reporter + MSPROF_REPORTER_UNINIT, // uninit reporter +}; + +/** + * @name MsprofReporterCallback + * @brief callback to start reporter/stop reporter/report data + * @param moduleId [IN] enum MsprofReporterModuleId + * @param type [IN] enum MsprofReporterCallbackType + * @param data [IN] callback data (nullptr on INIT/UNINIT) + * @param len [IN] callback data size (0 on INIT/UNINIT) + * @return enum MsprofErrorCode + */ +typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len); + + +#define MSPROF_OPTIONS_DEF_LEN_MAX (2048) + +/** + * @name 
MsprofGeOptions + * @brief struct of MSPROF_CTRL_INIT_GE_OPTIONS + */ +struct MsprofGeOptions { + char jobId[MSPROF_OPTIONS_DEF_LEN_MAX]; + char options[MSPROF_OPTIONS_DEF_LEN_MAX]; +}; + +/** + * @name MsprofCtrlCallbackType + * @brief ctrl callback request type + */ +enum MsprofCtrlCallbackType { + MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env + MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json + MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options + MSPROF_CTRL_FINALIZE // stop profiling +}; + +/** + * @name MsprofCtrlCallback + * @brief callback to start/stop profiling + * @param type [IN] enum MsprofCtrlCallbackType + * @param data [IN] callback data + * @param len [IN] callback data size + * @return enum MsprofErrorCode + */ +typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len); + +/** + * @name MsprofSetDeviceCallback + * @brief callback to notify set/reset device + * @param devId [IN] device id + * @param isOpenDevice [IN] true: set device, false: reset device + */ +typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice); + +#ifdef __cplusplus +} +#endif + +#endif // MSPROFILER_PROF_CALLBACK_H_ diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h index c734380c..ff91351b 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h +++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h @@ -16,7 +16,17 @@ #ifndef MSPROF_ENGINE_PROF_REPORTER_H_ #define MSPROF_ENGINE_PROF_REPORTER_H_ +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE != LINUX) +#define MSVP_PROF_API __declspec(dllexport) +#else #define MSVP_PROF_API __attribute__((visibility("default"))) +#endif + +#include "prof_callback.h" /** * @file prof_reporter.h @@ -25,20 +35,6 @@ */ namespace Msprof { namespace Engine { -/// the max tag length -#define MSPROF_ENGINE_MAX_TAG_LEN (31) -/** - * @ingroup reporter - * @brief 
struct ReporterData - * the sturct of the data send to libmsprof - */ -struct ReporterData { - char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; ///< the sub-type of the module, data with different tag will be writen - int deviceId; ///< the physical id of device - size_t dataLen; ///< the length of send data - unsigned char *data; ///< the data content -}; - /** * @ingroup reporter * @brief class Reporter @@ -86,4 +82,4 @@ class MSVP_PROF_API Reporter { } // namespace Engine } // namespace Msprof -#endif // MSPROF_ENGINE_PROF_REPORTER_H_ \ No newline at end of file +#endif // MSPROF_ENGINE_PROF_REPORTER_H_ diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index bce58f32..5faca0ae 100644 --- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -18,7 +18,9 @@ #define D_SYSLOG_H_ #ifdef __cplusplus +#ifndef LOG_CPP extern "C" { +#endif #endif // __cplusplus #ifndef LINUX @@ -105,6 +107,7 @@ extern "C" { #define SECURITY_LOG_MASK (0x00100000) #define RUN_LOG_MASK (0x01000000) #define OPERATION_LOG_MASK (0x10000000) +#define RESERVERD_LENGTH 52 typedef struct tagDCODE { const char *cName; @@ -116,6 +119,18 @@ typedef struct tagKV { char *value; } KeyValue; +typedef enum { + APPLICATION = 0, + SYSTEM +} ProcessType; + +typedef struct { + ProcessType type; + unsigned int pid; + unsigned int deviceId; + char reserved[RESERVERD_LENGTH]; +} LogAttr; + /** * @ingroup slog * @@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent); */ DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); +/** + * @ingroup slog + * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION + * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetAttr(LogAttr logAttr); + /** * @ingroup slog * @brief 
dlog_error: print error log @@ -367,6 +390,8 @@ void DlogInner(int moduleId, int level, const char *fmt, ...); void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); #ifdef __cplusplus +#ifndef LOG_CPP } +#endif // LOG_CPP #endif // __cplusplus #endif // D_SYSLOG_H_