From a8cfa7531427d217c97e88888587cc5b0ccb5ef9 Mon Sep 17 00:00:00 2001 From: dajunli Date: Sat, 19 Dec 2020 11:12:21 +0800 Subject: [PATCH] change hccl inference --- ge/graph/manager/memory_api.cc | 2 +- .../node_executor/hccl/hccl_node_executor.cc | 50 +++++++++---------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/ge/graph/manager/memory_api.cc b/ge/graph/manager/memory_api.cc index 45e4bb65..0798eb51 100644 --- a/ge/graph/manager/memory_api.cc +++ b/ge/graph/manager/memory_api.cc @@ -63,7 +63,7 @@ Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t }); auto hcom_remote_mem_register = - (HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "hcom_remote_access_mem_register"); + (HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "HcomRegRemoteAccessMem"); if (hcom_remote_mem_register == nullptr) { GELOGE(FAILED, "Failed to invoke hcom_remote_mem_register function."); return FAILED; diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 704cab77..eebe2a81 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -42,10 +42,10 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function do GELOGE(FAILED, "hccl handle is nullptr! "); return FAILED; } - auto EnqueueHcomOpertion = (HcclResult(*)(HcomOpertion, std::function))dlsym( - context.handle_, "EnqueueHcomOpertion"); - if (EnqueueHcomOpertion == nullptr) { - GELOGE(FAILED, "Failed to invoke EnqueueHcomOpertion hcom unknown node function."); + auto HcomExecEnqueueOperation = (HcclResult(*)(HcomOperation, std::function))dlsym( + context.handle_, "HcomExecEnqueueOperation"); + if (HcomExecEnqueueOperation == nullptr) { + GELOGE(FAILED, "Failed to invoke HcomExecEnqueueOperation hcom unknown node function."); if (dlclose(context.handle_) != 0) { GELOGW("Failed to close handle %s", dlerror()); } @@ -70,7 +70,7 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function do const OpDescPtr op_desc = node_item.GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - HcomOpertion op_info; + HcomOperation op_info; op_info.hcclType = op_desc->GetType(); op_info.inputPtr = inputs.empty() ? nullptr : inputs[0]; op_info.outputPtr = outputs.empty() ? nullptr : outputs[0]; @@ -96,7 +96,7 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function do op_info.root = root_id; auto callback = [this, op_desc](HcclResult status) { if (status != HCCL_SUCCESS) { - GELOGE(HCCL_E_INTERNAL, "node %s call EnqueueHcomOpertion failed, ret: 0x%X", op_desc->GetName().c_str(), status); + GELOGE(HCCL_E_INTERNAL, "node %s call HcomExecEnqueueOperation failed, ret: 0x%X", op_desc->GetName().c_str(), status); } std::lock_guard lock(this->hccl_mutex_); this->cond_.notify_all(); @@ -110,9 +110,9 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function do context.GetNodeName(), op_info.hcclType.c_str(), count, op_info.dataType, op_info.opType, op_info.root); op_info.count = count; - HcclResult hccl_ret = EnqueueHcomOpertion(op_info, callback); + HcclResult hccl_ret = HcomExecEnqueueOperation(op_info, callback); if (hccl_ret != HCCL_SUCCESS) { - GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret); + GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); return HCCL_E_INTERNAL; } @@ -213,11 +213,11 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector done_callback) { GELOGI("[%s] RdmaNodeTask::ExecuteAsync in.", context.GetNodeName()); - auto EnqueueRemoteAccess = + auto HcomExecEnqueueRemoteAccess = (HcclResult(*)(const string &, const vector &, - std::function))dlsym(context.handle_, "EnqueueRemoteAccess"); - if (EnqueueRemoteAccess == nullptr) { - GELOGE(FAILED, "Failed to invoke EnqueueRemoteAccess hcom unknown node function."); + std::function))dlsym(context.handle_, "HcomExecEnqueueRemoteAccess"); + if (HcomExecEnqueueRemoteAccess == nullptr) { + GELOGE(FAILED, "Failed to invoke HcomExecEnqueueRemoteAccess hcom unknown node function."); if (dlclose(context.handle_) != 0) { GELOGW("Failed to close handle %s", dlerror()); } @@ -228,15 +228,15 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function do auto callback = [this](HcclResult status) { if (status != HCCL_SUCCESS) { - GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", status); + GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", status); } std::lock_guard lock(this->hccl_mutex_); this->cond_.notify_all(); GELOGI("rdma callback success."); }; - HcclResult hccl_ret = EnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); + HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); if (hccl_ret != HCCL_SUCCESS) { - GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret); + GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); return HCCL_E_INTERNAL; } @@ -307,32 +307,32 @@ Status HcclNodeExecutor::Initialize() { GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", dlerror()); return FAILED; } - auto HcomExcutorInitialize = (HcclResult(*)())dlsym(handle_, "HcomExcutorInitialize"); - if (HcomExcutorInitialize == nullptr) { - GELOGE(FAILED, "Failed to invoke HcomExcutorInitialize hcom unknown node function."); + auto HcomExecInitialize = (HcclResult(*)())dlsym(handle_, "HcomExecInitialize"); + if (HcomExecInitialize == nullptr) { + GELOGE(FAILED, "Failed to invoke HcomExecInitialize hcom unknown node function."); return FAILED; } - HcclResult hccl_ret = HcomExcutorInitialize(); + HcclResult hccl_ret = HcomExecInitialize(); if (hccl_ret == HCCL_E_PTR) { GELOGI("Hccl comm is null, hcom executor initialize is not required."); } else if (hccl_ret == HCCL_SUCCESS) { GELOGI("Hcom executor initialize success."); } else { - GELOGE(FAILED, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret); + GELOGE(FAILED, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); return FAILED; } return SUCCESS; } Status HcclNodeExecutor::Finalize() { - auto HcomExcutorFinalize = (HcclResult(*)())dlsym(handle_, "HcomExcutorFinalize"); - if (HcomExcutorFinalize == nullptr) { - GELOGE(FAILED, "Failed to invoke HcomExcutorFinalize hcom unknown node function."); + auto HcomExecFinalize = (HcclResult(*)())dlsym(handle_, "HcomExecFinalize"); + if (HcomExecFinalize == nullptr) { + GELOGE(FAILED, "Failed to invoke HcomExecFinalize hcom unknown node function."); return FAILED; } - HcclResult hccl_ret = HcomExcutorFinalize(); + HcclResult hccl_ret = HcomExecFinalize(); if (hccl_ret != HCCL_SUCCESS) { - GELOGE(FAILED, "Call HcomExcutorFinalize failed, ret: 0x%X", hccl_ret); + GELOGE(FAILED, "Call HcomExecFinalize failed, ret: 0x%X", hccl_ret); return FAILED; } // dlclose file handle