Browse Source

!171 for cust aicpu support

Merge pull request !171 from weiyang/development
tags/v1.1.0
王涛 Gitee 3 years ago
parent
commit
189ecd496c
6 changed files with 149 additions and 73 deletions
  1. +1
    -8
      ge/executor/ge_executor.cc
  2. +2
    -0
      ge/graph/load/new_model_manager/davinci_model.cc
  3. +119
    -61
      ge/graph/load/new_model_manager/model_manager.cc
  4. +8
    -4
      ge/graph/load/new_model_manager/model_manager.h
  5. +16
    -0
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  6. +3
    -0
      ge/single_op/single_op_model.cc

+ 1
- 8
ge/executor/ge_executor.cc View File

@@ -1058,14 +1058,7 @@ ge::Status GeExecutor::ExecuteAsync(DynamicSingleOp *executor, const vector<GeTe
} }


Status GeExecutor::ReleaseSingleOpResource(void *stream) { Status GeExecutor::ReleaseSingleOpResource(void *stream) {
// get current context
rtContext_t rt_cur_ctx = nullptr;
auto rt_err = rtCtxGetCurrent(&rt_cur_ctx);
if (rt_err != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_err));
return RT_FAILED;
}
ModelManager::GetInstance()->ClearAICPUSo(rt_cur_ctx);
ModelManager::GetInstance()->ClearAicpuSo();
return SingleOpManager::GetInstance().ReleaseResource(stream); return SingleOpManager::GetInstance().ReleaseResource(stream);
} }




+ 2
- 0
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -463,6 +463,8 @@ Status DavinciModel::DoTaskSink() {


GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed."); GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed.");


GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");

GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");


GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed."); GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");


+ 119
- 61
ge/graph/load/new_model_manager/model_manager.cc View File

@@ -44,12 +44,18 @@ const std::string kCmdTypeProfFinalize = "prof_finalize";
const std::string kCmdTypeProfStart = "prof_start"; const std::string kCmdTypeProfStart = "prof_start";
const std::string kCmdTypeProfStop = "prof_stop"; const std::string kCmdTypeProfStop = "prof_stop";
const char *const kLoadOpFromBuf = "loadOpFromBuf"; const char *const kLoadOpFromBuf = "loadOpFromBuf";
const char *const kBatchLoadBuf = "batchLoadsoFrombuf";
const char *const kDeleteCustOp = "deleteCustOp";
struct CustAicpuSoBuf { struct CustAicpuSoBuf {
uint64_t kernelSoBuf; uint64_t kernelSoBuf;
uint32_t kernelSoBufLen; uint32_t kernelSoBufLen;
uint64_t kernelSoName; uint64_t kernelSoName;
uint32_t kernelSoNameLen; uint32_t kernelSoNameLen;
} __attribute__((packed)); } __attribute__((packed));
// Argument header copied to the device and passed to rtCpuKernelLaunch by
// ModelManager::LaunchKernelCustAicpuSo. It describes a batch of custom AICPU
// shared objects as a count plus the address of a contiguous array of
// CustAicpuSoBuf entries. Declared packed so no padding is inserted between
// the 32-bit count and the 64-bit address.
struct BatchLoadOpFromBufArgs {
// Number of CustAicpuSoBuf entries in the array that `args` refers to.
uint32_t soNum;
// Address (stored as an integer) of the rtMalloc'd buffer holding the CustAicpuSoBuf array.
uint64_t args;
} __attribute__((packed));
} // namespace } // namespace


DumpProperties ModelManager::dump_properties_; DumpProperties ModelManager::dump_properties_;
@@ -1096,97 +1102,149 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) {
return SUCCESS; return SUCCESS;
} }


Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name) {
GELOGI("LoadCustAicpuSo in, op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str());
Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name) {
GELOGI("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str());
std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr());
if (aicpu_kernel == nullptr) {
GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str());
return INTERNAL_ERROR;
}

// get current context // get current context
rtContext_t rt_cur_ctx = nullptr; rtContext_t rt_cur_ctx = nullptr;
auto rt_err = rtCtxGetCurrent(&rt_cur_ctx);
if (rt_err != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_err));
auto rt_error = rtCtxGetCurrent(&rt_cur_ctx);
if (rt_error != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_error));
return RT_FAILED; return RT_FAILED;
} }
// use current context as resource key instead

// use current context as resource key
uintptr_t resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx); uintptr_t resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx);
auto it = cust_aicpu_so_.find(resource_id); auto it = cust_aicpu_so_.find(resource_id);
if (it == cust_aicpu_so_.end()) { if (it == cust_aicpu_so_.end()) {
GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s",
op_desc->GetName().c_str(), so_name.c_str());
std::set<string> so_name_set;
so_name_set.insert(so_name);
cust_aicpu_so_[resource_id] = so_name_set;
GELOGI("LoadCustAicpuSo new aicpu so resource_id %lu.", resource_id);
std::map<string, CustAICPUKernelPtr> new_so_name;
new_so_name.insert({so_name, aicpu_kernel});
cust_aicpu_so_[resource_id] = new_so_name;
GELOGI("LoadCustAicpuSo new aicpu so resource id %lu", resource_id);
return SUCCESS; return SUCCESS;
} }
auto it_so_name = it->second.find(so_name); auto it_so_name = it->second.find(so_name);
if (it_so_name == it->second.end()) { if (it_so_name == it->second.end()) {
GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s",
op_desc->GetName().c_str(), so_name.c_str());
it->second.insert(so_name);
GELOGI("LoadCustAicpuSo add aicpu so resource_id %lu.", resource_id);
it->second.insert({so_name, aicpu_kernel});
GELOGI("LoadCustAicpuSo add aicpu so resource id %lu", resource_id);
} }
return SUCCESS; return SUCCESS;
} }


Status ModelManager::ClearAICPUSo(void *ctx) {
auto ctx_id = reinterpret_cast<uintptr_t>(ctx);
GELOGI("ClearAICPUSo in. resource id = 0x%lx", static_cast<uint64_t>(ctx_id));
Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) {
GELOGI("LaunchCustAucpuSo in, kernel name %s", kernel_name.c_str());
std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
auto it = cust_aicpu_so_.find(ctx_id);
if (cust_aicpu_so_.size() == 0) return SUCCESS;
// get current context
rtContext_t rt_cur_ctx = nullptr;
auto rt_error = rtCtxGetCurrent(&rt_cur_ctx);
if (rt_error != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_error));
return RT_FAILED;
}
uintptr_t resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx);
auto it = cust_aicpu_so_.find(resource_id);
if (it == cust_aicpu_so_.end()) { if (it == cust_aicpu_so_.end()) {
GELOGI("Cust aicpu so map is empty, context id %lu", resource_id);
return SUCCESS; return SUCCESS;
} }
(void)cust_aicpu_so_.erase(it);
return SUCCESS;
}

Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so_name) {
CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr());
if (aicpu_kernel == nullptr) {
GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str());
return INTERNAL_ERROR;
}
const void *aicpu_data = aicpu_kernel->GetBinData();
uint32_t aicpu_data_length = aicpu_kernel->GetBinDataSize();


void *d_aicpu_data = nullptr;
void *d_so_name = nullptr;
void *args = nullptr;
vector<void *> allocated_mem;
rtError_t status; rtError_t status;
rtStream_t stream = nullptr; rtStream_t stream = nullptr;
GE_CHK_RT(rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM));
GE_CHK_RT(rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE));
GE_CHK_RT(rtMalloc(&d_so_name, so_name.size(), RT_MEMORY_HBM));
GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast<const void *>(so_name.c_str()),
so_name.size(), RT_MEMCPY_HOST_TO_DEVICE));

CustAicpuSoBuf cust_aicpu_so_buf;
cust_aicpu_so_buf.kernelSoBuf = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_aicpu_data));
cust_aicpu_so_buf.kernelSoBufLen = aicpu_data_length;
cust_aicpu_so_buf.kernelSoName = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_so_name));
cust_aicpu_so_buf.kernelSoNameLen = so_name.size();

uint32_t args_size = sizeof(CustAicpuSoBuf);
GE_CHK_RT(rtMalloc(&args, args_size, RT_MEMORY_HBM));
GE_CHK_RT(rtMemcpy(args, args_size, static_cast<void *>(&cust_aicpu_so_buf), args_size, RT_MEMCPY_HOST_TO_DEVICE));
vector<CustAicpuSoBuf> v_cust_so;
void *args = nullptr;

for (const auto &it_so : it->second) {
const void *aicpu_data = it_so.second->GetBinData();
uint32_t aicpu_data_length = it_so.second->GetBinDataSize();
string so_name = it_so.first;
void *d_aicpu_data = nullptr;
void *d_so_name = nullptr;

status = rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(d_aicpu_data);
status = rtMalloc(&d_so_name, so_name.size(), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(d_so_name);
GE_CHK_RT(rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE));
GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast<const void *>(so_name.c_str()),
so_name.size(), RT_MEMCPY_HOST_TO_DEVICE));

CustAicpuSoBuf cust_aicpu_so_buf;
cust_aicpu_so_buf.kernelSoBuf = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_aicpu_data));
cust_aicpu_so_buf.kernelSoBufLen = aicpu_data_length;
cust_aicpu_so_buf.kernelSoName = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_so_name));
cust_aicpu_so_buf.kernelSoNameLen = so_name.size();
v_cust_so.push_back(cust_aicpu_so_buf);
}
if (kernel_name == kDeleteCustOp) {
(void)cust_aicpu_so_.erase(it);
}

uint32_t args_size = sizeof(CustAicpuSoBuf) * v_cust_so.size();
status = rtMalloc(&args, args_size, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(args);
GE_CHK_RT(rtMemcpy(args, args_size, v_cust_so.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE));

BatchLoadOpFromBufArgs batch_cust_so;
batch_cust_so.soNum = v_cust_so.size();
batch_cust_so.args = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args));

void *batch_args = nullptr;
uint32_t batch_args_size = sizeof(BatchLoadOpFromBufArgs);
status = rtMalloc(&batch_args, batch_args_size, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(batch_args);
GE_CHK_RT(rtMemcpy(batch_args, batch_args_size, static_cast<void *>(&batch_cust_so),
batch_args_size, RT_MEMCPY_HOST_TO_DEVICE));

GE_CHK_RT(rtStreamCreate(&stream, 0)); GE_CHK_RT(rtStreamCreate(&stream, 0));
GE_CHK_RT(rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream));
GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, batch_args, batch_args_size, nullptr, stream));


GELOGI("LaunchCustAicpuSo so buf len %u, so name len %u.", aicpu_data_length, so_name.size());
status = rtStreamSynchronize(stream); status = rtStreamSynchronize(stream);
if (status != RT_ERROR_NONE) { if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status);
GE_CHK_RT(rtStreamDestroy(stream));
GE_CHK_RT(rtFree(args));
GE_CHK_RT(rtFree(d_aicpu_data));
GE_CHK_RT(rtFree(d_so_name));
return RT_ERROR_TO_GE_STATUS(status); return RT_ERROR_TO_GE_STATUS(status);
} }
GE_CHK_RT(rtStreamDestroy(stream));
GE_CHK_RT(rtFree(args));
GE_CHK_RT(rtFree(d_aicpu_data));
GE_CHK_RT(rtFree(d_so_name));
GELOGI("Cpu kernel launch loadOpFromBuf task success.");
std::function<void()> callback = [&]() {
for (auto mem : allocated_mem) {
GE_CHK_RT(rtFree(mem));
}
GE_CHK_RT(rtStreamDestroy(stream));
};
GE_MAKE_GUARD(release, callback);
GELOGI("Cpu kernel launch task success.");
return SUCCESS;
}

// Asks the device to drop the custom AICPU shared objects registered for the
// current runtime context by launching the kDeleteCustOp ("deleteCustOp")
// kernel through LaunchKernelCustAicpuSo. When called with kDeleteCustOp,
// LaunchKernelCustAicpuSo also erases the current context's entry from
// cust_aicpu_so_, and it returns SUCCESS without launching anything when no
// entry exists for the context.
//
// Returns SUCCESS when the helper succeeds.
// NOTE(review): GE_CHK_STATUS_RET presumably logs the message and returns the
// failing status to the caller — confirm against the macro definition.
Status ModelManager::ClearAicpuSo() {
GE_CHK_STATUS_RET(LaunchKernelCustAicpuSo(kDeleteCustOp), "delete cust op so failed.");
return SUCCESS;
}

Status ModelManager::LaunchCustAicpuSo() {
GE_CHK_STATUS_RET(LaunchKernelCustAicpuSo(kBatchLoadBuf), "launch cust op so failed.");
return SUCCESS; return SUCCESS;
} }




+ 8
- 4
ge/graph/load/new_model_manager/model_manager.h View File

@@ -270,9 +270,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {


ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); ge::Status DestroyAicpuSessionForInfer(uint32_t model_id);


ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name);
ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so_name);
ge::Status ClearAICPUSo(void *ctx);
ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name);

ge::Status LaunchCustAicpuSo();

ge::Status ClearAicpuSo();

ge::Status LaunchKernelCustAicpuSo(const string &kernel_name);


ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);


@@ -340,7 +344,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::set<uint64_t> sess_ids_; std::set<uint64_t> sess_ids_;
std::vector<rtExceptionInfo> exception_infos_; std::vector<rtExceptionInfo> exception_infos_;
std::mutex cust_aicpu_mutex_; std::mutex cust_aicpu_mutex_;
std::map<uintptr_t, std::set<std::string>> cust_aicpu_so_;
std::map<uintptr_t, std::map<std::string, CustAICPUKernelPtr>> cust_aicpu_so_;


static DumpProperties dump_properties_; static DumpProperties dump_properties_;
}; };


+ 16
- 0
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc View File

@@ -15,6 +15,7 @@
*/ */


#include "hybrid/node_executor/aicpu/aicpu_node_executor.h" #include "hybrid/node_executor/aicpu/aicpu_node_executor.h"
#include "cce/taskdown_common.hpp"
#include "common/formats/formats.h" #include "common/formats/formats.h"
#include "aicpu/common/aicpu_task_struct.h" #include "aicpu/common/aicpu_task_struct.h"
#include "graph/load/new_model_manager/model_manager.h" #include "graph/load/new_model_manager/model_manager.h"
@@ -630,6 +631,16 @@ Status AicpuNodeTask::Init(const HybridModel &model) {
auto &args = kernel_def.args(); auto &args = kernel_def.args();
args_size_ = kernel_def.args_size(); args_size_ = kernel_def.args_size();


const std::string &so_name = kernel_def.so_name();
const OpDescPtr op_desc = MakeShared<OpDesc>(*(node_item_->op_desc));
const auto &context = kernel_def.context();
auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
if (kernel_type == cce::ccKernelType::CUST_AI_CPU) {
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name), "load cust aicpu so failed.");
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");

}

GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED,
"Node[%s] task def args.size=%zu, but args_size=%u.", "Node[%s] task def args.size=%zu, but args_size=%u.",
node_name.c_str(), args.size(), args_size_); node_name.c_str(), args.size(), args_size_);
@@ -716,7 +727,12 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) {
GELOGI("Node[%s] launch task start. unknown_type=%d.", node_name_.c_str(), unknown_type_); GELOGI("Node[%s] launch task start. unknown_type=%d.", node_name_.c_str(), unknown_type_);
const auto &so_name = task_def_.kernel().so_name(); const auto &so_name = task_def_.kernel().so_name();
const auto &kernel_name = task_def_.kernel().kernel_name(); const auto &kernel_name = task_def_.kernel().kernel_name();
const auto &kcontext = task_def_.kernel().context();
auto kernel_type = static_cast<cce::ccKernelType>(kcontext.kernel_type());
uint32_t flag = RT_KERNEL_DEFAULT; uint32_t flag = RT_KERNEL_DEFAULT;
if (kernel_type == cce::ccKernelType::CUST_AI_CPU) {
flag |= RT_KERNEL_CUSTOM_AICPU;
}
auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()), auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()),
reinterpret_cast<const void *>(kernel_name.c_str()), reinterpret_cast<const void *>(kernel_name.c_str()),
1, // default core dim is 1 1, // default core dim is 1


+ 3
- 0
ge/single_op/single_op_model.cc View File

@@ -31,6 +31,7 @@
#include "task/aicpu_task_builder.h" #include "task/aicpu_task_builder.h"
#include "task/aicpu_kernel_task_builder.h" #include "task/aicpu_kernel_task_builder.h"
#include "task/tbe_task_builder.h" #include "task/tbe_task_builder.h"
#include "graph/load/new_model_manager/model_manager.h"


static std::atomic<std::uint64_t> aicpu_sessionid(0); static std::atomic<std::uint64_t> aicpu_sessionid(0);


@@ -277,6 +278,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
GELOGD("Skip task type: %d", static_cast<int>(task_type)); GELOGD("Skip task type: %d", static_cast<int>(task_type));
} }
} }
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");
return SUCCESS; return SUCCESS;
} }


@@ -448,6 +450,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
GELOGD("Skip task type: %d", static_cast<int>(task_type)); GELOGD("Skip task type: %d", static_cast<int>(task_type));
} }
} }
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");


return SUCCESS; return SUCCESS;
} }


Loading…
Cancel
Save