Browse Source

Support for custom operator.

tags/v1.1.0
unknown 3 years ago
parent
commit
b7928898f3
7 changed files with 71 additions and 18 deletions
  1. +8
    -0
      ge/executor/ge_executor.cc
  2. +38
    -5
      ge/graph/load/new_model_manager/model_manager.cc
  3. +4
    -4
      ge/graph/load/new_model_manager/model_manager.h
  4. +5
    -4
      ge/single_op/single_op_model.cc
  5. +10
    -0
      ge/single_op/task/aicpu_kernel_task_builder.cc
  6. +4
    -5
      ge/single_op/task/op_task.cc
  7. +2
    -0
      ge/single_op/task/op_task.h

+ 8
- 0
ge/executor/ge_executor.cc View File

@@ -1058,6 +1058,14 @@ ge::Status GeExecutor::ExecuteAsync(DynamicSingleOp *executor, const vector<GeTe
}

// Releases the single-op resources associated with `stream`.
//
// Before the release, the current runtime context is queried and handed to
// ModelManager::ClearAICPUSo. If the context query fails, RT_FAILED is
// returned and SingleOpManager::ReleaseResource is not called.
//
// @param stream  stream handle forwarded to SingleOpManager::ReleaseResource.
// @return RT_FAILED when rtCtxGetCurrent fails, otherwise the status returned
//         by SingleOpManager::ReleaseResource.
Status GeExecutor::ReleaseSingleOpResource(void *stream) {
  // Query the runtime context that is current at the time of the call.
  rtContext_t current_ctx = nullptr;
  const auto query_ret = rtCtxGetCurrent(&current_ctx);
  if (query_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(query_ret));
    return RT_FAILED;
  }

  // The status returned by ClearAICPUSo is not checked.
  (void)ModelManager::GetInstance()->ClearAICPUSo(current_ctx);

  return SingleOpManager::GetInstance().ReleaseResource(stream);
}



+ 38
- 5
ge/graph/load/new_model_manager/model_manager.cc View File

@@ -236,6 +236,7 @@ ModelManager::~ModelManager() {
std::lock_guard<std::mutex> lock(map_mutex_);
model_map_.clear();
model_aicpu_kernel_.clear();
cust_aicpu_so_.clear();

GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0)));
}
@@ -399,7 +400,6 @@ Status ModelManager::Unload(uint32_t model_id) {
}
std::lock_guard<std::mutex> lock(exeception_infos_mutex_);
exception_infos_.clear();
cust_aicpu_so_.clear();
return SUCCESS;
}

@@ -1096,15 +1096,47 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) {
return SUCCESS;
}

// Makes sure the custom AICPU so `so_name` has been launched for the current
// runtime context, launching it through LaunchCustAicpuSo if it has not been
// recorded yet.
//
// Loaded so names are tracked per context: cust_aicpu_so_ maps the current
// rtContext_t (converted to uintptr_t) to the set of so names already launched
// under it. A name is recorded only after LaunchCustAicpuSo succeeded.
//
// @param op_desc  op descriptor passed on to LaunchCustAicpuSo; it is
//                 dereferenced for logging, so it must not be null.
// @param so_name  name of the custom AICPU so to load.
// @return RT_FAILED if the current context cannot be queried, the failing
//         status of LaunchCustAicpuSo if the launch fails, SUCCESS otherwise
//         (including when the so was already recorded for this context).
Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name) {
  GELOGI("LoadCustAicpuSo in, op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str());
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);

  // get current context
  rtContext_t rt_cur_ctx = nullptr;
  auto rt_err = rtCtxGetCurrent(&rt_cur_ctx);
  if (rt_err != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_err));
    return RT_FAILED;
  }

  // use current context as resource key
  const uintptr_t resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx);
  auto it = cust_aicpu_so_.find(resource_id);
  if (it == cust_aicpu_so_.end()) {
    // First so seen for this context: launch it, then create the context entry.
    GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s",
                      op_desc->GetName().c_str(), so_name.c_str());
    GELOGI("LaunchCustAicpuSo op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str());
    (void)cust_aicpu_so_.emplace(resource_id, std::set<string>{so_name});
    GELOGI("LoadCustAicpuSo new aicpu so resource_id %lu.", resource_id);
    return SUCCESS;
  }

  // Context already known: launch only if this so name has not been recorded for it.
  if (it->second.count(so_name) == 0) {
    GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s",
                      op_desc->GetName().c_str(), so_name.c_str());
    (void)it->second.insert(so_name);
    GELOGI("LoadCustAicpuSo add aicpu so resource_id %lu.", resource_id);
  }
  return SUCCESS;
}

// Drops the record of custom AICPU so names kept for the runtime context `ctx`.
//
// Only the bookkeeping entry in cust_aicpu_so_ is removed; nothing in this
// function unloads anything through the runtime. Clearing a context that has
// no entry is not an error.
//
// @param ctx  runtime context whose entry should be removed; its pointer value
//             is used as the map key.
// @return always SUCCESS.
Status ModelManager::ClearAICPUSo(void *ctx) {
  // Key on the caller-supplied context (no `rt_cur_ctx` exists in this scope).
  const auto ctx_id = reinterpret_cast<uintptr_t>(ctx);
  GELOGI("ClearAICPUSo in. resource_id = 0x%lx.", static_cast<uint64_t>(ctx_id));
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
  // Erasing by key does nothing when the context has no entry.
  (void)cust_aicpu_so_.erase(ctx_id);
  return SUCCESS;
}

@@ -1140,6 +1172,7 @@ Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name)
GE_CHK_RT(rtStreamCreate(&stream, 0));
GE_CHK_RT(rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream));

GELOGI("LaunchCustAicpuSo so buf len %u, so name len %u.", aicpu_data_length, so_name.size());
status = rtStreamSynchronize(stream);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status);


+ 4
- 4
ge/graph/load/new_model_manager/model_manager.h View File

@@ -270,9 +270,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

ge::Status DestroyAicpuSessionForInfer(uint32_t model_id);

ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, string so_name);
ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name);
ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name);
ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so_name);
ge::Status ClearAICPUSo(void *ctx);

ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);

@@ -340,7 +340,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::set<uint64_t> sess_ids_;
std::vector<rtExceptionInfo> exception_infos_;
std::mutex cust_aicpu_mutex_;
std::set<std::string> cust_aicpu_so_;
std::map<uintptr_t, std::set<std::string>> cust_aicpu_so_;

static DumpProperties dump_properties_;
};


+ 5
- 4
ge/single_op/single_op_model.cc View File

@@ -190,6 +190,7 @@ Status SingleOpModel::LoadAllNodes() {
}

ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(op_desc);
ge_model->GetCustAICPUKernelStore().LoadCustAICPUKernelBinToOpDesc(op_desc);
}

return SUCCESS;
@@ -247,7 +248,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size());
ParseArgTable(tbe_task, single_op);
single_op.tasks_.emplace_back(tbe_task);
} else if (kernel_type == cce::ccKernelType::AI_CPU) {
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
GELOGD("Building AICPU_CC task");
OpTask *task = nullptr;
auto ret = BuildCpuKernelTask(task_def.kernel(), &task);
@@ -256,7 +257,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
}
single_op.tasks_.emplace_back(task);
} else {
GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type());
GELOGE(UNSUPPORTED, "Only TBE kernel, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type());
return UNSUPPORTED;
}
} else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
@@ -391,13 +392,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
TbeOpTask *tbe_task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task));
single_op.op_task_.reset(tbe_task);
} else if (kernel_type == cce::ccKernelType::AI_CPU) {
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
GELOGD("Building AICPU_CC task");
OpTask *task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task));
single_op.op_task_.reset(task);
} else {
GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type());
GELOGE(UNSUPPORTED, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type());
return UNSUPPORTED;
}
return SUCCESS;


+ 10
- 0
ge/single_op/task/aicpu_kernel_task_builder.cc View File

@@ -15,6 +15,8 @@
*/

#include "single_op/task/aicpu_kernel_task_builder.h"
#include "cce/taskdown_common.hpp"
#include "graph/load/new_model_manager/model_manager.h"

namespace ge {
AiCpuCCTaskBuilder::AiCpuCCTaskBuilder(const OpDescPtr &op_desc, const domi::KernelDef &kernel_def)
@@ -55,6 +57,14 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) {
task.SetkernelName(kernel_name);
task.op_desc_ = op_desc_;

const auto &context = kernel_def_.context();
auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
if (kernel_type == cce::ccKernelType::CUST_AI_CPU) {
task.is_custom_ = true;
task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU;
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed");
}

task.num_inputs_ = op_desc_->GetInputsSize();
task.num_outputs_ = op_desc_->GetOutputsSize();



+ 4
- 5
ge/single_op/task/op_task.cc View File

@@ -280,8 +280,6 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc,
for (size_t j = 0; j < num_outputs_; ++j) {
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]),
"Output[%zu] UpdateOutputShapeAndType failed.", j);
// debug code
GELOGD("No input and output, no need update ext info.");
}
}

@@ -669,9 +667,10 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) {
kernel_name_.data());
// sm_desc is nullptr, because l2 buffer does not support
auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_);
auto ret =
rtCpuKernelLaunch(static_cast<const void *>(so_name_.data()), static_cast<const void *>(kernel_name_.data()),
block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), sm_desc, stream);
auto ret = rtCpuKernelLaunchWithFlag(static_cast<const void *>(so_name_.data()),
static_cast<const void *>(kernel_name_.data()),
block_dim_, args_.get(), static_cast<uint32_t>(arg_size_),
sm_desc, stream, dump_flag_);
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Invoke rtCpuKernelLaunch failed. ret = %d", ret);
return RT_FAILED;


+ 2
- 0
ge/single_op/task/op_task.h View File

@@ -234,6 +234,8 @@ private:
uint32_t block_dim_ = 1;
void *sm_desc_ = nullptr;
void *io_addr_ = nullptr;
bool is_custom_ = false;
uint32_t dump_flag_ = RT_KERNEL_DEFAULT;
};
} // namespace ge



Loading…
Cancel
Save