Browse Source

Invoke sub-kernel with tiling_key in dynamic-shape and all scenes

tags/v1.2.0
wuweikang 3 years ago
parent
commit
1db59ce1bc
15 changed files with 694 additions and 87 deletions
  1. +3
    -1
      ge/hybrid/model/hybrid_model_builder.cc
  2. +155
    -28
      ge/hybrid/node_executor/aicore/aicore_op_task.cc
  3. +34
    -0
      ge/hybrid/node_executor/aicore/aicore_op_task.h
  4. +12
    -8
      ge/single_op/single_op_model.cc
  5. +1
    -1
      ge/single_op/single_op_model.h
  6. +26
    -4
      ge/single_op/task/op_task.cc
  7. +8
    -0
      ge/single_op/task/op_task.h
  8. +144
    -44
      ge/single_op/task/tbe_task_builder.cc
  9. +32
    -1
      ge/single_op/task/tbe_task_builder.h
  10. +7
    -0
      tests/depends/runtime/src/runtime_stub.cc
  11. +6
    -0
      tests/ut/ge/CMakeLists.txt
  12. +101
    -0
      tests/ut/ge/hybrid/ge_hybrid_unittest.cc
  13. +117
    -0
      tests/ut/ge/single_op/single_op_task_unittest.cc
  14. +35
    -0
      third_party/fwkacllib/inc/runtime/kernel.h
  15. +13
    -0
      third_party/fwkacllib/inc/runtime/rt_model.h

+ 3
- 1
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -1199,6 +1199,8 @@ Status HybridModelBuilder::IndexTaskDefs() {
op_index = task_def.kernel_ex().op_index();
} else if (task_type == RT_MODEL_TASK_HCCL) {
op_index = task_def.kernel_hccl().op_index();
} else if (task_type == RT_MODEL_TASK_ALL_KERNEL) {
op_index = task_def.kernel_with_handle().context().op_index();
} else {
GELOGD("Skip task type: %d", static_cast<int>(task_type));
continue;
@@ -1211,7 +1213,7 @@ Status HybridModelBuilder::IndexTaskDefs() {
}

auto &node = iter->second;
if (task_type == RT_MODEL_TASK_KERNEL) {
if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc());
}



+ 155
- 28
ge/hybrid/node_executor/aicore/aicore_op_task.cc View File

@@ -33,6 +33,20 @@ constexpr char const *kAttrOpParamSize = "op_para_size";
constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size";
} // namespace

TbeHandleHolder::TbeHandleHolder(void *bin_handle)
: bin_handle_(bin_handle) {}

TbeHandleHolder::~TbeHandleHolder() {
if (bin_handle_ != nullptr) {
GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_));
}
}

bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) {
auto ret = registered_handles_.emplace(std::move(holder));
return ret.second;
}

Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def));
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc));
@@ -69,7 +83,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
if (rt_ret != RT_ERROR_NONE || is_single_op_) {
void *bin_handle = nullptr;
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str());
GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str());
rtDevBinary_t binary;
std::string json_string;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string),
@@ -96,7 +110,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())));
kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel);
} else {
GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str());
GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str());
kernel_store.ReferTBEHandle(stub_name_.c_str());
}
std::string kernel_name;
@@ -108,25 +122,63 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
return SUCCESS;
}

Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
"[%s] Failed to validate task def: [%s]",
op_desc.GetName().c_str(),
task_def.DebugString().c_str());
Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) {
TbeHandleRegistry &registry = TbeHandleRegistry::GetInstance();
auto tbe_kernel = op_desc.TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc.GetName().c_str());
return INTERNAL_ERROR;
}

void *bin_handle = nullptr;
GELOGD("Start to register kernel for node: [%s].", op_desc.GetName().c_str());
rtDevBinary_t binary;
std::string json_string;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string),
GELOGI("Get original type of session_graph_id."));
if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
} else {
GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str());
return PARAM_INVALID;
}
binary.version = 0;
binary.data = tbe_kernel->GetBinData();
binary.length = tbe_kernel->GetBinDataSize();
GELOGI("TBE: binary.length: %lu", binary.length);
GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle));
handle_ = bin_handle;
auto holder = std::unique_ptr<TbeHandleHolder>(new (std::nothrow) TbeHandleHolder(handle_));
if (holder == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
if (!registry.AddHandle(std::move(holder))) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc.GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
return SUCCESS;
}

Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
stub_name_ = kernel_def.stub_func();

GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc));

GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_));
args_size_ = kernel_def.args_size();
block_dim_ = kernel_def.block_dim();

// malloc args memory
args_.reset(new(std::nothrow) uint8_t[args_size_]);
GE_CHECK_NOTNULL(args_);
if (kernel_def.args().size() < args_size_) {
GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_");
return INTERNAL_ERROR;
}
errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_);
if (err != EOK) {
GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
@@ -157,19 +209,75 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef
block_dim_,
arg_base_,
args_size_);
return SUCCESS;
}

Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const domi::TaskDef &task_def) {
const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle();
const domi::KernelContext &context = kernel_with_handle.context();

GE_CHK_STATUS_RET(RegisterKernelHandle(op_desc));
original_kernel_key_ = kernel_with_handle.original_kernel_key() + "_";
node_info_ = kernel_with_handle.node_info() + "/";
args_size_ = kernel_with_handle.args_size();
block_dim_ = kernel_with_handle.block_dim();
// malloc args memory
args_.reset(new(std::nothrow) uint8_t[args_size_]);
GE_CHECK_NOTNULL(args_);
if (kernel_with_handle.args().size() < args_size_) {
GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_");
return INTERNAL_ERROR;
}
errno_t err = memcpy_s(args_.get(), args_size_, kernel_with_handle.args().data(), args_size_);

if (err != EOK) {
GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
return INTERNAL_ERROR;
}

if (context.args_offset().size() < sizeof(uint16_t)) {
GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size());
return INTERNAL_ERROR;
}

const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data());
uint32_t offset = *args_offset_buffer;
if (offset > args_size_) {
GELOGE(INTERNAL_ERROR,
"[%s] Arg offset out of range. offset = %u, arg size = %u",
GetName().c_str(),
offset,
args_size_);
return INTERNAL_ERROR;
}

arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset);
max_arg_count_ = (args_size_ - offset) / sizeof(void *);
return SUCCESS;
}

Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
"[%s] Failed to validate task def: [%s]",
op_desc.GetName().c_str(),
task_def.DebugString().c_str());

if (task_def.type() != RT_MODEL_TASK_ALL_KERNEL) {
GE_CHK_STATUS_RET(InitWithKernelDef(op_desc, task_def));
} else {
GE_CHK_STATUS_RET(InitWithKernelDefWithHandle(op_desc, task_def));
}
return SUCCESS;
}

Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) {
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (task_type != RT_MODEL_TASK_KERNEL) {
if (task_type != RT_MODEL_TASK_KERNEL && task_type != RT_MODEL_TASK_ALL_KERNEL) {
GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type));
return INTERNAL_ERROR;
}

const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
task_def.kernel_with_handle().context();
auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type != ccKernelType::TE) {
GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
@@ -180,10 +288,9 @@ Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) {
}

Status AiCoreOpTask::PrepareWithShape(TaskContext &context) {
if (tiling_buffer_ != nullptr) {
if (is_dynamic_) {
return UpdateTilingInfo(context);
}

return SUCCESS;
}

@@ -212,8 +319,14 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
clear_atomic_ = tiling_info.clear_atomic;

tiling_data_ = tiling_info.tiling_data.str();
tiling_key_ = tiling_info.tiling_key;
GELOGD("Successfully getting [tiling_key] : %u", tiling_key_);
if (tiling_data_.empty()) {
GELOGE(INTERNAL_ERROR, "[%s] Tiling data is empty.", stub_name_.c_str());
GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str());
return SUCCESS;
}
if (tiling_buffer_ == nullptr) {
GELOGE(INTERNAL_ERROR, "tiling_buffer is nullptr while tiling_data is not empty!");
return INTERNAL_ERROR;
}

@@ -296,16 +409,26 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
}

Status AiCoreOpTask::LaunchKernel(rtStream_t stream) {
GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
if (handle_ != nullptr) {
std::string dev_func = original_kernel_key_ + std::to_string(tiling_key_);
std::string kernel_info = node_info_ + std::to_string(tiling_key_);
GELOGD("AiCoreOpTask rtKernelLaunchWithHandle Start (dev_func = %s, block_dim = %u).", dev_func.c_str(),
block_dim_);
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), args_size_, nullptr,
stream, kernel_info.c_str()));
GELOGD("AiCoreOpTask rtKernelLaunchWithHandle End (dev_func = %s, block_dim = %u).", dev_func.c_str(),
block_dim_);
} else {
GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
}
return SUCCESS;
}

Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) {
bool dynamic_supported = false;
(void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, dynamic_supported);
if (!dynamic_supported) {
(void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, is_dynamic_);
if (!is_dynamic_) {
GELOGD("[%s] Dynamic shape is not supported.", op_desc.GetName().c_str());
return SUCCESS;
}
@@ -314,22 +437,26 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) {
int64_t max_size = -1;
(void) AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size);
GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size);
if (max_size <= 0) {
if (max_size < 0) {
GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size);
return PARAM_INVALID;
}

auto allocator = NpuMemoryAllocator::GetAllocator();
GE_CHECK_NOTNULL(allocator);
tiling_buffer_ = TensorBuffer::Create(allocator, static_cast<size_t>(max_size));
GE_CHECK_NOTNULL(tiling_buffer_);
if (max_size > 0) {
tiling_buffer_ = TensorBuffer::Create(allocator, static_cast<size_t>(max_size));
GE_CHECK_NOTNULL(tiling_buffer_);
GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size);
} else {
GELOGD("op_param_size is 0, no need to create tiling buffer.");
}

GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size);
return SUCCESS;
}

bool AiCoreOpTask::IsDynamicShapeSupported() {
return tiling_buffer_ != nullptr;
return is_dynamic_;
}

const std::string &AiCoreOpTask::GetName() const {


+ 34
- 0
ge/hybrid/node_executor/aicore/aicore_op_task.h View File

@@ -28,6 +28,32 @@

namespace ge {
namespace hybrid {
class TbeHandleHolder {
public:
TbeHandleHolder(void *bin_handle);
~TbeHandleHolder();

void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; }
void *GetBinHandle() { return bin_handle_; }

private:
friend class TbeHandleRegistry;
void *bin_handle_ = nullptr;
};

class TbeHandleRegistry {
public:
static TbeHandleRegistry &GetInstance() {
static TbeHandleRegistry instance;
return instance;
}

bool AddHandle(std::unique_ptr<TbeHandleHolder> &&holder);

private:
std::set<std::unique_ptr<TbeHandleHolder>> registered_handles_;
};

class AiCoreOpTask {
public:
AiCoreOpTask() = default;
@@ -67,6 +93,9 @@ class AiCoreOpTask {
Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def);
Status InitTilingInfo(const OpDesc &op_desc);
Status RegisterTbeHandle(const OpDesc &op_desc);
Status RegisterKernelHandle(const OpDesc &op_desc);
Status InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def);
Status InitWithKernelDefWithHandle(const OpDesc &node, const domi::TaskDef &task_def);

std::string stub_name_;
void *stub_func_ = nullptr;
@@ -76,6 +105,11 @@ class AiCoreOpTask {
bool clear_atomic_ = true;
bool is_single_op_ = false;
std::vector<int> output_indices_to_skip_;
string original_kernel_key_;
string node_info_;
uint32_t tiling_key_ = 0;
void *handle_ = nullptr;
bool is_dynamic_ = false;
};

class AtomicAddrCleanOpTask : public AiCoreOpTask {


+ 12
- 8
ge/single_op/single_op_model.cc View File

@@ -261,7 +261,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s
if (kernel_type == ccKernelType::TE) {
GELOGD("Building TBE task");
TbeOpTask *tbe_task = nullptr;
auto ret = BuildKernelTask(task_def.kernel(), &tbe_task);
auto ret = BuildKernelTask(task_def, &tbe_task);
if (ret != SUCCESS) {
return ret;
}
@@ -332,9 +332,11 @@ void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) {
}
}

Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task) {
Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) {
GE_CHECK_NOTNULL(task);
const auto &context = kernel_def.context();
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
task_def.kernel_with_handle().context();
auto iter = op_list_.find(context.op_index());
if (iter == op_list_.end()) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index());
@@ -347,7 +349,7 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}

auto builder = TbeTaskBuilder(model_name_, iter->second, kernel_def);
auto builder = TbeTaskBuilder(model_name_, iter->second, task_def);
auto ret = builder.BuildTask(*tbe_task, model_params_);
if (ret != SUCCESS) {
delete tbe_task;
@@ -418,13 +420,15 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
}

Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) {
const domi::KernelDef &kernel_def = task_def.kernel();
const auto &context = kernel_def.context();
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
task_def.kernel_with_handle().context();

auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type == ccKernelType::TE) {
GELOGD("Building TBE task");
TbeOpTask *tbe_task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task));
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task));
tbe_task->SetModelArgs(model_name_, model_id_);
single_op.op_task_.reset(tbe_task);
} else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
@@ -453,7 +457,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(),
task_def.DebugString().c_str());
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (task_type == RT_MODEL_TASK_KERNEL) {
if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
if (single_op.op_task_ != nullptr) {
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks.");
return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;


+ 1
- 1
ge/single_op/single_op_model.h View File

@@ -67,7 +67,7 @@ class SingleOpModel {

Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op);
Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op);
Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task);
Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task);
Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id);
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id);


+ 26
- 4
ge/single_op/task/op_task.cc View File

@@ -93,6 +93,14 @@ void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size
op_desc_ = op_desc;
}

void TbeOpTask::SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim,
const OpDescPtr &op_desc,
const domi::KernelDefWithHandle &kernel_def_with_handle) {
SetKernelArgs(std::move(args), arg_size, block_dim, op_desc);
original_kernel_key_ = kernel_def_with_handle.original_kernel_key();
node_info_ = kernel_def_with_handle.node_info();
}

void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; }

void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) {
@@ -165,6 +173,10 @@ const std::string &TbeOpTask::GetStubName() const { return stub_name_; }

uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; }

void TbeOpTask::SetHandle(void *handle) {
this->handle_ = handle;
}

Status TbeOpTask::LaunchKernel(rtStream_t stream) {
GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_);
auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_);
@@ -204,8 +216,9 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve
}
block_dim_ = run_info.block_dim;
tiling_data_ = run_info.tiling_data.str();
GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu", block_dim_,
tiling_data_.size());
tiling_key_ = run_info.tiling_key;
GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_,
tiling_data_.size(), tiling_key_);

GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces");
return SUCCESS;
@@ -329,8 +342,17 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
}

GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str());
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream));
GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str());
if (handle_ == nullptr) {
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream));
GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str());
} else {
std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_);
std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_);
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr,
stream, kernel_info.c_str()));
GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str());
}

return SUCCESS;
}



+ 8
- 0
ge/single_op/task/op_task.h View File

@@ -78,6 +78,8 @@ class TbeOpTask : public OpTask {
void SetSmDesc(void *sm_desc);
void SetStubFunc(const std::string &name, const void *stub_func);
void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc);
void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim,
const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle);

Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc,
const vector<GeTensorDesc> &output_desc) override;
@@ -87,6 +89,7 @@ class TbeOpTask : public OpTask {
const std::string &GetStubName() const;
void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size);
uint32_t GetTaskType() const override;
void SetHandle(void *handle);

private:
friend class SingleOpModel;
@@ -107,6 +110,11 @@ class TbeOpTask : public OpTask {
std::string tiling_data_;
std::vector<void *> workspaces_;
NodePtr node_;

uint32_t tiling_key_ = 0;
void* handle_ = nullptr;
std::string original_kernel_key_;
std::string node_info_;
};

class AiCpuBaseTask : public OpTask {


+ 144
- 44
ge/single_op/task/tbe_task_builder.cc View File

@@ -49,6 +49,15 @@ KernelHolder::~KernelHolder() {
}
}

HandleHolder::HandleHolder(void *bin_handle)
: bin_handle_(bin_handle) {}

HandleHolder::~HandleHolder() {
if (bin_handle_ != nullptr) {
GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_));
}
}

const char *KernelBinRegistry::GetUnique(const string &stub_func) {
std::lock_guard<std::mutex> lock(mutex_);
auto it = unique_stubs_.find(stub_func);
@@ -76,10 +85,17 @@ bool KernelBinRegistry::AddKernel(const std::string &stub_name, std::unique_ptr<
return ret.second;
}

TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def)
bool HandleRegistry::AddHandle(std::unique_ptr<HandleHolder> &&holder) {
auto ret = registered_handles_.emplace(std::move(holder));
return ret.second;
}

TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def)
: node_(node),
op_desc_(node->GetOpDesc()),
kernel_def_(kernel_def),
task_def_(task_def),
kernel_def_(task_def.kernel()),
kernel_def_with_handle_(task_def.kernel_with_handle()),
stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {}

Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle,
@@ -89,9 +105,14 @@ Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bi
binary.data = kernel_bin.GetBinData();
binary.length = kernel_bin.GetBinDataSize();
binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC;
auto ret = rtDevBinaryRegister(&binary, bin_handle);
Status ret = 0;
if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) {
ret = rtRegisterAllKernel(&binary, bin_handle);
} else {
ret = rtDevBinaryRegister(&binary, bin_handle);
}
if (ret != RT_ERROR_NONE) {
GELOGE(ret, "rtDevBinaryRegister failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(),
GELOGE(ret, "DoRegisterBinary failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(),
param.core_type, static_cast<int>(ret));
return ret;
}
@@ -128,14 +149,15 @@ Status TbeTaskBuilder::DoRegisterFunction(void *bin_handle, const char *stub_nam

Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const char *bin_file_key, void **bin_handle,
const SingleOpModelParam &param) {
std::string kernel_name;
GetKernelName(op_desc_, kernel_name);

void *handle = nullptr;
auto ret = DoRegisterBinary(tbe_kernel, &handle, param);
if (ret != SUCCESS) {
return ret;
}
if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) {
*bin_handle = handle;
return SUCCESS;
}

ret = DoRegisterMeta(handle);
if (ret != SUCCESS) {
@@ -143,6 +165,8 @@ Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const
return ret;
}

std::string kernel_name;
GetKernelName(op_desc_, kernel_name);
ret = DoRegisterFunction(handle, bin_file_key, kernel_name.c_str());
if (ret != SUCCESS) {
GE_CHK_RT(rtDevBinaryUnRegister(handle));
@@ -186,13 +210,15 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam

void *bin_handle = nullptr;
auto ret = DoRegisterKernel(*tbe_kernel, stub_func, &bin_handle, param);
if (ret == SUCCESS) {
holder->SetBinHandle(bin_handle);
if (!registry.AddKernel(stub_name_, std::move(holder))) {
// should not happen. only one thread can reach here
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. stub name = %s", stub_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
holder->SetBinHandle(bin_handle);
if (!registry.AddKernel(stub_name_, std::move(holder))) {
// should not happen. only one thread can reach here
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
}

@@ -200,6 +226,35 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam
return SUCCESS;
}

Status TbeTaskBuilder::RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam &param) {
GELOGD("RegisterKernelWithHandle begin.");
HandleRegistry &registry = HandleRegistry::GetInstance();
auto tbe_kernel = GetTbeKernel(op_desc_);
if (tbe_kernel == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s",
op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
void *bin_handle = nullptr;
auto ret = DoRegisterKernel(*tbe_kernel, nullptr, &bin_handle, param);
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. node name = %s", op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
handle_ = bin_handle;
auto holder = std::unique_ptr<HandleHolder>(new (std::nothrow) HandleHolder(handle_));
if (holder == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
if (!registry.AddHandle(std::move(holder))) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}

return SUCCESS;
}

Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam &param) const {
const std::string &sm_desc_str = kernel_def_.sm_desc();
if (sm_desc_str.empty()) {
@@ -217,17 +272,17 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam &param
}
}

auto rtRet = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM);
if (rtRet != RT_ERROR_NONE) {
GELOGE(rtRet, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rtRet));
return rtRet;
auto rt_ret = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rt_ret));
return rt_ret;
}

rtRet = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rtRet != RT_ERROR_NONE) {
rt_ret = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
(void)rtMemFreeManaged(*sm_desc);
GELOGE(rtRet, "rtMemcpy, ret: %d", static_cast<int>(rtRet));
return rtRet;
GELOGE(rt_ret, "rtMemcpy, ret: %d", static_cast<int>(rt_ret));
return rt_ret;
}
}

@@ -239,10 +294,10 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &
auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
GE_CHECK_NOTNULL(args);

auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
if (rtRet != RT_ERROR_NONE) {
GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rtRet));
return RT_ERROR_TO_GE_STATUS(rtRet);
auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

const domi::KernelContext &context = kernel_def_.context();
@@ -258,39 +313,83 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data());
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
if (rtRet != RT_ERROR_NONE) {
GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rtRet));
return RT_ERROR_TO_GE_STATUS(rtRet);
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc);

return SUCCESS;
}

Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam &param,
                                               const OpDescPtr &op_desc) {
  // Build the launch-argument buffer for a handle-based kernel (RT_MODEL_TASK_ALL_KERNEL):
  // copy the serialized args from the task def, then either enable tiling support
  // (dynamic shape) or patch in the concrete I/O addresses (static shape).
  size_t arg_size = kernel_def_with_handle_.args_size();
  auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
  GE_CHECK_NOTNULL(args);

  auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
    // Consistency fix: convert the runtime error into the GE status domain,
    // exactly as SetKernelArgs does, instead of returning the raw rtError_t.
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }

  // args_offset marks where the tensor addresses must be written inside the arg buffer.
  const domi::KernelContext &context = kernel_def_with_handle_.context();
  const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data());
  uint16_t offset = *args_offset_tmp;

  bool is_dynamic = false;
  (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic);
  if (is_dynamic) {
    // Dynamic shape: only set up the tiling buffer here; addresses are supplied per launch.
    GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task));
  } else {
    // Static shape: copy the input/output/workspace device addresses into the arg buffer now.
    std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
    void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data());
    uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
    rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
  }
  task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc,
                               kernel_def_with_handle_);

  return SUCCESS;
}

Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam &param) {
  // Build a TBE op task. Handle-based kernels (RT_MODEL_TASK_ALL_KERNEL) are
  // registered/launched via a binary handle; classic kernels go through a stub function.
  // Fix: the diff-merged text declared `ret` twice and kept the pre-change
  // register/stub code alongside the new task_type dispatch; this is the clean form.
  GELOGD("Build tbe task begin");
  auto task_type = static_cast<rtModelTaskType_t>(task_def_.type());
  auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) :
                                                     SetKernelArgs(task, param, op_desc_);
  if (ret != SUCCESS) {
    return ret;
  }

  ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) :
                                                RegisterKernel(task, param);
  // handle_ is populated by RegisterKernelWithHandle (remains nullptr for stub kernels).
  task.SetHandle(handle_);
  if (ret != SUCCESS) {
    return ret;
  }

  auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_);
  GELOGI("[TASK_INFO] %s %s", stub_name_.c_str(), task_info.c_str());

  // Only stub-registered kernels need a stub-function lookup; handle-based
  // kernels are launched through rtKernelLaunchWithHandle instead.
  if (task_type != RT_MODEL_TASK_ALL_KERNEL) {
    void *stub_func = nullptr;
    auto rt_ret = rtGetFunctionByName(stub_name_.c_str(), &stub_func);
    if (rt_ret != SUCCESS) {
      GELOGE(rt_ret, "rtGetFunctionByName failed.");
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    task.SetStubFunc(stub_name_, stub_func);
  }

  return SUCCESS;
}

@@ -299,15 +398,16 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) {
int64_t max_size = -1;
(void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size);
GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size);
if (max_size <= 0) {
if (max_size < 0) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size);
return ACL_ERROR_GE_PARAM_INVALID;
}

void *tiling_buffer = nullptr;
GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM));
GE_CHECK_NOTNULL(tiling_buffer);
GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size);
if (max_size > 0) {
GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM));
GE_CHECK_NOTNULL(tiling_buffer);
GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size);
}

task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size));
return SUCCESS;


+ 32
- 1
ge/single_op/task/tbe_task_builder.h View File

@@ -42,6 +42,19 @@ class KernelHolder {
std::shared_ptr<ge::OpKernelBin> kernel_bin_;
};

// RAII wrapper for a runtime kernel-binary handle. The destructor (defined in
// the .cc — presumably unregisters the binary; confirm there) releases it, so
// the holder must uniquely own the handle.
class HandleHolder {
 public:
  // explicit: a void* must not silently convert into a holder.
  explicit HandleHolder(void *bin_handle);
  ~HandleHolder();

  // Non-copyable: copying would release the same handle twice.
  HandleHolder(const HandleHolder &) = delete;
  HandleHolder &operator=(const HandleHolder &) = delete;

  void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; }
  void *GetBinHandle() const { return bin_handle_; }

 private:
  friend class HandleRegistry;
  void *bin_handle_ = nullptr;
};

class KernelBinRegistry {
public:
static KernelBinRegistry &GetInstance() {
@@ -61,9 +74,22 @@ class KernelBinRegistry {
std::mutex mutex_;
};

// Process-wide registry that keeps HandleHolder objects (and thus their
// runtime binary handles) alive; entries are stored and never removed here.
class HandleRegistry {
 public:
  // Meyers singleton; construction is thread-safe since C++11.
  static HandleRegistry &GetInstance() {
    static HandleRegistry instance;
    return instance;
  }

  // Takes ownership of the holder (defined in the .cc — presumably reports
  // whether the insertion took place; verify there).
  // NOTE(review): unlike KernelBinRegistry there is no mutex guarding
  // registered_handles_ — confirm AddHandle is never called concurrently.
  bool AddHandle(std::unique_ptr<HandleHolder> &&holder);

 private:
  std::set<std::unique_ptr<HandleHolder>> registered_handles_;
};

class TbeTaskBuilder {
public:
TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def);
TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def);
~TbeTaskBuilder() = default;

Status BuildTask(TbeOpTask &task, const SingleOpModelParam &param);
@@ -71,9 +97,11 @@ class TbeTaskBuilder {
private:
Status InitTilingInfo(TbeOpTask &task);
Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc);
Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc);
Status GetSmDesc(void **sm_desc, const SingleOpModelParam &param) const;

Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam &param);
Status RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam &param);
Status DoRegisterKernel(const OpKernelBin &kernel_bin, const char *bin_file_key, void **bin_handle,
const SingleOpModelParam &param);
Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam &param) const;
@@ -83,8 +111,11 @@ class TbeTaskBuilder {

const NodePtr node_;
const OpDescPtr op_desc_;
const domi::TaskDef &task_def_;
const domi::KernelDef &kernel_def_;
const domi::KernelDefWithHandle &kernel_def_with_handle_;
const std::string stub_name_;
void *handle_ = nullptr;
};
} // namespace ge



+ 7
- 0
tests/depends/runtime/src/runtime_stub.cc View File

@@ -131,8 +131,15 @@ rtError_t rtFunctionRegister(void *bin_handle, const void *stub_func, const char

rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; }

rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; }

rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg) { return RT_ERROR_NONE; }

// Unit-test stub: handle-based kernel launch always succeeds; nothing is executed.
rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
                                   rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo) {
  return RT_ERROR_NONE;
}

rtError_t rtKernelLaunch(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, rtSmDesc_t *sm_desc,
rtStream_t stream) {
return RT_ERROR_NONE;


+ 6
- 0
tests/ut/ge/CMakeLists.txt View File

@@ -759,12 +759,17 @@ set(SINGLE_OP_TEST_FILES
#"single_op/single_op_model_unittest.cc"
"single_op/single_op_manager_unittest.cc"
"single_op/stream_resource_unittest.cc"
"single_op/single_op_task_unittest.cc"
)

set(PROFILING_MNG_TEST_FILES
"profiling/ge_profiling_manager_unittest.cc"
)

set(HYBRID_TEST_FILES
"hybrid/ge_hybrid_unittest.cc"
)

set(OTHERS_TEST_FILES
"plugin_manager/ge_util_unittest.cc"
)
@@ -1059,6 +1064,7 @@ add_executable(ut_libge_distinct_load_utest
${DISTINCT_GRAPH_LOAD_SRC_FILES}
${SINGLE_OP_TEST_FILES}
${PROFILING_MNG_TEST_FILES}
${HYBRID_TEST_FILES}
)

target_compile_options(ut_libge_distinct_load_utest PRIVATE


+ 101
- 0
tests/ut/ge/hybrid/ge_hybrid_unittest.cc View File

@@ -0,0 +1,101 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>
#include <vector>

#include "runtime/rt.h"

#define protected public
#define private public
#include "hybrid/model/hybrid_model_builder.h"
#include "hybrid/model/hybrid_model.h"
#include "model/ge_model.h"
#include "model/ge_root_model.h"

#include "hybrid/node_executor/aicore/aicore_op_task.h"
#include "framework/common/taskdown_common.h"
#include "framework/common/debug/log.h"
#include "graph/ge_context.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "hybrid/node_executor/aicore/aicore_task_builder.h"
#include "graph/load/model_manager/tbe_handle_store.h"
#include "graph/types.h"

#undef private
#undef protected

using namespace std;
using namespace testing;
using namespace ge;

// Fixture for hybrid-model AiCore task tests; no shared per-test state is needed.
class UtestGeHybrid : public testing::Test {
 protected:
  void SetUp() {}

  void TearDown() {}
};

// Builds a minimal OpDesc for AiCore task tests: empty workspace/offset lists,
// an ELF-AIVEC TVM magic, and the dynamic-shape support flag set.
static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") {
  auto op_desc = std::make_shared<ge::OpDesc>(name, type);
  op_desc->SetStreamId(0);
  op_desc->SetId(0);

  op_desc->SetWorkspace({});
  op_desc->SetWorkspaceBytes({});
  op_desc->SetInputOffset({});
  op_desc->SetOutputOffset({});

  ge::AttrUtils::SetStr(op_desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC");
  // Bug fix: the original called GetBool, which is a no-op on a freshly built
  // OpDesc and leaves "support_dynamicshape" absent. The intent is to SET it.
  bool support_dynamic = true;
  (void)ge::AttrUtils::SetBool(op_desc, "support_dynamicshape", support_dynamic);
  return op_desc;
}

// Covers AiCoreOpTask init + launch for an RT_MODEL_TASK_ALL_KERNEL task def,
// both without and with a registered handle / tiling key.
TEST_F(UtestGeHybrid, aicore_op_task_init_success) {
  // Build an aicore task def carrying a kernel-with-handle.
  auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask());
  domi::TaskDef task_def;
  task_def.set_type(RT_MODEL_TASK_ALL_KERNEL);
  domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle();
  kernel_with_handle->set_original_kernel_key("");
  kernel_with_handle->set_node_info("");
  kernel_with_handle->set_block_dim(32);
  kernel_with_handle->set_args_size(64);
  string args(64, '1');
  kernel_with_handle->set_args(args.data(), 64);
  domi::KernelContext *context = kernel_with_handle->mutable_context();
  context->set_op_index(1);
  context->set_kernel_type(2);    // ccKernelType::TE
  uint16_t args_offset[9] = {0};
  context->set_args_offset(args_offset, 9 * sizeof(uint16_t));

  // Attach the TBE kernel binary and kernel name, as the model loader would.
  OpDescPtr op_desc = CreateOpDesc("Add", "Add");
  std::vector<char> kernelBin;
  TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin));
  op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
  std::string kernel_name("kernel/Add");
  AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);
  ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS);
  rtStream_t stream = nullptr;
  rtStreamCreate(&stream, 0);
  ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS);
  // Fix: a string literal is const; binding it to a non-const char* is
  // ill-formed in ISO C++. Use a writable local array instead.
  char handle[] = "";
  aicore_task->handle_ = handle;
  aicore_task->tiling_key_ = 1;
  ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS);
}

+ 117
- 0
tests/ut/ge/single_op/single_op_task_unittest.cc View File

@@ -0,0 +1,117 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>
#include <vector>

#include "graph/load/model_manager/model_utils.h"
#include "graph/utils/graph_utils.h"
#include "runtime/rt.h"

#define protected public
#define private public
#include "single_op/single_op_model.h"
#include "single_op/task/tbe_task_builder.h"
#include "single_op/task/op_task.h"
#include "single_op/task/tbe_task_builder.h"
#include "external/register/op_tiling_registry.h"
#undef private
#undef protected

using namespace std;
using namespace testing;
using namespace ge;
using namespace optiling;

// Fixture for single-op TBE task tests; no shared per-test state is needed.
class UtestSingleOpTask : public testing::Test {
 protected:
  void SetUp() {}

  void TearDown() {}
};

// Builds a TbeOpTask from an RT_MODEL_TASK_ALL_KERNEL task def and launches it
// with tiling, both via the stub path and via a registered handle.
TEST_F(UtestSingleOpTask, test_build_kernel_task) {
  string model_data_str = "123456789";
  SingleOpModel model("model", model_data_str.c_str(), model_data_str.size());
  model.input_offset_list_.push_back(0);
  model.input_sizes_.push_back(16);

  model.output_offset_list_.push_back(0);
  model.output_sizes_.push_back(16);

  // Graph with a single 16x16 Add node carrying a TBE kernel binary.
  auto graph = make_shared<ComputeGraph>("graph");
  auto op_desc = make_shared<OpDesc>("Add", "Add");
  std::vector<char> kernelBin;
  TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin));
  op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
  std::string kernel_name("kernel/Add");
  AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);

  vector<int64_t> shape{16, 16};
  GeShape ge_shape(shape);
  GeTensorDesc desc(ge_shape);
  op_desc->AddInputDesc(desc);
  op_desc->AddOutputDesc(desc);
  auto node = graph->AddNode(op_desc);

  std::mutex stream_mu_;
  rtStream_t stream_ = nullptr;
  StreamResource stream_resource(0);
  SingleOp single_op(&stream_resource, &stream_mu_, stream_);

  domi::TaskDef task_def;
  task_def.set_type(RT_MODEL_TASK_ALL_KERNEL);
  domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle();
  kernel_with_handle->set_original_kernel_key("");
  kernel_with_handle->set_node_info("");
  kernel_with_handle->set_block_dim(32);
  kernel_with_handle->set_args_size(64);
  string args(64, '1');
  kernel_with_handle->set_args(args.data(), 64);
  domi::KernelContext *context = kernel_with_handle->mutable_context();
  context->set_op_index(1);
  context->set_kernel_type(2);    // ccKernelType::TE
  uint16_t args_offset[9] = {0};
  context->set_args_offset(args_offset, 9 * sizeof(uint16_t));
  model.op_list_[1] = node;

  TbeOpTask task_tmp;
  TbeOpTask *task = &task_tmp;
  ASSERT_EQ(model.BuildKernelTask(task_def, &task), SUCCESS);
  vector<GeTensorDesc> input_desc;
  vector<DataBuffer> input_buffers;
  vector<GeTensorDesc> output_desc;
  vector<DataBuffer> output_buffers;
  task->node_ = node;
  // Register a tiling function so the dynamic-shape launch path can run.
  OpTilingFunc op_tiling_func = [](const TeOpParas &, const OpCompileInfo &, OpRunInfo &) -> bool {return true;};
  OpTilingRegistryInterf("Add", op_tiling_func);
  ge::AttrUtils::SetStr(op_desc, "compile_info_key", "op_compile_info_key");
  ge::AttrUtils::SetStr(op_desc, "compile_info_json", "op_compile_info_json");
  char c = '0';
  char* buffer = &c;
  // NOTE(review): tiling_buffer_ points at a stack byte — safe only because the
  // runtime is stubbed in UT and never frees/writes it; confirm TbeOpTask's dtor.
  task->tiling_buffer_ = buffer;
  task->max_tiling_size_ = 64;
  task->tiling_data_ = "tiling_data";
  task->arg_size_ = 64;
  // Fix: args_ is an owning unique_ptr<uint8_t[]>; resetting it to the address
  // of a stack local makes its destructor delete[] stack memory (UB).
  // Give it heap memory of arg_size_ bytes instead.
  task->args_.reset(new uint8_t[task->arg_size_]());

  ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS);
  char handle_tmp = '0';
  char *handle = &handle_tmp;
  task->SetHandle(handle);
  ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS);
}

+ 35
- 0
third_party/fwkacllib/inc/runtime/kernel.h View File

@@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData);
#define RT_FUSION_KERNEL_DUMPFLAG (0x04)
#define RT_KERNEL_CUSTOM_AICPU (0x08)

/**
 * @ingroup rt_kernel
 * @brief kernel registration/launch mode flags
 *        (presumably: default, per-stub "normal" registration, and whole-binary
 *        "all kernel" registration via handle — confirm with runtime docs)
 */
#define RT_DEFAULT_KERNEL_MODE (0x00)
#define RT_NORMAL_KERNEL_MODE (0x01)
#define RT_ALL_KERNEL_MODE (0x02)

/**
* @ingroup rt_kernel
* @brief kernel L1 Fusion Dump bit flags
@@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData);
*/
RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle);

/**
 * @ingroup rt_kernel
 * @brief register a device binary and obtain a handle covering all kernels in it
 *        (kernels are then launched via rtKernelLaunchWithHandle instead of per-stub registration)
 * @param [in] bin device binary description
 * @param [out] handle device binary handle
 * @return RT_ERROR_NONE for ok
 * @return RT_ERROR_INVALID_VALUE for error input
 */
RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle);

/**
* @ingroup rt_kernel
 * @brief register fast memory device binary
@@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u
RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream);

/**
 * @ingroup rt_kernel
 * @brief launch kernel with handle to device
 * @param [in] handle program handle (as returned by rtRegisterAllKernel)
 * @param [in] devFunc device function description
 * @param [in] blockDim block dimensions
 * @param [in] args arguments address for kernel function
 * @param [in] argsSize arguments size
 * @param [in] smDesc shared memory description
 * @param [in] stream associated stream
 * @param [in] kernelInfo kernel info
 * @return RT_ERROR_NONE for ok
 * @return RT_ERROR_INVALID_VALUE for error input
 */
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
                                           rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo);

/**
* @ingroup rt_kernel
* @brief launch kernel to device


+ 13
- 0
third_party/fwkacllib/inc/runtime/rt_model.h View File

@@ -50,6 +50,7 @@ typedef enum tagModelTaskType {
RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX,
RT_MODEL_TASK_STREAM_LABEL_GOTO,
RT_MODEL_TASK_MODEL_EXIT,
RT_MODEL_TASK_ALL_KERNEL,
} rtModelTaskType_t;

typedef enum tagModelStreamType {
@@ -127,6 +128,17 @@ typedef struct tagKernelTaskInfo {
uint16_t *argsOffset;
} rtKernelTaskInfo_t;

// Task info for kernels launched through a binary handle (RT_MODEL_TASK_ALL_KERNEL),
// mirroring rtKernelTaskInfo_t but carrying the device function + handle
// instead of a registered stub function.
typedef struct tagAllKernelTaskInfo {
  uint16_t blockDim;
  uint16_t argsCount;
  uint16_t argsSize;
  uint16_t reserved;
  const void *dev_func;  // NOTE(review): snake_case is inconsistent with the sibling camelCase fields.
  void *handle;
  uint8_t *smDesc;
  uint8_t *args;
  uint16_t *argsOffset;
} rtAllKernelTaskInfo_t;
typedef struct tagKernelTaskInfoEx {
uint32_t flags;
uint32_t argsSize;
@@ -251,6 +263,7 @@ typedef struct tagTaskInfo {
union {
rtKernelTaskInfoEx_t kernelTaskEx;
rtKernelTaskInfo_t kernelTask;
rtAllKernelTaskInfo_t allkernelTask;
rtEventTaskInfo_t eventTask;
rtStreamSwitchTaskInfo_t streamSwitchTask;
rtStreamActiveTaskInfo_t streamActiveTask;


Loading…
Cancel
Save