From: @zhao_zhixuan Reviewed-by: @xchu42,@ji_chen Signed-off-by: @ji_chen tags/v1.3.0
@@ -15,6 +15,8 @@ | |||||
*/ | */ | ||||
#include "common/tbe_kernel_store.h" | #include "common/tbe_kernel_store.h" | ||||
#include "graph/utils/attr_utils.h" | |||||
#include "graph/debug/ge_attr_define.h" | |||||
namespace ge { | namespace ge { | ||||
@@ -31,6 +33,15 @@ void TBEKernelStore::LoadTBEKernelBinToOpDesc(const std::shared_ptr<ge::OpDesc> | |||||
GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, kernel_bin), | GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, kernel_bin), | ||||
GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for kernel_bin failed");) | GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for kernel_bin failed");) | ||||
GELOGI("Load tbe kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize()); | GELOGI("Load tbe kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize()); | ||||
std::string atomic_kernel_name; | |||||
(void) AttrUtils::GetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, atomic_kernel_name); | |||||
if (!atomic_kernel_name.empty()) { | |||||
GELOGI("Get atomic kernel name is %s.", atomic_kernel_name.c_str()); | |||||
auto atomic_kernel_bin = FindKernel(atomic_kernel_name); | |||||
GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, atomic_kernel_bin), | |||||
GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for atomic kernel_bin failed");) | |||||
} | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -627,6 +627,50 @@ Status ModelBuilder::MergeWeights() { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
/// Saves the TBE kernel of the atomic-clean node attached to `op_desc` into the kernel store and
/// mirrors the attributes describing that kernel onto `op_desc`.
///
/// The atomic-clean node is read from the "atomic_clean_node_ptr" ext attr of `op_desc`. Its kernel
/// is taken from the OP_EXTATTR_NAME_TBE_KERNEL ext attr, or rebuilt from the
/// ATTR_NAME_TBE_KERNEL_NAME / ATTR_NAME_TBE_KERNEL_BUFFER attributes when that ext attr is absent.
///
/// @param op_desc  Op whose attached atomic-clean kernel should be saved; must not be null.
/// @return SUCCESS when the kernel was saved, and also when there is no atomic-clean node or the
///         node carries no kernel. A failure status is returned through GE_CHECK_NOTNULL when
///         `op_desc`, the atomic node's op desc, the kernel buffer data or the new kernel is null.
Status ModelBuilder::SaveAtomicTBEKernel(const OpDescPtr &op_desc) {
  // Guard the dereferences below, matching the check already applied to atomic_op_desc.
  GE_CHECK_NOTNULL(op_desc);
  ge::NodePtr atomic_clean_node = op_desc->TryGetExtAttr("atomic_clean_node_ptr", ge::NodePtr());
  if (atomic_clean_node == nullptr) {
    // No atomic-clean node attached: nothing to save.
    return SUCCESS;
  }

  ge::OpDescPtr atomic_op_desc = atomic_clean_node->GetOpDesc();
  GE_CHECK_NOTNULL(atomic_op_desc);

  TBEKernelPtr tbe_kernel = atomic_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
  if (tbe_kernel == nullptr) {
    // Fall back to rebuilding the kernel from the name/buffer attributes of the atomic op.
    std::string kernel_name;
    GeAttrValue::BYTES kernel_buffer;
    (void) AttrUtils::GetStr(atomic_op_desc, ATTR_NAME_TBE_KERNEL_NAME, kernel_name);
    (void) AttrUtils::GetBytes(atomic_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer);
    if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) {
      GE_CHECK_NOTNULL(kernel_buffer.GetData());
      std::vector<char> data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize());
      tbe_kernel = MakeShared<OpKernelBin>(kernel_name, std::move(data));
      GE_CHECK_NOTNULL(tbe_kernel);
    }
  }
  if (tbe_kernel == nullptr) {
    GELOGD("Atomic_clean_node doesn't have tbe_kernel.");
    return SUCCESS;
  }

  tbe_kernel_store_.AddTBEKernel(tbe_kernel);
  GELOGD("Atomic_clean_node tbe_kernel_name %s!", tbe_kernel->GetName().c_str());
  // TBEKernelStore::LoadTBEKernelBinToOpDesc looks the atomic kernel up through this attribute and
  // skips it when the name is empty, so a failure here should at least be visible in the log.
  GE_IF_BOOL_EXEC(!AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, tbe_kernel->GetName()),
                  GELOGW("SaveAtomicTBEKernel: set atomic kernel name for %s failed", op_desc->GetName().c_str());)

  // Mirror the remaining kernel attributes under their atomic-specific keys (best effort).
  std::string kernel_name;
  (void) AttrUtils::GetStr(atomic_op_desc, atomic_op_desc->GetName() + "_kernelname", kernel_name);
  (void) AttrUtils::SetStr(op_desc, op_desc->GetName() + "_atomic_kernelname", kernel_name);

  std::string meta_data;
  (void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_METADATA, meta_data);
  (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_METADATA, meta_data);

  std::string json_string;
  (void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_MAGIC, json_string);
  (void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_MAGIC, json_string);
  return SUCCESS;
}
Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | ||||
// Add weight | // Add weight | ||||
ge_model.SetWeight(weight_buffer_); | ge_model.SetWeight(weight_buffer_); | ||||
@@ -662,6 +706,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | |||||
} | } | ||||
tbe_name_set.insert(tbe_kernel->GetName()); | tbe_name_set.insert(tbe_kernel->GetName()); | ||||
tbe_kernel_store_.AddTBEKernel(tbe_kernel); | tbe_kernel_store_.AddTBEKernel(tbe_kernel); | ||||
GE_CHK_STATUS_RET(SaveAtomicTBEKernel(node_op_desc), "[Save][TBEKernel] save atomic tbekernel failed!"); | |||||
} | } | ||||
SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types); | SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types); | ||||
@@ -89,6 +89,8 @@ class ModelBuilder { | |||||
void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types, | void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types, | ||||
std::set<std::string> &aicpu_tf_op_types); | std::set<std::string> &aicpu_tf_op_types); | ||||
Status SaveAtomicTBEKernel(const OpDescPtr &op_desc); | |||||
uint64_t session_id_; | uint64_t session_id_; | ||||
map<int64_t, size_t> mem_type_to_mem_offset_; | map<int64_t, size_t> mem_type_to_mem_offset_; | ||||
@@ -71,22 +71,22 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) | |||||
} | } | ||||
Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | ||||
auto op_desc_ptr = std::make_shared<OpDesc>(op_desc); | |||||
GE_CHECK_NOTNULL(op_desc_ptr); | |||||
auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | |||||
if (tbe_kernel == nullptr) { | |||||
GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | |||||
rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | ||||
if (rt_ret != RT_ERROR_NONE || is_single_op_) { | if (rt_ret != RT_ERROR_NONE || is_single_op_) { | ||||
auto op_desc_ptr = MakeShared<OpDesc>(op_desc); | |||||
GE_CHECK_NOTNULL(op_desc_ptr); | |||||
auto tbe_kernel = op_desc_ptr->TryGetExtAttr(GetKeyForTbeKernel(), TBEKernelPtr()); | |||||
if (tbe_kernel == nullptr) { | |||||
GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | |||||
void *bin_handle = nullptr; | void *bin_handle = nullptr; | ||||
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | ||||
GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str()); | GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str()); | ||||
rtDevBinary_t binary; | rtDevBinary_t binary; | ||||
std::string json_string; | std::string json_string; | ||||
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), | |||||
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForTvmMagic(), json_string), | |||||
GELOGI("Get original type of session_graph_id.")); | GELOGI("Get original type of session_graph_id.")); | ||||
if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { | if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { | ||||
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; | binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; | ||||
@@ -104,7 +104,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
GELOGI("TBE: binary.length: %lu", binary.length); | GELOGI("TBE: binary.length: %lu", binary.length); | ||||
GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); | GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); | ||||
std::string meta_data; | std::string meta_data; | ||||
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_METADATA, meta_data), | |||||
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForTvmMetaData(), meta_data), | |||||
GELOGI("Get original type of json_string")); | GELOGI("Get original type of json_string")); | ||||
GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); | GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); | ||||
GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | ||||
@@ -114,7 +114,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
kernel_store.ReferTBEHandle(stub_name_.c_str()); | kernel_store.ReferTBEHandle(stub_name_.c_str()); | ||||
} | } | ||||
std::string kernel_name; | std::string kernel_name; | ||||
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, op_desc_ptr->GetName() + "_kernelname", kernel_name), | |||||
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForKernelName(op_desc), kernel_name), | |||||
GELOGI("Get original type of kernel_name")); | GELOGI("Get original type of kernel_name")); | ||||
GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); | GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); | ||||
GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0)); | GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0)); | ||||
@@ -349,9 +349,6 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) | |||||
GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), | GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), | ||||
"Failed calc tiling data of node %s.", | "Failed calc tiling data of node %s.", | ||||
node->GetName().c_str()); | node->GetName().c_str()); | ||||
if (is_single_op_) { | |||||
tiling_info.clear_atomic = false; | |||||
} | |||||
GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); | GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -468,6 +465,22 @@ std::string AiCoreOpTask::GetKeyForOpParamSize() const { | |||||
return kAttrOpParamSize; | return kAttrOpParamSize; | ||||
} | } | ||||
std::string AiCoreOpTask::GetKeyForTbeKernel() const { | |||||
return OP_EXTATTR_NAME_TBE_KERNEL; | |||||
} | |||||
std::string AiCoreOpTask::GetKeyForTvmMagic() const { | |||||
return TVM_ATTR_NAME_MAGIC; | |||||
} | |||||
std::string AiCoreOpTask::GetKeyForTvmMetaData() const { | |||||
return TVM_ATTR_NAME_METADATA; | |||||
} | |||||
std::string AiCoreOpTask::GetKeyForKernelName(const OpDesc &op_desc) const { | |||||
return op_desc.GetName() + "_kernelname"; | |||||
} | |||||
Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | ||||
GE_CHK_STATUS_RET_NOLOG(AiCoreOpTask::Init(op_desc, task_def)); | GE_CHK_STATUS_RET_NOLOG(AiCoreOpTask::Init(op_desc, task_def)); | ||||
return InitAtomicAddrCleanIndices(op_desc); | return InitAtomicAddrCleanIndices(op_desc); | ||||
@@ -524,6 +537,22 @@ std::string AtomicAddrCleanOpTask::GetKeyForOpParamSize() const { | |||||
return kAttrAtomicOpParamSize; | return kAttrAtomicOpParamSize; | ||||
} | } | ||||
std::string AtomicAddrCleanOpTask::GetKeyForTbeKernel() const { | |||||
return EXT_ATTR_ATOMIC_TBE_KERNEL; | |||||
} | |||||
std::string AtomicAddrCleanOpTask::GetKeyForTvmMagic() const { | |||||
return ATOMIC_ATTR_TVM_MAGIC; | |||||
} | |||||
std::string AtomicAddrCleanOpTask::GetKeyForTvmMetaData() const { | |||||
return ATOMIC_ATTR_TVM_METADATA; | |||||
} | |||||
std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) const { | |||||
return op_desc.GetName() + "_atomic_kernelname"; | |||||
} | |||||
Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { | Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { | ||||
GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); | GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); | ||||
GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info), | GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info), | ||||
@@ -81,6 +81,10 @@ class AiCoreOpTask { | |||||
protected: | protected: | ||||
Status UpdateTilingInfo(TaskContext &context); | Status UpdateTilingInfo(TaskContext &context); | ||||
virtual std::string GetKeyForOpParamSize() const; | virtual std::string GetKeyForOpParamSize() const; | ||||
virtual std::string GetKeyForTbeKernel() const; | |||||
virtual std::string GetKeyForTvmMagic() const; | |||||
virtual std::string GetKeyForTvmMetaData() const; | |||||
virtual std::string GetKeyForKernelName(const OpDesc &op_desc) const; | |||||
virtual Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info); | virtual Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info); | ||||
std::unique_ptr<TensorBuffer> tiling_buffer_ = nullptr; | std::unique_ptr<TensorBuffer> tiling_buffer_ = nullptr; | ||||
@@ -119,6 +123,10 @@ class AtomicAddrCleanOpTask : public AiCoreOpTask { | |||||
protected: | protected: | ||||
std::string GetKeyForOpParamSize() const override; | std::string GetKeyForOpParamSize() const override; | ||||
std::string GetKeyForTbeKernel() const override; | |||||
std::string GetKeyForTvmMagic() const override; | |||||
std::string GetKeyForTvmMetaData() const override; | |||||
std::string GetKeyForKernelName(const OpDesc &op_desc) const override; | |||||
Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info) override; | Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info) override; | ||||
private: | private: | ||||
@@ -70,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<AiCoreNodeTask> &node_task, | |||||
auto atomic_task = | auto atomic_task = | ||||
std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask()); | std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask()); | ||||
GE_CHECK_NOTNULL(atomic_task); | GE_CHECK_NOTNULL(atomic_task); | ||||
atomic_task->SetSingleOp(is_single_op); | |||||
GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), | GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), | ||||
"[%s] Failed to init task for AtomicAddrClean", | "[%s] Failed to init task for AtomicAddrClean", | ||||
op_desc_->GetName().c_str()); | op_desc_->GetName().c_str()); | ||||
@@ -1 +1 @@ | |||||
Subproject commit 4ff5e3987f2e5d2980019defacaf0891861c84fc | |||||
Subproject commit 366b15574218befa11454311879a4f436eeb67a9 |
@@ -1 +1 @@ | |||||
Subproject commit 51fb6c4850906e8342598d47eccfca0b87ffea59 | |||||
Subproject commit d744541c6ca7f6966c1befacc9f83f53b0829e0a |
@@ -144,3 +144,20 @@ TEST_F(UtestModelBuilderTest, SetInputIsConst) { | |||||
ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false); | ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false); | ||||
EXPECT_EQ(builder.PreBuildModel(), SUCCESS); | EXPECT_EQ(builder.PreBuildModel(), SUCCESS); | ||||
} | } | ||||
// Verifies that SaveAtomicTBEKernel rebuilds the atomic-clean kernel from the name/buffer
// attributes of the attached atomic node and records the kernel name on the owning op.
TEST_F(UtestModelBuilderTest, test_save_atomic_bin) {
  Graph2SubGraphInfoList subgraphs;
  std::map<std::string, int> stream_max_parallel_num;
  ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
  ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false);

  // Atomic node carrying only the kernel name and a non-empty kernel buffer (no kernel ext attr).
  auto atomic_op_desc = make_shared<OpDesc>("Atomic", "Atomic");
  auto kernel_buffer = static_cast<GeAttrValue::BYTES>(Buffer(10));
  AttrUtils::SetStr(atomic_op_desc, ATTR_NAME_TBE_KERNEL_NAME, "Atomic");
  AttrUtils::SetBytes(atomic_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer);

  ge::NodePtr atomic_node = graph->AddNode(atomic_op_desc);
  // Without a node SaveAtomicTBEKernel returns SUCCESS early and the test would prove nothing.
  ASSERT_NE(atomic_node, nullptr);

  auto op_desc = make_shared<OpDesc>("Sum", "Sum");
  op_desc->SetExtAttr("atomic_clean_node_ptr", atomic_node);
  EXPECT_EQ(builder.SaveAtomicTBEKernel(op_desc), SUCCESS);

  // SUCCESS alone is also returned on the early-exit paths; check the kernel name was propagated.
  std::string atomic_kernel_name;
  EXPECT_TRUE(AttrUtils::GetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, atomic_kernel_name));
  EXPECT_EQ(atomic_kernel_name, "Atomic");
}
@@ -540,3 +540,18 @@ TEST_F(UtestGeHybrid, TestOptimizeDependenciesForConstInputs) { | |||||
ASSERT_EQ(dst_node_item->dependents_for_shape_inference.size(), 1); | ASSERT_EQ(dst_node_item->dependents_for_shape_inference.size(), 1); | ||||
ASSERT_EQ(dst_node_item->dependents_for_shape_inference[0], non_const_node_item->node); | ASSERT_EQ(dst_node_item->dependents_for_shape_inference[0], non_const_node_item->node); | ||||
} | } | ||||
// Checks the attribute keys used for kernel registration by the plain AiCore task and by the
// atomic-address-clean task.
TEST_F(UtestGeHybrid, test_key_for_kernel_bin) {
  auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask());
  // new(std::nothrow) yields nullptr on allocation failure; stop before dereferencing it.
  ASSERT_NE(aicore_task, nullptr);
  OpDesc op_desc("Sum", "Sum");
  EXPECT_EQ(aicore_task->GetKeyForTbeKernel(), OP_EXTATTR_NAME_TBE_KERNEL);
  EXPECT_EQ(aicore_task->GetKeyForTvmMagic(), TVM_ATTR_NAME_MAGIC);
  EXPECT_EQ(aicore_task->GetKeyForTvmMetaData(), TVM_ATTR_NAME_METADATA);
  EXPECT_EQ(aicore_task->GetKeyForKernelName(op_desc), "Sum_kernelname");

  auto atomic_task = std::unique_ptr<hybrid::AtomicAddrCleanOpTask>(new(std::nothrow)hybrid::AtomicAddrCleanOpTask());
  ASSERT_NE(atomic_task, nullptr);
  EXPECT_EQ(atomic_task->GetKeyForTbeKernel(), EXT_ATTR_ATOMIC_TBE_KERNEL);
  EXPECT_EQ(atomic_task->GetKeyForTvmMagic(), ATOMIC_ATTR_TVM_MAGIC);
  EXPECT_EQ(atomic_task->GetKeyForTvmMetaData(), ATOMIC_ATTR_TVM_METADATA);
  EXPECT_EQ(atomic_task->GetKeyForKernelName(op_desc), "Sum_atomic_kernelname");
}