| @@ -50,6 +50,7 @@ const char *const kIsInputVar = "INPUT_IS_VAR"; | |||||
| const char *const kIsOutputVar = "OUTPUT_IS_VAR"; | const char *const kIsOutputVar = "OUTPUT_IS_VAR"; | ||||
| const char *const kProfilingMode = "PROFILING_MODE"; | const char *const kProfilingMode = "PROFILING_MODE"; | ||||
| const char *const kIteratorV2 = "IteratorV2"; | const char *const kIteratorV2 = "IteratorV2"; | ||||
| const char *const kKernelInfoNameHccl = "ops_kernel_info_hccl"; | |||||
| const uint32_t kProfilingArStep = 2; | const uint32_t kProfilingArStep = 2; | ||||
| const uint64_t kProfilingFpStartLogid = 1; | const uint64_t kProfilingFpStartLogid = 1; | ||||
| const uint64_t kProfilingBpEndLogid = 2; | const uint64_t kProfilingBpEndLogid = 2; | ||||
| @@ -437,14 +438,15 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
| } | } | ||||
| // Reset stream id to ge stream id, as graph load must use ge stream to reassign stream | // Reset stream id to ge stream id, as graph load must use ge stream to reassign stream | ||||
| void *ops_kernel_info_store_ptr = kernel_info_store.get(); | |||||
| for (size_t idx = task_list_size_before; idx < task_list_size_after; ++idx) { | for (size_t idx = task_list_size_before; idx < task_list_size_after; ++idx) { | ||||
| task_def_list[idx].set_stream_id(static_cast<uint32_t>(stream_id)); | task_def_list[idx].set_stream_id(static_cast<uint32_t>(stream_id)); | ||||
| op_name_map[idx] = name; | op_name_map[idx] = name; | ||||
| // Set opsKernelInfoStorePtr and op_index, the two fields be use in DistributeTask and InitTaskInfo | |||||
| TaskDef *task_def_ptr = &task_def_list[idx]; | TaskDef *task_def_ptr = &task_def_list[idx]; | ||||
| GE_CHECK_NOTNULL(task_def_ptr); | GE_CHECK_NOTNULL(task_def_ptr); | ||||
| task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr)); | |||||
| // Set opsKernelInfoStorePtr for hccl which will be use in DistributeTask and InitTaskInfo | |||||
| if (op_kernel_lib_name == kKernelInfoNameHccl) { | |||||
| task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(kernel_info_store.get())); | |||||
| } | |||||
| } | } | ||||
| GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task finished, generate %zu task(s).", | GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task finished, generate %zu task(s).", | ||||
| op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, | op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, | ||||
| @@ -105,10 +105,7 @@ class SubGraphInfo { | |||||
| std::vector<bool> output_flag_; | std::vector<bool> output_flag_; | ||||
| ModelIdInfo model_id_info_; | ModelIdInfo model_id_info_; | ||||
| GeModelPtr ge_model_ptr_; | GeModelPtr ge_model_ptr_; | ||||
| bool malloc_flag_; | |||||
| std::vector<void *> buffer_addr_; | |||||
| std::string output_names_; | std::string output_names_; | ||||
| std::vector<uint32_t> buffer_size_; | |||||
| std::string stream_label_; | std::string stream_label_; | ||||
| std::unordered_map<ge::NodePtr, ge::NodePtr> end_to_pld_; | std::unordered_map<ge::NodePtr, ge::NodePtr> end_to_pld_; | ||||
| std::unordered_map<ge::NodePtr, ge::NodePtr> pld_to_end_; | std::unordered_map<ge::NodePtr, ge::NodePtr> pld_to_end_; | ||||
| @@ -433,11 +433,13 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||||
| if (!inputs_size.empty()) { | if (!inputs_size.empty()) { | ||||
| StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); | StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); | ||||
| GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers)); | GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers)); | ||||
| GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(input_desc, input_buffers)); | |||||
| } | } | ||||
| if (hybrid_model_executor_ != nullptr) { | if (hybrid_model_executor_ != nullptr) { | ||||
| GELOGD("Execute multi-task dynamic single op by hybrid model executor"); | GELOGD("Execute multi-task dynamic single op by hybrid model executor"); | ||||
| if (!inputs_size.empty()) { | |||||
| GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(input_desc, input_buffers)); | |||||
| } | |||||
| hybrid::HybridModelExecutor::ExecuteArgs args; | hybrid::HybridModelExecutor::ExecuteArgs args; | ||||
| GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, output_buffers, input_desc, args)); | GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, output_buffers, input_desc, args)); | ||||
| @@ -293,6 +293,9 @@ Status TbeOpTask::UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, cons | |||||
| } | } | ||||
| Status TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) { | Status TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) { | ||||
| node_ = node; | |||||
| tiling_buffer_ = tiling_buffer; | |||||
| max_tiling_size_ = max_tiling_size; | |||||
| if (tiling_buffer != nullptr) { | if (tiling_buffer != nullptr) { | ||||
| uintptr_t *arg_base = nullptr; | uintptr_t *arg_base = nullptr; | ||||
| size_t arg_num = 0; | size_t arg_num = 0; | ||||
| @@ -310,9 +313,6 @@ Status TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, | |||||
| } | } | ||||
| arg_base[tiling_index] = reinterpret_cast<uintptr_t>(tiling_buffer); | arg_base[tiling_index] = reinterpret_cast<uintptr_t>(tiling_buffer); | ||||
| } | } | ||||
| node_ = node; | |||||
| tiling_buffer_ = tiling_buffer; | |||||
| max_tiling_size_ = max_tiling_size; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -481,6 +481,25 @@ void TbeOpTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | |||||
| } | } | ||||
| } | } | ||||
| Status AtomicAddrCleanOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) { | |||||
| node_ = node; | |||||
| tiling_buffer_ = tiling_buffer; | |||||
| max_tiling_size_ = max_tiling_size; | |||||
| if (tiling_buffer != nullptr) { | |||||
| uintptr_t *arg_base = nullptr; | |||||
| size_t arg_num = 0; | |||||
| GetIoAddr(arg_base, arg_num); | |||||
| uint32_t tiling_index = atomic_output_indices_.size(); | |||||
| if (arg_num == 0 || arg_num < tiling_index) { | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Size]Tiling index %u, arg number %zu is invalid.", | |||||
| tiling_index, arg_num); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| arg_base[tiling_index] = reinterpret_cast<uintptr_t>(tiling_buffer); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AtomicAddrCleanOpTask::UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | Status AtomicAddrCleanOpTask::UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | ||||
| const vector<GeTensorDesc> &output_desc) { | const vector<GeTensorDesc> &output_desc) { | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -97,7 +97,7 @@ class TbeOpTask : public OpTask { | |||||
| const void *GetArgs() const; | const void *GetArgs() const; | ||||
| size_t GetArgSize() const; | size_t GetArgSize() const; | ||||
| const std::string &GetStubName() const; | const std::string &GetStubName() const; | ||||
| Status EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size); | |||||
| virtual Status EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size); | |||||
| const std::string &GetTaskType() const override; | const std::string &GetTaskType() const override; | ||||
| void SetHandle(void *handle); | void SetHandle(void *handle); | ||||
| @@ -149,6 +149,7 @@ class TbeOpTask : public OpTask { | |||||
| class AtomicAddrCleanOpTask : public TbeOpTask { | class AtomicAddrCleanOpTask : public TbeOpTask { | ||||
| public: | public: | ||||
| Status InitAtomicAddrCleanIndices(); | Status InitAtomicAddrCleanIndices(); | ||||
| Status EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) override; | |||||
| private: | private: | ||||
| Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | ||||
| @@ -156,8 +157,8 @@ class AtomicAddrCleanOpTask : public TbeOpTask { | |||||
| Status UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs) override; | Status UpdateIoAddr(const vector<DataBuffer> &inputs, const vector<DataBuffer> &outputs) override; | ||||
| Status UpdateTilingArgs(rtStream_t stream) override; | Status UpdateTilingArgs(rtStream_t stream) override; | ||||
| Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info) override; | Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info) override; | ||||
| std::vector<int> atomic_output_indices_; | |||||
| std::vector<int> atomic_output_indices_; | |||||
| }; | }; | ||||
| class AiCpuBaseTask : public OpTask { | class AiCpuBaseTask : public OpTask { | ||||
| @@ -425,7 +425,7 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { | |||||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | ||||
| } | } | ||||
| task.EnableDynamicSupport(node_, tiling_buffer, static_cast<uint32_t>(max_size)); | |||||
| GE_CHK_STATUS_RET_NOLOG(task.EnableDynamicSupport(node_, tiling_buffer, static_cast<uint32_t>(max_size))); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -29,6 +29,8 @@ | |||||
| #define protected public | #define protected public | ||||
| #define private public | #define private public | ||||
| #include "init/gelib.h" | |||||
| #include "ge/opskernel_manager/ops_kernel_builder_manager.h" | |||||
| #include "graph/build/task_generator.h" | #include "graph/build/task_generator.h" | ||||
| #include "graph/manager/graph_mem_manager.h" | #include "graph/manager/graph_mem_manager.h" | ||||
| #include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
| @@ -41,9 +43,46 @@ using namespace ge; | |||||
| namespace { | namespace { | ||||
| const char *const kIsInputVar = "INPUT_IS_VAR"; | const char *const kIsInputVar = "INPUT_IS_VAR"; | ||||
| const char *const kIsOutputVar = "OUTPUT_IS_VAR"; | const char *const kIsOutputVar = "OUTPUT_IS_VAR"; | ||||
| } | |||||
| const char *const kKernelInfoNameHccl = "ops_kernel_info_hccl"; | |||||
| } // namespace | |||||
| class UtestTaskGeneratorTest : public testing::Test { | class UtestTaskGeneratorTest : public testing::Test { | ||||
| public: | public: | ||||
| struct FakeOpsKernelBuilder : OpsKernelBuilder { | |||||
| FakeOpsKernelBuilder(){}; | |||||
| private: | |||||
| Status Initialize(const map<std::string, std::string> &options) override { | |||||
| return SUCCESS; | |||||
| }; | |||||
| Status Finalize() override { | |||||
| return SUCCESS; | |||||
| }; | |||||
| Status CalcOpRunningParam(Node &node) override { | |||||
| return SUCCESS; | |||||
| }; | |||||
| Status GenerateTask(const Node &node, RunContext &context, std::vector<domi::TaskDef> &tasks) override { | |||||
| domi::TaskDef task_def; | |||||
| tasks.push_back(task_def); | |||||
| return SUCCESS; | |||||
| }; | |||||
| }; | |||||
| struct FakeOpsKernelInfoStore : OpsKernelInfoStore { | |||||
| FakeOpsKernelInfoStore() = default; | |||||
| private: | |||||
| Status Initialize(const std::map<std::string, std::string> &options) override { | |||||
| return SUCCESS; | |||||
| }; | |||||
| Status Finalize() override { | |||||
| return SUCCESS; | |||||
| }; | |||||
| bool CheckSupported(const OpDescPtr &op_desc, std::string &reason) const override { | |||||
| return true; | |||||
| }; | |||||
| void GetAllOpsKernelInfo(std::map<std::string, ge::OpInfo> &infos) const override{}; | |||||
| }; | |||||
| ge::ComputeGraphPtr BuildGraphFpProfiling() { | ge::ComputeGraphPtr BuildGraphFpProfiling() { | ||||
| ge::ut::GraphBuilder builder("graph"); | ge::ut::GraphBuilder builder("graph"); | ||||
| auto data = builder.AddNode("data", "phony", 1, 1); | auto data = builder.AddNode("data", "phony", 1, 1); | ||||
| @@ -95,6 +134,14 @@ class UtestTaskGeneratorTest : public testing::Test { | |||||
| return builder.GetGraph(); | return builder.GetGraph(); | ||||
| } | } | ||||
| ge::ComputeGraphPtr BuildHcclGraph() { | |||||
| ge::ut::GraphBuilder builder("graph"); | |||||
| auto hccl_node = builder.AddNode("hccl_phony_node", "HCCL_PHONY", 0, 0); | |||||
| auto op_desc = hccl_node->GetOpDesc(); | |||||
| op_desc->SetOpKernelLibName(kKernelInfoNameHccl); | |||||
| op_desc->SetStreamId(0); | |||||
| return builder.GetGraph(); | |||||
| } | |||||
| protected: | protected: | ||||
| void SetUp() {} | void SetUp() {} | ||||
| @@ -156,3 +203,31 @@ TEST_F(UtestTaskGeneratorTest, AutoFindBpOpIndex) { | |||||
| output_desc->SetName("hcom"); | output_desc->SetName("hcom"); | ||||
| EXPECT_EQ(task_generator.AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes), SUCCESS); | EXPECT_EQ(task_generator.AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes), SUCCESS); | ||||
| } | } | ||||
| TEST_F(UtestTaskGeneratorTest, GenerateTask) { | |||||
| map<string, string> options; | |||||
| Status ret = ge::GELib::Initialize(options); | |||||
| EXPECT_EQ(ret, SUCCESS); | |||||
| shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||||
| EXPECT_NE(instance_ptr, nullptr); | |||||
| OpsKernelInfoStorePtr ops_kernel_info_store_ptr = MakeShared<FakeOpsKernelInfoStore>(); | |||||
| instance_ptr->opsManager_.ops_kernel_store_.insert(make_pair(kKernelInfoNameHccl, ops_kernel_info_store_ptr)); | |||||
| OpsKernelBuilderManager &builder_manager_instance_ptr = ge::OpsKernelBuilderManager::Instance(); | |||||
| OpsKernelBuilderPtr fake_builder = MakeShared<FakeOpsKernelBuilder>(); | |||||
| builder_manager_instance_ptr.ops_kernel_builders_[kKernelInfoNameHccl] = fake_builder; | |||||
| auto graph = BuildHcclGraph(); | |||||
| TaskGenerator task_generator(nullptr, 0); | |||||
| RunContext run_context; | |||||
| run_context.graphStreamList.push_back(static_cast<void *>(ops_kernel_info_store_ptr.get())); | |||||
| vector<uint32_t> all_reduce_nodes; | |||||
| vector<domi::TaskDef> task_def_list; | |||||
| map<uint32_t, string> op_name_map; | |||||
| EXPECT_EQ(task_generator.GenerateTask(run_context, graph, task_def_list, op_name_map), SUCCESS); | |||||
| EXPECT_EQ(task_def_list.size(), 1); | |||||
| EXPECT_EQ(task_def_list[0].ops_kernel_store_ptr(), reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr.get())); | |||||
| } | |||||
| @@ -237,6 +237,7 @@ TEST_F(UtestSingleOpTask, test_aicpu_task_update_io_addr) { | |||||
| ASSERT_EQ(ret, PARAM_INVALID); | ASSERT_EQ(ret, PARAM_INVALID); | ||||
| } | } | ||||
| } | } | ||||
| TEST_F(UtestSingleOpTask, test_aicpu_task_launch_kernel) { | TEST_F(UtestSingleOpTask, test_aicpu_task_launch_kernel) { | ||||
| AiCpuCCTask task; | AiCpuCCTask task; | ||||
| rtStream_t stream = nullptr; | rtStream_t stream = nullptr; | ||||
| @@ -258,4 +259,27 @@ TEST_F(UtestSingleOpTask, test_aicpu_task_launch_kernel) { | |||||
| ASSERT_EQ(task.InitForSummaryAndCopy(), SUCCESS); | ASSERT_EQ(task.InitForSummaryAndCopy(), SUCCESS); | ||||
| ASSERT_EQ(task.LaunchKernel(inputs_desc, inputs, outputs_desc, outputs, stream), SUCCESS); | ASSERT_EQ(task.LaunchKernel(inputs_desc, inputs, outputs_desc, outputs, stream), SUCCESS); | ||||
| } | } | ||||
| TEST_F(UtestSingleOpTask, test_dynamic_support) { | |||||
| auto graph = make_shared<ComputeGraph>("graph"); | |||||
| auto op_desc = make_shared<OpDesc>("Add", "Add"); | |||||
| auto node = graph->AddNode(op_desc); | |||||
| AtomicAddrCleanOpTask atomic_task; | |||||
| TbeOpTask tbe_task; | |||||
| tbe_task.arg_size_ = sizeof(void *) * 1; | |||||
| tbe_task.args_.reset(new (std::nothrow) uint8_t[tbe_task.arg_size_]); | |||||
| atomic_task.arg_size_ = sizeof(void *) * 1; | |||||
| atomic_task.args_.reset(new (std::nothrow) uint8_t[atomic_task.arg_size_]); | |||||
| ASSERT_EQ(tbe_task.EnableDynamicSupport(node, (void *)0x0001, 1), ACL_ERROR_GE_INTERNAL_ERROR); | |||||
| ASSERT_EQ(atomic_task.EnableDynamicSupport(node, (void *)0x0001, 1), ACL_ERROR_GE_INTERNAL_ERROR); | |||||
| tbe_task.arg_size_ = sizeof(void *) * 2; | |||||
| tbe_task.args_.reset(new (std::nothrow) uint8_t[tbe_task.arg_size_]); | |||||
| atomic_task.arg_size_ = sizeof(void *) * 2; | |||||
| atomic_task.args_.reset(new (std::nothrow) uint8_t[atomic_task.arg_size_]); | |||||
| ASSERT_EQ(tbe_task.EnableDynamicSupport(node, (void *)0x0001, 1), SUCCESS); | |||||
| ASSERT_EQ(atomic_task.EnableDynamicSupport(node, (void *)0x0001, 1), SUCCESS); | |||||
| tbe_task.tiling_buffer_ = nullptr; | |||||
| atomic_task.tiling_buffer_ = nullptr; | |||||
| } | } | ||||