From 48be801e80aaac9e2b06d1c209fb92adb07064b4 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Wed, 23 Jun 2021 16:49:10 +0800 Subject: [PATCH] Fix mem leak and recursive depth protection --- ge/common/ge/tbe_plugin_manager.cc | 14 +++++++ ge/session/omg.cc | 13 ++++++ ge/single_op/single_op_model.cc | 8 ++-- ge/single_op/single_op_model.h | 2 +- ge/single_op/task/aicpu_task_builder.cc | 8 ++-- ge/single_op/task/aicpu_task_builder.h | 4 +- ge/single_op/task/op_task.cc | 4 +- ge/single_op/task/op_task.h | 1 - tests/ut/ge/CMakeLists.txt | 1 + .../ge/common/tbe_plugin_manager_unittest.cc | 40 +++++++++++++++++++ tests/ut/ge/session/omg_omg_unittest.cc | 9 +++++ .../ge/single_op/single_op_model_unittest.cc | 24 +++++++++++ 12 files changed, 112 insertions(+), 16 deletions(-) create mode 100644 tests/ut/ge/common/tbe_plugin_manager_unittest.cc diff --git a/ge/common/ge/tbe_plugin_manager.cc b/ge/common/ge/tbe_plugin_manager.cc index 94ba8a9a..c876e300 100755 --- a/ge/common/ge/tbe_plugin_manager.cc +++ b/ge/common/ge/tbe_plugin_manager.cc @@ -105,17 +105,29 @@ void TBEPluginManager::ProcessSoFullName(vector &file_list, string &caff } void TBEPluginManager::FindParserSo(const string &path, vector &file_list, string &caffe_parser_path) { + static uint32_t temp_depth = 0; + static const uint32_t max_recursive_depth = 20; // For recursive depth protection + + temp_depth++; + if (temp_depth >= max_recursive_depth) { + GELOGW("Recursive depth is become %u, Please check input!", temp_depth); + temp_depth--; + return; + } + // Path, change to absolute path string real_path = RealPath(path.c_str()); // Plugin path does not exist if (real_path.empty()) { GELOGW("RealPath is empty."); + temp_depth--; return; } INT32 is_dir = mmIsDir(real_path.c_str()); // Lib plugin path not exist if (is_dir != EN_OK) { GELOGW("%s is not a dir. errmsg:%s", real_path.c_str(), strerror(errno)); + temp_depth--; return; } @@ -123,6 +135,7 @@ void TBEPluginManager::FindParserSo(const string &path, vector &file_lis auto ret = mmScandir(real_path.c_str(), &entries, nullptr, nullptr); if (ret < EN_OK) { GELOGW("scan dir failed. path = %s, ret = %d, errmsg = %s", real_path.c_str(), ret, strerror(errno)); + temp_depth--; return; } for (int i = 0; i < ret; ++i) { @@ -142,6 +155,7 @@ void TBEPluginManager::FindParserSo(const string &path, vector &file_lis } } mmScandirFree(entries, ret); + temp_depth--; } void TBEPluginManager::GetPluginSoFileList(const string &path, vector &file_list, string &caffe_parser_path) { diff --git a/ge/session/omg.cc b/ge/session/omg.cc index f7f3def7..8d826043 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -221,14 +221,25 @@ static Status ParseOutputFp16NodesFormat(const string &is_output_fp16) { } void FindParserSo(const string &path, vector &file_list, string &caffe_parser_path) { + static uint32_t temp_depth = 0; + static const uint32_t max_recursive_depth = 20; // For recursive depth protection + + temp_depth++; + if (temp_depth >= max_recursive_depth) { + GELOGW("Recursive depth is become %u, Please check input!", temp_depth); + temp_depth--; + return; + } // path, Change to absolute path string real_path = RealPath(path.c_str()); if (real_path.empty()) { // plugin path does not exist + temp_depth--; return; } struct stat stat_buf; if ((stat(real_path.c_str(), &stat_buf) != 0) || (!S_ISDIR(stat_buf.st_mode))) { GELOGI("The path %s is not a directory.", real_path.c_str()); + temp_depth--; return; } @@ -237,6 +248,7 @@ void FindParserSo(const string &path, vector &file_list, string &caffe_p if (nullptr == dir) { // plugin path does not exist GELOGW("Open directory %s failed.", path.c_str()); + temp_depth--; return; } @@ -260,6 +272,7 @@ void FindParserSo(const string &path, vector &file_list, string &caffe_p FindParserSo(full_name, file_list, caffe_parser_path); } closedir(dir); + temp_depth--; return; } diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index eefa5165..1d00127a 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -380,7 +380,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s uint64_t singleop_kernel_id = aicpu_kernel_id++; GELOGI("Build singleOp TfTask, kernel_id = %lu", singleop_kernel_id); GE_CHK_STATUS_RET_NOLOG( - BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_kernel_id)); + BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, depend_compute_flag, singleop_kernel_id)); aicpu_task->SetModelArgs(model_name_, model_id_); ParseArgTable(aicpu_task, single_op); single_op.tasks_.emplace_back(aicpu_task); @@ -458,7 +458,7 @@ Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask * } Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, - bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id) { + bool& depend_compute_flag, uint64_t kernel_id) { auto iter = op_list_.find(kernel_def.op_index()); if (iter == op_list_.end()) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, @@ -476,7 +476,7 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiC return ACL_ERROR_GE_MEMORY_ALLOCATION; } auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def); - auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, kernel_id); + auto ret = builder.BuildTask(*aicpu_task, model_params_, kernel_id); if (ret != SUCCESS) { GELOGE(ret, "[Build][Task] failed, kernel_id:%lu.", kernel_id); return ret; @@ -631,7 +631,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, bool depend_compute_flag = false; uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id); - GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true, + GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, depend_compute_flag, dynamic_singleop_kernel_id)); if (depend_compute_flag) { if (i >= tasks.size() - 1) { diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index bf3ad050..747d99e9 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -70,7 +70,7 @@ class SingleOpModel { Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op); Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, - bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); + bool& depend_compute_flag, uint64_t kernel_id); Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); Status BuildModelTaskKernel(StreamResource *stream_resource, const domi::TaskDef &task_def, DynamicSingleOp &single_op); diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index 805b1306..1b945280 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -63,7 +63,7 @@ namespace ge { return SUCCESS; } - Status AiCpuTaskBuilder::InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag) { + Status AiCpuTaskBuilder::InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam ¶m) { if (kernel_def_.args_size() > sizeof(STR_FWK_OP_KERNEL)) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), kernel_def_.args_size()); @@ -83,9 +83,8 @@ namespace ge { return SUCCESS; } - Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, - bool dynamic_flag, uint64_t kernel_id) { - GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(task, param, dynamic_flag)); + Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, uint64_t kernel_id) { + GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(task, param)); STR_FWK_OP_KERNEL fwk_op_kernel = {0}; auto ret = SetFmkOpKernel(task.io_addr_, task.workspace_addr_, fwk_op_kernel); @@ -124,7 +123,6 @@ namespace ge { task.arg_size_ = sizeof(STR_FWK_OP_KERNEL); task.op_type_ = op_desc_->GetName(); task.task_info_ = kernel_def_.task_info(); - task.dynamic_flag_ = dynamic_flag; task.kernel_id_ = kernel_id; auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); diff --git a/ge/single_op/task/aicpu_task_builder.h b/ge/single_op/task/aicpu_task_builder.h index fe9c9bc2..eca91254 100755 --- a/ge/single_op/task/aicpu_task_builder.h +++ b/ge/single_op/task/aicpu_task_builder.h @@ -29,12 +29,12 @@ namespace ge { AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def); ~AiCpuTaskBuilder() = default; - Status BuildTask(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag, uint64_t kernel_id); + Status BuildTask(AiCpuTask &task, const SingleOpModelParam ¶m, uint64_t kernel_id); private: static Status SetKernelArgs(void **args, STR_FWK_OP_KERNEL &kernel); Status SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &kernel); - Status InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag); + Status InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam ¶m); const OpDescPtr op_desc_; const domi::KernelExDef &kernel_def_; diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index db2fdfeb..b6a78f9e 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -621,9 +621,7 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector &inputs, const vecto AiCpuTask::~AiCpuTask() { FreeHbm(args_); FreeHbm(io_addr_); - if (dynamic_flag_) { - FreeHbm(workspace_addr_); - } + FreeHbm(workspace_addr_); FreeHbm(copy_workspace_buf_); FreeHbm(copy_ioaddr_dev_); FreeHbm(copy_input_release_flag_dev_); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 2fbb4dc7..19320bc0 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -192,7 +192,6 @@ class AiCpuTask : public AiCpuBaseTask { // host addr std::vector io_addr_host_; - bool dynamic_flag_ = false; // for copy task void *copy_task_args_buf_ = nullptr; void *copy_workspace_buf_ = nullptr; diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index cf573343..e3aecf80 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -818,6 +818,7 @@ set(MULTI_PARTS_TEST_FILES "session/inner_session_unittest.cc" "session/session_manager_unittest.cc" "common/host_cpu_engine_unittest.cc" + "common/tbe_plugin_manager_unittest.cc" ) set(GE_OPT_INFO_TEST_FILES diff --git a/tests/ut/ge/common/tbe_plugin_manager_unittest.cc b/tests/ut/ge/common/tbe_plugin_manager_unittest.cc new file mode 100644 index 00000000..16c1650b --- /dev/null +++ b/tests/ut/ge/common/tbe_plugin_manager_unittest.cc @@ -0,0 +1,40 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define protected public +#define private public +#include "common/ge/tbe_plugin_manager.h" +#undef private +#undef protected + +namespace ge { +class UtestTBEPluginManager: public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +TEST_F(UtestTBEPluginManager, CheckFindParserSo) { + string path = ""; + vector file_list = {}; + string caffe_parser_path = ""; + TBEPluginManager::Instance().FindParserSo(path, file_list, caffe_parser_path); + path = "/lib64"; + TBEPluginManager::Instance().FindParserSo(path, file_list, caffe_parser_path); +} +} // namespace ge diff --git a/tests/ut/ge/session/omg_omg_unittest.cc b/tests/ut/ge/session/omg_omg_unittest.cc index 334df319..6176b7c0 100644 --- a/tests/ut/ge/session/omg_omg_unittest.cc +++ b/tests/ut/ge/session/omg_omg_unittest.cc @@ -48,4 +48,13 @@ TEST_F(UtestOmg, display_model_info_success) { attr_def->mutable_list()->add_i(4); PrintModelInfo(&model_def, 1); } + +TEST_F(UtestOmg, find_parser_so) { + string path = ""; + vector file_list = {}; + string caffe_parser_path = ""; + FindParserSo(path, file_list, caffe_parser_path); + path = "/lib64"; + FindParserSo(path, file_list, caffe_parser_path); +} } // namespace ge diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index 1975f9f4..e4a53340 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -310,3 +310,27 @@ TEST_F(UtestSingleOpModel, BuildTaskList) { MemcpyAsyncTask mem_task; ASSERT_EQ(mem_task.LaunchKernel(0), SUCCESS); } + +TEST_F(UtestSingleOpModel, build_aicpu_task) { + ComputeGraphPtr graph = make_shared("single_op"); + GeModelPtr ge_model = make_shared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph)); + shared_ptr model_task_def = make_shared(); + ge_model->SetModelTaskDef(model_task_def); + + domi::TaskDef *task_def = model_task_def->add_task(); + task_def->set_type(RT_MODEL_TASK_KERNEL_EX); + + string model_data_str = "123456789"; + SingleOpModel model("model", model_data_str.c_str(), model_data_str.size()); + std::mutex stream_mu; + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + DynamicSingleOp single_op(0, &stream_mu, stream); + model.model_helper_.model_ = ge_model; + auto op_desc = std::make_shared("add", "Add"); + NodePtr node = graph->AddNode(op_desc); + model.op_list_[0] = node; + StreamResource *res = new (std::nothrow) StreamResource(1); + ASSERT_EQ(model.BuildTaskListForDynamicOp(res, single_op), SUCCESS); +}