Browse Source

Fix mem leak and recursive depth protection

tags/v1.5.1
zhaozhixuan 3 years ago
parent
commit
48be801e80
12 changed files with 112 additions and 16 deletions
  1. +14
    -0
      ge/common/ge/tbe_plugin_manager.cc
  2. +13
    -0
      ge/session/omg.cc
  3. +4
    -4
      ge/single_op/single_op_model.cc
  4. +1
    -1
      ge/single_op/single_op_model.h
  5. +3
    -5
      ge/single_op/task/aicpu_task_builder.cc
  6. +2
    -2
      ge/single_op/task/aicpu_task_builder.h
  7. +1
    -3
      ge/single_op/task/op_task.cc
  8. +0
    -1
      ge/single_op/task/op_task.h
  9. +1
    -0
      tests/ut/ge/CMakeLists.txt
  10. +40
    -0
      tests/ut/ge/common/tbe_plugin_manager_unittest.cc
  11. +9
    -0
      tests/ut/ge/session/omg_omg_unittest.cc
  12. +24
    -0
      tests/ut/ge/single_op/single_op_model_unittest.cc

+ 14
- 0
ge/common/ge/tbe_plugin_manager.cc View File

@@ -105,17 +105,29 @@ void TBEPluginManager::ProcessSoFullName(vector<string> &file_list, string &caff
}

void TBEPluginManager::FindParserSo(const string &path, vector<string> &file_list, string &caffe_parser_path) {
static uint32_t temp_depth = 0;
static const uint32_t max_recursive_depth = 20; // For recursive depth protection

temp_depth++;
if (temp_depth >= max_recursive_depth) {
GELOGW("Recursive depth is become %u, Please check input!", temp_depth);
temp_depth--;
return;
}

// Path, change to absolute path
string real_path = RealPath(path.c_str());
// Plugin path does not exist
if (real_path.empty()) {
GELOGW("RealPath is empty.");
temp_depth--;
return;
}
INT32 is_dir = mmIsDir(real_path.c_str());
// Lib plugin path not exist
if (is_dir != EN_OK) {
GELOGW("%s is not a dir. errmsg:%s", real_path.c_str(), strerror(errno));
temp_depth--;
return;
}

@@ -123,6 +135,7 @@ void TBEPluginManager::FindParserSo(const string &path, vector<string> &file_lis
auto ret = mmScandir(real_path.c_str(), &entries, nullptr, nullptr);
if (ret < EN_OK) {
GELOGW("scan dir failed. path = %s, ret = %d, errmsg = %s", real_path.c_str(), ret, strerror(errno));
temp_depth--;
return;
}
for (int i = 0; i < ret; ++i) {
@@ -142,6 +155,7 @@ void TBEPluginManager::FindParserSo(const string &path, vector<string> &file_lis
}
}
mmScandirFree(entries, ret);
temp_depth--;
}

void TBEPluginManager::GetPluginSoFileList(const string &path, vector<string> &file_list, string &caffe_parser_path) {


+ 13
- 0
ge/session/omg.cc View File

@@ -221,14 +221,25 @@ static Status ParseOutputFp16NodesFormat(const string &is_output_fp16) {
}

void FindParserSo(const string &path, vector<string> &file_list, string &caffe_parser_path) {
static uint32_t temp_depth = 0;
static const uint32_t max_recursive_depth = 20; // For recursive depth protection

temp_depth++;
if (temp_depth >= max_recursive_depth) {
GELOGW("Recursive depth is become %u, Please check input!", temp_depth);
temp_depth--;
return;
}
// path, Change to absolute path
string real_path = RealPath(path.c_str());
if (real_path.empty()) { // plugin path does not exist
temp_depth--;
return;
}
struct stat stat_buf;
if ((stat(real_path.c_str(), &stat_buf) != 0) || (!S_ISDIR(stat_buf.st_mode))) {
GELOGI("The path %s is not a directory.", real_path.c_str());
temp_depth--;
return;
}

@@ -237,6 +248,7 @@ void FindParserSo(const string &path, vector<string> &file_list, string &caffe_p

if (nullptr == dir) { // plugin path does not exist
GELOGW("Open directory %s failed.", path.c_str());
temp_depth--;
return;
}

@@ -260,6 +272,7 @@ void FindParserSo(const string &path, vector<string> &file_list, string &caffe_p
FindParserSo(full_name, file_list, caffe_parser_path);
}
closedir(dir);
temp_depth--;
return;
}



+ 4
- 4
ge/single_op/single_op_model.cc View File

@@ -380,7 +380,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s
uint64_t singleop_kernel_id = aicpu_kernel_id++;
GELOGI("Build singleOp TfTask, kernel_id = %lu", singleop_kernel_id);
GE_CHK_STATUS_RET_NOLOG(
BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_kernel_id));
BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, depend_compute_flag, singleop_kernel_id));
aicpu_task->SetModelArgs(model_name_, model_id_);
ParseArgTable(aicpu_task, single_op);
single_op.tasks_.emplace_back(aicpu_task);
@@ -458,7 +458,7 @@ Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask *
}

Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id) {
bool& depend_compute_flag, uint64_t kernel_id) {
auto iter = op_list_.find(kernel_def.op_index());
if (iter == op_list_.end()) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
@@ -476,7 +476,7 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiC
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def);
auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, kernel_id);
auto ret = builder.BuildTask(*aicpu_task, model_params_, kernel_id);
if (ret != SUCCESS) {
GELOGE(ret, "[Build][Task] failed, kernel_id:%lu.", kernel_id);
return ret;
@@ -631,7 +631,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource,
bool depend_compute_flag = false;
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++;
GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id);
GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true,
GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task,
depend_compute_flag, dynamic_singleop_kernel_id));
if (depend_compute_flag) {
if (i >= tasks.size() - 1) {


+ 1
- 1
ge/single_op/single_op_model.h View File

@@ -70,7 +70,7 @@ class SingleOpModel {
Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op);
Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task);
Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id);
bool& depend_compute_flag, uint64_t kernel_id);
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id);
Status BuildModelTaskKernel(StreamResource *stream_resource, const domi::TaskDef &task_def,
DynamicSingleOp &single_op);


+ 3
- 5
ge/single_op/task/aicpu_task_builder.cc View File

@@ -63,7 +63,7 @@ namespace ge {
return SUCCESS;
}

Status AiCpuTaskBuilder::InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam &param, bool dynamic_flag) {
Status AiCpuTaskBuilder::InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam &param) {
if (kernel_def_.args_size() > sizeof(STR_FWK_OP_KERNEL)) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d",
sizeof(STR_FWK_OP_KERNEL), kernel_def_.args_size());
@@ -83,9 +83,8 @@ namespace ge {
return SUCCESS;
}

Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam &param,
bool dynamic_flag, uint64_t kernel_id) {
GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(task, param, dynamic_flag));
Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam &param, uint64_t kernel_id) {
GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(task, param));

STR_FWK_OP_KERNEL fwk_op_kernel = {0};
auto ret = SetFmkOpKernel(task.io_addr_, task.workspace_addr_, fwk_op_kernel);
@@ -124,7 +123,6 @@ namespace ge {
task.arg_size_ = sizeof(STR_FWK_OP_KERNEL);
task.op_type_ = op_desc_->GetName();
task.task_info_ = kernel_def_.task_info();
task.dynamic_flag_ = dynamic_flag;
task.kernel_id_ = kernel_id;

auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_);


+ 2
- 2
ge/single_op/task/aicpu_task_builder.h View File

@@ -29,12 +29,12 @@ namespace ge {
AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def);
~AiCpuTaskBuilder() = default;

Status BuildTask(AiCpuTask &task, const SingleOpModelParam &param, bool dynamic_flag, uint64_t kernel_id);
Status BuildTask(AiCpuTask &task, const SingleOpModelParam &param, uint64_t kernel_id);

private:
static Status SetKernelArgs(void **args, STR_FWK_OP_KERNEL &kernel);
Status SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &kernel);
Status InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam &param, bool dynamic_flag);
Status InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam &param);

const OpDescPtr op_desc_;
const domi::KernelExDef &kernel_def_;


+ 1
- 3
ge/single_op/task/op_task.cc View File

@@ -621,9 +621,7 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vecto
AiCpuTask::~AiCpuTask() {
FreeHbm(args_);
FreeHbm(io_addr_);
if (dynamic_flag_) {
FreeHbm(workspace_addr_);
}
FreeHbm(workspace_addr_);
FreeHbm(copy_workspace_buf_);
FreeHbm(copy_ioaddr_dev_);
FreeHbm(copy_input_release_flag_dev_);


+ 0
- 1
ge/single_op/task/op_task.h View File

@@ -192,7 +192,6 @@ class AiCpuTask : public AiCpuBaseTask {
// host addr
std::vector<void *> io_addr_host_;

bool dynamic_flag_ = false;
// for copy task
void *copy_task_args_buf_ = nullptr;
void *copy_workspace_buf_ = nullptr;


+ 1
- 0
tests/ut/ge/CMakeLists.txt View File

@@ -818,6 +818,7 @@ set(MULTI_PARTS_TEST_FILES
"session/inner_session_unittest.cc"
"session/session_manager_unittest.cc"
"common/host_cpu_engine_unittest.cc"
"common/tbe_plugin_manager_unittest.cc"
)

set(GE_OPT_INFO_TEST_FILES


+ 40
- 0
tests/ut/ge/common/tbe_plugin_manager_unittest.cc View File

@@ -0,0 +1,40 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#define protected public
#define private public
#include "common/ge/tbe_plugin_manager.h"
#undef private
#undef protected
namespace ge {
class UtestTBEPluginManager: public testing::Test {
protected:
void SetUp() {}
void TearDown() {}
};
TEST_F(UtestTBEPluginManager, CheckFindParserSo) {
string path = "";
vector<string> file_list = {};
string caffe_parser_path = "";
TBEPluginManager::Instance().FindParserSo(path, file_list, caffe_parser_path);
path = "/lib64";
TBEPluginManager::Instance().FindParserSo(path, file_list, caffe_parser_path);
}
} // namespace ge

+ 9
- 0
tests/ut/ge/session/omg_omg_unittest.cc View File

@@ -48,4 +48,13 @@ TEST_F(UtestOmg, display_model_info_success) {
attr_def->mutable_list()->add_i(4);
PrintModelInfo(&model_def, 1);
}
TEST_F(UtestOmg, find_parser_so) {
string path = "";
vector<string> file_list = {};
string caffe_parser_path = "";
FindParserSo(path, file_list, caffe_parser_path);
path = "/lib64";
FindParserSo(path, file_list, caffe_parser_path);
}
} // namespace ge

+ 24
- 0
tests/ut/ge/single_op/single_op_model_unittest.cc View File

@@ -310,3 +310,27 @@ TEST_F(UtestSingleOpModel, BuildTaskList) {
MemcpyAsyncTask mem_task;
ASSERT_EQ(mem_task.LaunchKernel(0), SUCCESS);
}

TEST_F(UtestSingleOpModel, build_aicpu_task) {
ComputeGraphPtr graph = make_shared<ComputeGraph>("single_op");
GeModelPtr ge_model = make_shared<GeModel>();
ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph));
shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>();
ge_model->SetModelTaskDef(model_task_def);

domi::TaskDef *task_def = model_task_def->add_task();
task_def->set_type(RT_MODEL_TASK_KERNEL_EX);

string model_data_str = "123456789";
SingleOpModel model("model", model_data_str.c_str(), model_data_str.size());
std::mutex stream_mu;
rtStream_t stream = nullptr;
rtStreamCreate(&stream, 0);
DynamicSingleOp single_op(0, &stream_mu, stream);
model.model_helper_.model_ = ge_model;
auto op_desc = std::make_shared<ge::OpDesc>("add", "Add");
NodePtr node = graph->AddNode(op_desc);
model.op_list_[0] = node;
StreamResource *res = new (std::nothrow) StreamResource(1);
ASSERT_EQ(model.BuildTaskListForDynamicOp(res, single_op), SUCCESS);
}

Loading…
Cancel
Save