
!818 Memory optimization during model loading

From: @li-lei0106
Reviewed-by: @ji_chen,@xchu42
Signed-off-by: @ji_chen
tags/v1.2.0
mindspore-ci-bot committed 3 years ago
commit 12dcf84615
5 changed files with 54 additions and 18 deletions:

  1. ge/graph/load/new_model_manager/data_dumper.cc (+7, -0)
  2. ge/graph/load/new_model_manager/data_dumper.h (+6, -4)
  3. ge/graph/load/new_model_manager/davinci_model.cc (+21, -11)
  4. ge/graph/load/new_model_manager/davinci_model.h (+9, -3)
  5. tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc (+11, -0)
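In short, the commit moves structures that are only needed while the model is being loaded (the dumper's graph and op bookkeeping, ge_model_, op_list_, saved_task_addrs_) into a Shrink()/DumpShrink() step that releases them once DavinciModel::Init has finished, and keeps copies of the few OpDescs the runtime still needs. The sketch below shows that shrink-after-init pattern in isolation; it is not GraphEngine code, and every name in it (LoadedModel, build_graph_, op_cache_, runtime_ops_) is hypothetical.

#include <cstdint>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

// Stand-in for the per-op metadata that is only needed while loading.
struct OpDesc {
  std::string name;
  std::vector<char> attrs;
};

class LoadedModel {
 public:
  // Build the runtime view from the loading-time structures, then shrink.
  void Init() {
    for (const auto &item : op_cache_) {
      runtime_ops_.push_back(item.second->name);
    }
    Shrink();
  }

  // Mirrors the idea of DavinciModel::Shrink() / DataDumper::DumpShrink():
  // once Init is done, drop everything that was only needed to get there.
  void Shrink() {
    build_graph_.reset();
    op_cache_.clear();
  }

  std::shared_ptr<std::vector<char>> build_graph_;        // release after Init
  std::map<uint32_t, std::shared_ptr<OpDesc>> op_cache_;  // release after Init
  std::vector<std::string> runtime_ops_;                  // kept for execution
};

int main() {
  LoadedModel model;
  model.build_graph_ = std::make_shared<std::vector<char>>(1 << 20, 0);
  model.op_cache_[0] = std::make_shared<OpDesc>(OpDesc{"Data", {}});
  model.op_cache_[1] = std::make_shared<OpDesc>(OpDesc{"Cast", {}});

  model.Init();

  std::cout << "runtime ops kept: " << model.runtime_ops_.size()
            << ", loader-only entries left: " << model.op_cache_.size() << std::endl;
  return 0;
}

The diffs below do the same thing for the real members: each container that can be dropped after loading is tagged "release after DavinciModel::Init" and cleared in Shrink()/DumpShrink().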

ge/graph/load/new_model_manager/data_dumper.cc (+7, -0)

@@ -830,6 +830,13 @@ Status DataDumper::UnloadDumpInfo() {
   return SUCCESS;
 }
 
+void DataDumper::DumpShrink() {
+  compute_graph_.reset();
+  input_map_.clear();
+  ref_info_.clear();
+  op_list_.clear();
+}
+
 void DataDumper::PrintCheckLog(string &dump_list_key) {
   std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
   if (model_list.empty()) {


ge/graph/load/new_model_manager/data_dumper.h (+6, -4)

@@ -83,6 +83,8 @@ class DataDumper {
 
   Status UnloadDumpInfo();
 
+  void DumpShrink();
+
   void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; }
   const DumpProperties &GetDumpProperties() const { return dump_properties_; }
   bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;
@@ -112,18 +114,18 @@ class DataDumper {
   struct InnerInputMapping;
 
   std::vector<OpDescInfo> op_desc_info_;
-  std::vector<InnerDumpInfo> op_list_;
+  std::vector<InnerDumpInfo> op_list_; // release after DavinciModel::Init
   uint32_t end_graph_task_id_ = 0;
   uint32_t end_graph_stream_id_ = 0;
   bool is_end_graph_ = false;
-  std::multimap<std::string, InnerInputMapping> input_map_;
+  std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init
   bool load_flag_;
   uint32_t device_id_;
   uintptr_t global_step_;
   uintptr_t loop_per_iter_;
   uintptr_t loop_cond_;
-  ComputeGraphPtr compute_graph_;
-  std::map<OpDescPtr, void *> ref_info_;
+  ComputeGraphPtr compute_graph_; // release after DavinciModel::Init
+  std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init
   void *l1_fusion_addr_ = nullptr;




ge/graph/load/new_model_manager/davinci_model.cc (+21, -11)

@@ -150,14 +150,7 @@ DavinciModel::~DavinciModel() {
     GELOGW("UnloadDumpInfo failed, ret: %u.", ret);
   }
 
-  for (const auto &op_and_addr : saved_task_addrs_) {
-    auto addr = op_and_addr.second;
-    if (addr != nullptr) {
-      GE_CHK_RT(rtFree(addr));
-    }
-    addr = nullptr;
-  }
-  saved_task_addrs_.clear();
+  ClearTaskAddrs();
 
   GE_CHK_STATUS(ModelRunStop());

@@ -221,6 +214,17 @@ DavinciModel::~DavinciModel() {
   }
 }
 
+void DavinciModel::ClearTaskAddrs() {
+  for (const auto &op_and_addr : saved_task_addrs_) {
+    auto addr = op_and_addr.second;
+    if (addr != nullptr) {
+      GE_CHK_RT(rtFree(addr));
+    }
+    addr = nullptr;
+  }
+  saved_task_addrs_.clear();
+}
+
 void DavinciModel::UnbindHcomStream() {
   if (!all_hccl_stream_list_.empty()) {
     for (size_t i = 0; i < all_hccl_stream_list_.size(); i++) {
@@ -263,7 +267,10 @@ Status DavinciModel::Assign(const GeModelPtr &ge_model) {
 ///
 void DavinciModel::Shrink() {
   skt_info_ = {0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, -1, 0, nullptr};
+  DumperShrink();
   ge_model_.reset(); // delete object.
+  op_list_.clear();
+  ClearTaskAddrs();
 }
 
 Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) {
@@ -738,7 +745,6 @@ Status DavinciModel::ReportProfilingData() {
   }
   ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info);
   GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed.");
-  op_list_.clear();
 
   return SUCCESS;
 }
@@ -963,7 +969,9 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod
   }
 
   data_by_index[data_index] = op_desc;
-  data_op_list_.push_back(op_desc);
+  auto data_op = AttrUtils::CopyOpDesc(op_desc);
+  GE_CHECK_NOTNULL(data_op);
+  data_op_list_.push_back(data_op);
   if (known_node_) {
     return SUCCESS;
   }
@@ -1019,7 +1027,9 @@ Status DavinciModel::OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_
 
   data_op_list_.clear();
   for (auto &item : data_by_index) {
-    data_op_list_.emplace_back(item.second);
+    auto data_op = AttrUtils::CopyOpDesc(item.second);
+    GE_CHECK_NOTNULL(data_op);
+    data_op_list_.emplace_back(data_op);
     auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second);
     GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size());
     input_addrs_list_.emplace_back(output_addrs);
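A note on the two hunks above: InitDataOp and OptInputOutputInfo now store copies made with AttrUtils::CopyOpDesc instead of the original OpDescPtr. Keeping the original shared pointer in data_op_list_ would pin the op (and whatever it references) in memory even after ge_model_ and op_list_ are released in Shrink(); an independent copy lets the originals go. That reading is an inference from the diff, not a statement in the commit. The sketch below illustrates the reasoning with plain shared_ptrs; all names in it (Node, big_payload, kept_alias, kept_copy) are hypothetical.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Hypothetical node: big_payload stands in for graph-wide data the op pins.
struct Node {
  std::string name;
  std::shared_ptr<std::vector<char>> big_payload;
};

int main() {
  auto payload = std::make_shared<std::vector<char>>(64 * 1024 * 1024, 0);
  auto original = std::make_shared<Node>(Node{"Data", payload});

  // Variant 1: keep the original pointer -> the payload stays pinned even
  // after the owner lets go of it.
  std::vector<std::shared_ptr<Node>> kept_alias{original};

  // Variant 2: keep a trimmed, independent copy -> the payload can be freed.
  auto copy = std::make_shared<Node>(Node{original->name, nullptr});
  std::vector<std::shared_ptr<Node>> kept_copy{copy};

  original.reset();
  payload.reset();

  std::cout << "aliased node still pins payload: "
            << (kept_alias[0]->big_payload != nullptr) << std::endl;  // prints 1
  std::cout << "copied node pins payload: "
            << (kept_copy[0]->big_payload != nullptr) << std::endl;   // prints 0
  return 0;
}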


ge/graph/load/new_model_manager/davinci_model.h (+9, -3)

@@ -481,6 +481,10 @@ class DavinciModel {
     data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args);
   }
 
+  void DumperShrink() {
+    data_dumper_.DumpShrink();
+  }
+
   void SetEndGraphId(uint32_t task_id, uint32_t stream_id);
   DavinciModel &operator=(const DavinciModel &model) = delete;

@@ -644,6 +648,8 @@ class DavinciModel {
 
   void ReleaseTask();
 
+  void ClearTaskAddrs();
+
   void UnbindTaskSinkStream();
 
   bool IsAicpuKernelConnectSpecifiedLayer();
@@ -875,12 +881,12 @@ class DavinciModel {
   string om_name_;
 
   uint32_t version_;
-  GeModelPtr ge_model_;
+  GeModelPtr ge_model_; // release after DavinciModel::Init
 
   bool need_destroy_aicpu_kernel_{false};
   vector<string> out_node_name_;
 
-  map<uint32_t, OpDescPtr> op_list_;
+  map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init
 
   // data op_desc
   vector<OpDescPtr> data_op_list_;
@@ -975,7 +981,7 @@ class DavinciModel {
   DataDumper data_dumper_;
   uint64_t iterator_count_;
   bool is_l1_fusion_enable_;
-  map<OpDescPtr, void *> saved_task_addrs_;
+  map<OpDescPtr, void *> saved_task_addrs_; // release after DavinciModel::Init
   void *l1_fusion_addr_ = nullptr;
 
   bool known_node_ = false;


tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc (+11, -0)

@@ -254,6 +254,17 @@ TEST_F(UtestModelManagerDavinciModel, eventlist_success) {
   delete model;
 }
 
+// test Shrink
+TEST_F(UtestModelManagerDavinciModel, shrink_success) {
+  DavinciModel model(0, g_label_call_back);
+  OpDescPtr op_desc_ptr = make_shared<OpDesc>("Cast", "Cast");
+  void *addr = nullptr;
+  rtMalloc(&addr, 128, RT_MEMORY_HBM);
+  model.saved_task_addrs_.emplace(op_desc_ptr, addr);
+  model.Shrink();
+  EXPECT_EQ(model.saved_task_addrs_.empty(), true);
+}
+
 // test rtMalloc
 TEST_F(UtestModelManagerDavinciModel, failed_reset_device) {
   DavinciModel model(0, g_label_call_back);

