Browse Source

!1450 Reduce weight memory usage & Remove redundant memcpy

From: @xchu42
Reviewed-by: @wqtshg, @ji_chen
Signed-off-by: @ji_chen
tags/v1.3.0
mindspore-ci-bot Gitee 3 years ago
parent
commit
b964b15ee4
6 changed files with 36 additions and 51 deletions
  1. +0
    -40
      ge/graph/build/graph_builder.cc
  2. +9
    -0
      ge/hybrid/model/hybrid_model.cc
  3. +2
    -1
      ge/hybrid/model/hybrid_model.h
  4. +16
    -5
      ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
  5. +2
    -2
      ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h
  6. +7
    -3
      tests/ut/ge/hybrid/known_node_executor_unittest.cc

+ 0
- 40
ge/graph/build/graph_builder.cc View File

@@ -413,43 +413,6 @@ static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchor
return SUCCESS; return SUCCESS;
} }


// Inserts a MemcpyAsync node between every CONSTANT node and the NETOUTPUT
// that consumes it. CONSTANT nodes generate no task of their own, so without
// the inserted copy the NETOUTPUT would read the constant's memory directly.
// Unknown-shape graphs go through the dynamic execution flow and are skipped.
// Returns SUCCESS, or FAILED when a memcpy node cannot be inserted.
static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
  if (graph->GetGraphUnknownFlag()) {
    GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str());
    return SUCCESS;
  }
  for (auto &net_output : graph->GetDirectNode()) {
    // Only NETOUTPUT consumers need the copy; everything else is left alone.
    const auto &desc = net_output->GetOpDesc();
    if ((desc == nullptr) || (desc->GetType() != NETOUTPUT)) {
      continue;
    }
    for (InDataAnchorPtr &dst_anchor : net_output->GetAllInDataAnchors()) {
      const OutDataAnchorPtr &src_anchor = dst_anchor->GetPeerOutAnchor();
      GE_IF_BOOL_EXEC(src_anchor == nullptr, continue);
      NodePtr src_node = src_anchor->GetOwnerNode();
      GE_CHECK_NOTNULL(src_node);
      if (src_node->GetType() != CONSTANT) {
        continue;
      }
      GELOGD("Insert MemcpyAsync node between %s and %s.", src_node->GetName().c_str(), net_output->GetName().c_str());
      std::string name = net_output->GetName() + "_input_" + std::to_string(dst_anchor->GetIdx()) + "_Memcpy";
      if (InsertMemcpyNode(graph, src_anchor, {dst_anchor}, name) != SUCCESS) {
        REPORT_CALL_ERROR("E19999", "Insert memcpy between %s and %s failed when GenerateTaskForConstant",
                          src_node->GetName().c_str(), net_output->GetName().c_str());
        GELOGE(FAILED, "Insert memcpy between %s and %s failed.",
               src_node->GetName().c_str(), net_output->GetName().c_str());
        return FAILED;
      }
    }
  }
  return SUCCESS;
}

Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag(); bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag();
com_graph->SetGraphUnknownFlag(false); com_graph->SetGraphUnknownFlag(false);
@@ -534,9 +497,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
!sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
continue; continue;
} }

GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed.");

if (sub_graph->GetGraphUnknownFlag()) { if (sub_graph->GetGraphUnknownFlag()) {
// unknown shape build flow // unknown shape build flow
GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),


+ 9
- 0
ge/hybrid/model/hybrid_model.cc View File

@@ -368,5 +368,14 @@ void *HybridModel::GetGlobalStep() const {
} }
return global_step_->GetData(); return global_step_->GetData();
} }

// Looks up the weight buffer cached for the given subgraph.
// Returns a raw (non-owning) pointer into weight_buffer_map_, or nullptr
// when no weight has been recorded under that subgraph name.
TensorBuffer *HybridModel::GetModelWeight(const string &subgraph_name) const {
  const auto found = weight_buffer_map_.find(subgraph_name);
  if (found != weight_buffer_map_.end()) {
    return found->second.get();
  }
  GELOGD("Model weight not found, subgraph name = %s", subgraph_name.c_str());
  return nullptr;
}
} // namespace hybrid } // namespace hybrid
} // namespace ge } // namespace ge

+ 2
- 1
ge/hybrid/model/hybrid_model.h View File

@@ -93,6 +93,8 @@ class HybridModel {


TensorValue* GetTensor(const NodePtr &node) const; TensorValue* GetTensor(const NodePtr &node) const;


TensorBuffer* GetModelWeight(const std::string &subgraph_name) const;

const std::map<int64_t, std::vector<std::pair<int, Tensor>>> &GetHostTensors() const; const std::map<int64_t, std::vector<std::pair<int, Tensor>>> &GetHostTensors() const;


const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const; const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const;
@@ -159,7 +161,6 @@ class HybridModel {
uint32_t device_id_ = 0; uint32_t device_id_ = 0;
uint32_t model_id_ = 0; uint32_t model_id_ = 0;
uint8_t *var_mem_base_ = nullptr; uint8_t *var_mem_base_ = nullptr;
std::unique_ptr<TensorBuffer> weight_buffer_;
std::map<string, std::unique_ptr<TensorBuffer>> weight_buffer_map_; std::map<string, std::unique_ptr<TensorBuffer>> weight_buffer_map_;
RuntimeParam root_runtime_param_; RuntimeParam root_runtime_param_;
string om_name_; string om_name_;


+ 16
- 5
ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc View File

@@ -118,7 +118,7 @@ Status KnownNodeTask::Init(TaskContext &context) {
return SUCCESS; return SUCCESS;
} }


Status KnownNodeTask::InitDavinciModel(const HybridModel &model) {
Status KnownNodeTask::InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer) {
GELOGD("[Init][DavinciModel] start"); GELOGD("[Init][DavinciModel] start");
davinci_model_->InitRuntimeParams(); davinci_model_->InitRuntimeParams();
GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed"); GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed");
@@ -133,13 +133,20 @@ Status KnownNodeTask::InitDavinciModel(const HybridModel &model) {
davinci_model_->SetKnownShapeGlobalStep(global_step); davinci_model_->SetKnownShapeGlobalStep(global_step);
} }


GE_CHK_STATUS_RET(DoInitDavinciModel(), "[Init][Model] Failed to init davinci model.");
void *weight = nullptr;
size_t weight_size = 0;
if (weight_buffer != nullptr) {
weight = weight_buffer->GetData();
weight_size = weight_buffer->GetSize();
}
GELOGD("Start to init davinci model, weight size = %zu", weight_size);
GE_CHK_STATUS_RET(DoInitDavinciModel(weight, weight_size), "[Init][Model] Failed to init davinci model.");
GELOGD("[Init][Model] success"); GELOGD("[Init][Model] success");
return SUCCESS; return SUCCESS;
} }


Status KnownNodeTask::DoInitDavinciModel() {
return davinci_model_->Init();
Status KnownNodeTask::DoInitDavinciModel(void *weight, size_t weight_size) {
return davinci_model_->Init(nullptr, 0, weight, weight_size);
} }


Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const {
@@ -165,6 +172,10 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node
const GeModelPtr ge_model = model.GetGeModel(node); const GeModelPtr ge_model = model.GetGeModel(node);
GE_CHECK_NOTNULL(ge_model); GE_CHECK_NOTNULL(ge_model);


AscendString graph_name;
GE_CHK_GRAPH_STATUS_RET(ge_model->GetGraph().GetName(graph_name), "Failed to get graph name");
auto weight_buffer = model.GetModelWeight(graph_name.GetString());

std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr); std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr);
GE_CHECK_NOTNULL(davinci_model); GE_CHECK_NOTNULL(davinci_model);


@@ -181,7 +192,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node


auto known_node_task = MakeShared<KnownNodeTask>(davinci_model); auto known_node_task = MakeShared<KnownNodeTask>(davinci_model);
GE_CHECK_NOTNULL(known_node_task); GE_CHECK_NOTNULL(known_node_task);
GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel(model));
GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel(model, weight_buffer));
GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str()); GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str());
task = std::move(known_node_task); task = std::move(known_node_task);
return SUCCESS; return SUCCESS;


+ 2
- 2
ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h View File

@@ -36,10 +36,10 @@ class KnownNodeTask : public NodeTask {
Status UpdateArgs(TaskContext &context) override; Status UpdateArgs(TaskContext &context) override;
Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
Status Init(TaskContext &context) override; Status Init(TaskContext &context) override;
Status InitDavinciModel(const HybridModel &model);
Status InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer);


protected: protected:
virtual Status DoInitDavinciModel();
virtual Status DoInitDavinciModel(void *weight, size_t weight_size);
private: private:
std::shared_ptr<DavinciModel> davinci_model_ = nullptr; std::shared_ptr<DavinciModel> davinci_model_ = nullptr;
}; };


+ 7
- 3
tests/ut/ge/hybrid/known_node_executor_unittest.cc View File

@@ -43,7 +43,7 @@ class KnownNodeTaskMock : public KnownNodeTask {
public: public:
KnownNodeTaskMock(std::shared_ptr<DavinciModel> davinci_model): KnownNodeTask(davinci_model) {}; KnownNodeTaskMock(std::shared_ptr<DavinciModel> davinci_model): KnownNodeTask(davinci_model) {};
~KnownNodeTaskMock() override = default; ~KnownNodeTaskMock() override = default;
MOCK_METHOD0(DoInitDavinciModel, Status());
MOCK_METHOD2(DoInitDavinciModel, Status(void *, size_t));
}; };
} }


@@ -62,6 +62,10 @@ TEST_F(UnknownNodeExecutorTest, test_init_davinci_model) {
DumpProperties dump_properties; DumpProperties dump_properties;
dump_properties.enable_dump_ = "1"; dump_properties.enable_dump_ = "1";
DumpManager::GetInstance().AddDumpProperties(model.GetSessionId(), dump_properties); DumpManager::GetInstance().AddDumpProperties(model.GetSessionId(), dump_properties);
EXPECT_CALL(mock, DoInitDavinciModel).WillOnce(::testing::Return(SUCCESS));
ASSERT_EQ(mock.InitDavinciModel(model), SUCCESS);
EXPECT_CALL(mock, DoInitDavinciModel).WillRepeatedly(::testing::Return(SUCCESS));
ASSERT_EQ(mock.InitDavinciModel(model, model.GetModelWeight("subgraph")), SUCCESS);

int32_t buffer[8];
model.weight_buffer_map_.emplace("subgraph", TensorBuffer::Create(buffer, sizeof(buffer)));
ASSERT_EQ(mock.InitDavinciModel(model, model.GetModelWeight("subgraph")), SUCCESS);
} }

Loading…
Cancel
Save