@@ -239,6 +239,10 @@ bool MemoryBlock::IsSameBatchLabel() { | |||||
return all_same_label; | return all_same_label; | ||||
} | } | ||||
bool MemoryBlock::CanReuse(int32_t thread_scope_id) const { | |||||
return (thread_scope_id_.find(thread_scope_id) == thread_scope_id_.end()); | |||||
} | |||||
bool CanNotLifeReuse(MemoryBlock *block) { | bool CanNotLifeReuse(MemoryBlock *block) { | ||||
if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_) { | if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_) { | ||||
return true; | return true; | ||||
@@ -283,6 +287,14 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ | |||||
if (CanNotLifeReuse(this) || CanNotLifeReuse(block) || (batch_label_ != block->batch_label_)) { | if (CanNotLifeReuse(this) || CanNotLifeReuse(block) || (batch_label_ != block->batch_label_)) { | ||||
return; | return; | ||||
} | } | ||||
// memory can only be life-reused when the blocks do not share a thread scope id | |||||
for (auto thread_scope_id : ThreadScopeId()) { | |||||
if (!block->CanReuse(thread_scope_id)) { | |||||
return; | |||||
} | |||||
} | |||||
if (block->continuous_block_) { | if (block->continuous_block_) { | ||||
AddContinuousLifeReuseBlock(block, total_node_depend_stream_life); | AddContinuousLifeReuseBlock(block, total_node_depend_stream_life); | ||||
return; | return; | ||||
@@ -659,7 +671,12 @@ bool IsDirectOutputNode(const NodePtr &node, int idx) { | |||||
return false; | return false; | ||||
} | } | ||||
bool CanReuseBlock(size_t continuous_life_begin, const MemoryBlock &reusable_block, size_t block_size) { | |||||
bool CanReuseBlock(int32_t thread_scope_id, size_t continuous_life_begin, const MemoryBlock &reusable_block, | |||||
size_t block_size) { | |||||
if (!reusable_block.CanReuse(thread_scope_id)) { | |||||
return false; | |||||
} | |||||
bool can_reuse = false; | bool can_reuse = false; | ||||
if (reusable_block.Size() == block_size) { | if (reusable_block.Size() == block_size) { | ||||
// in some continuous input case, continuous first input node's is not same as topo first node. | // in some continuous input case, continuous first input node's is not same as topo first node. | ||||
@@ -1122,6 +1139,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
} | } | ||||
bool is_reuse_memory = false; | bool is_reuse_memory = false; | ||||
int32_t thread_scope_id = kInvalidThreadScopeId; | |||||
(void)ge::AttrUtils::GetInt(node_op_desc, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id); | |||||
if (ge_disable_reuse_mem_env_ != "1") { | if (ge_disable_reuse_mem_env_ != "1") { | ||||
bool reuse_mem_flag = (mem_type == kOutput) ? IsPreReuse(n, out_index) : | bool reuse_mem_flag = (mem_type == kOutput) ? IsPreReuse(n, out_index) : | ||||
!((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); | !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); | ||||
@@ -1141,8 +1160,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); | GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); | ||||
// A node can reuse blocks of the same stream and preorder streams | // A node can reuse blocks of the same stream and preorder streams | ||||
if (CanReuseBlock(continuous_life_begin_, *reusable_block, block_size)) { | |||||
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, | |||||
if (CanReuseBlock(thread_scope_id, continuous_life_begin_, *reusable_block, block_size)) { | |||||
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_, thread_scope_id}, | |||||
real_size, no_align_size); | real_size, no_align_size); | ||||
if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | ||||
@@ -1168,7 +1187,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
// Data and netoutput need zero copy block | // Data and netoutput need zero copy block | ||||
block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | ||||
block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); | |||||
block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_, thread_scope_id}, | |||||
real_size, no_align_size); | |||||
block->stream_id_ = node_op_desc->GetStreamId(); | block->stream_id_ = node_op_desc->GetStreamId(); | ||||
block->continuous_block_ = continuous; | block->continuous_block_ = continuous; | ||||
block->batch_label_ = batch_label; | block->batch_label_ = batch_label; | ||||
@@ -2062,7 +2082,13 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, | |||||
size_t real_size, size_t no_align_size, int32_t child_block_level) { | size_t real_size, size_t no_align_size, int32_t child_block_level) { | ||||
ge::OpDescPtr op_desc = node_type.node->GetOpDesc(); | ge::OpDescPtr op_desc = node_type.node->GetOpDesc(); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(op_desc == nullptr, return, "op_desc is null."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(op_desc == nullptr, return, "op_desc is null."); | ||||
string graph_name = node_type.node->GetOwnerComputeGraph()->GetName(); | |||||
auto owner_graph = node_type.node->GetOwnerComputeGraph(); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(owner_graph == nullptr, return, "owner_graph is null."); | |||||
string graph_name = owner_graph->GetName(); | |||||
if (owner_graph->GetParentGraph() != nullptr) { | |||||
graph_name = owner_graph->GetParentGraph()->GetName(); | |||||
} | |||||
vector<int64_t> memorys_type; | vector<int64_t> memorys_type; | ||||
int64_t offset = block->HeadOffset(); | int64_t offset = block->HeadOffset(); | ||||
size_t end = node_type.life_time_end; | size_t end = node_type.life_time_end; | ||||
@@ -2108,12 +2134,12 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, | |||||
op_desc->SetWorkspace(workspace_list); | op_desc->SetWorkspace(workspace_list); | ||||
} | } | ||||
GELOGI("[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%ld] streamid[%ld] memtype[%ld] size[%zu] realsize[%zu] " | GELOGI("[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%ld] streamid[%ld] memtype[%ld] size[%zu] realsize[%zu] " | ||||
"noalignsize[%zu] life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", | |||||
"noalignsize[%zu] life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s] scope[%d]", | |||||
graph_name.c_str(), op_desc->GetName().c_str(), node_type.node->GetType().c_str(), | graph_name.c_str(), op_desc->GetName().c_str(), node_type.node->GetType().c_str(), | ||||
node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),block->memory_type_, | node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),block->memory_type_, | ||||
block->Size(), real_size, no_align_size, node_type.GetLifeBeginDesc().c_str(), end, child_block_level, | block->Size(), real_size, no_align_size, node_type.GetLifeBeginDesc().c_str(), end, child_block_level, | ||||
block->reuse_mem_, block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, | block->reuse_mem_, block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, | ||||
block->batch_label_.c_str()); | |||||
block->batch_label_.c_str(), node_type.thread_scope_id); | |||||
} | } | ||||
void SetBlockOpMemOffset(MemoryBlock *block, int32_t child_block_level) { | void SetBlockOpMemOffset(MemoryBlock *block, int32_t child_block_level) { | ||||
@@ -2176,8 +2202,7 @@ Status BlockMemAssigner::Assign() { | |||||
bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | ||||
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | ||||
(node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || | |||||
(node_type == ASSIGN) || (node_type == HVDWAIT); | |||||
(node_type == CONSTANTOP) || (node_type == HVDWAIT); | |||||
} | } | ||||
bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { | bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { | ||||
@@ -33,14 +33,17 @@ | |||||
namespace ge { | namespace ge { | ||||
const size_t kMaxLifeTime = 0xffffffff; | const size_t kMaxLifeTime = 0xffffffff; | ||||
const int32_t kInvalidThreadScopeId = -1; | |||||
using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>; | using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>; | ||||
enum OpMemoryType { kOutput, kWorkspace }; | enum OpMemoryType { kOutput, kWorkspace }; | ||||
struct NodeTypeIndex { | struct NodeTypeIndex { | ||||
NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0) | |||||
: node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin) {} | |||||
NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0, | |||||
int32_t thread_scope_id = kInvalidThreadScopeId) | |||||
: node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin), | |||||
thread_scope_id(thread_scope_id) {} | |||||
ge::NodePtr node = nullptr; | ge::NodePtr node = nullptr; | ||||
OpMemoryType mem_type = kOutput; | OpMemoryType mem_type = kOutput; | ||||
@@ -48,6 +51,7 @@ struct NodeTypeIndex { | |||||
bool ref_input = false; | bool ref_input = false; | ||||
size_t life_time_begin = 0; | size_t life_time_begin = 0; | ||||
size_t life_time_end = kMaxLifeTime; | size_t life_time_end = kMaxLifeTime; | ||||
int32_t thread_scope_id = kInvalidThreadScopeId; | |||||
const string GetMemType() const { | const string GetMemType() const { | ||||
if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
return "output"; | return "output"; | ||||
@@ -143,6 +147,9 @@ class MemoryBlock { | |||||
same_stream_ = false; | same_stream_ = false; | ||||
} | } | ||||
} | } | ||||
if (node_type_index.thread_scope_id != kInvalidThreadScopeId) { | |||||
thread_scope_id_.insert(node_type_index.thread_scope_id); | |||||
} | |||||
} | } | ||||
void AddSymbol(const std::string &symbol) { | void AddSymbol(const std::string &symbol) { | ||||
@@ -154,6 +161,7 @@ class MemoryBlock { | |||||
const std::vector<size_t> &RealSizeList() const { return real_size_list_; } | const std::vector<size_t> &RealSizeList() const { return real_size_list_; } | ||||
const std::vector<MemoryBlock *> &ChildBlockList() const { return child_blocks_; } | const std::vector<MemoryBlock *> &ChildBlockList() const { return child_blocks_; } | ||||
const std::vector<size_t> &NoAlignSizeList() const { return no_align_size_list_; } | const std::vector<size_t> &NoAlignSizeList() const { return no_align_size_list_; } | ||||
const std::set<int32_t> &ThreadScopeId() const { return thread_scope_id_; } | |||||
void Resize(); | void Resize(); | ||||
@@ -175,6 +183,8 @@ class MemoryBlock { | |||||
size_t GetDependLifeBegin(int64_t stream_id, DependStreamLife &node_depend_stream_life); | size_t GetDependLifeBegin(int64_t stream_id, DependStreamLife &node_depend_stream_life); | ||||
bool CanReuse(int32_t thread_scope_id) const; | |||||
int ref_count_; | int ref_count_; | ||||
int64_t stream_id_; | int64_t stream_id_; | ||||
bool deleted_block_; | bool deleted_block_; | ||||
@@ -198,6 +208,7 @@ class MemoryBlock { | |||||
std::vector<NodeTypeIndex> node_type_index_list_; | std::vector<NodeTypeIndex> node_type_index_list_; | ||||
std::vector<std::string> symbol_list_; | std::vector<std::string> symbol_list_; | ||||
std::vector<MemoryBlock *> child_blocks_; | std::vector<MemoryBlock *> child_blocks_; | ||||
std::set<int32_t> thread_scope_id_; | |||||
}; | }; | ||||
class BlockMemAssigner : public MemAssigner { | class BlockMemAssigner : public MemAssigner { | ||||
@@ -44,12 +44,12 @@ using domi::GetContext; | |||||
class UtestMemoryAssignerTest : public testing::Test { | class UtestMemoryAssignerTest : public testing::Test { | ||||
public: | public: | ||||
ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { | |||||
ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some", int64_t size = 1024) { | |||||
ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type); | ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type); | ||||
auto desc_temp_ptr = make_shared<ge::GeTensorDesc>(); | auto desc_temp_ptr = make_shared<ge::GeTensorDesc>(); | ||||
auto desc_temp = *desc_temp_ptr; | auto desc_temp = *desc_temp_ptr; | ||||
TensorUtils::SetSize(desc_temp, 1024); | |||||
TensorUtils::SetSize(desc_temp, size); | |||||
op_def->AddInputDesc(desc_temp); | op_def->AddInputDesc(desc_temp); | ||||
op_def->AddOutputDesc(desc_temp); | op_def->AddOutputDesc(desc_temp); | ||||
@@ -214,6 +214,44 @@ class UtestMemoryAssignerTest : public testing::Test { | |||||
return builder.GetGraph(); | return builder.GetGraph(); | ||||
} | } | ||||
void make_ffts_reuse_graph(ge::ComputeGraphPtr graph, int32_t thread_scope_id_1 = kInvalidThreadScopeId, | |||||
int32_t thread_scope_id_2 = kInvalidThreadScopeId) { | |||||
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); | |||||
ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); | |||||
ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); | |||||
ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); | |||||
ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 0); | |||||
ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); | |||||
ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); | |||||
if (thread_scope_id_1 != kInvalidThreadScopeId) { | |||||
(void)ge::AttrUtils::SetInt(op_def_a, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_1); | |||||
(void)ge::AttrUtils::SetInt(op_def_b, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_1); | |||||
(void)ge::AttrUtils::SetInt(op_def_c, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_1); | |||||
} | |||||
if (thread_scope_id_2 != kInvalidThreadScopeId) { | |||||
(void)ge::AttrUtils::SetInt(op_def_d, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_2); | |||||
(void)ge::AttrUtils::SetInt(op_def_e, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_2); | |||||
(void)ge::AttrUtils::SetInt(op_def_f, ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_2); | |||||
} | |||||
ge::NodePtr node_a = graph->AddNode(op_def_a); | |||||
ge::NodePtr node_b = graph->AddNode(op_def_b); | |||||
ge::NodePtr node_c = graph->AddNode(op_def_c); | |||||
ge::NodePtr node_d = graph->AddNode(op_def_d); | |||||
ge::NodePtr node_e = graph->AddNode(op_def_e); | |||||
ge::NodePtr node_f = graph->AddNode(op_def_f); | |||||
ge::NodePtr node_g = graph->AddNode(op_def_g); | |||||
ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); | |||||
graph->TopologicalSorting(); | |||||
} | |||||
protected: | protected: | ||||
void SetUp() {} | void SetUp() {} | ||||
@@ -419,3 +457,33 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_atomic_output_and_workspace) | |||||
EXPECT_EQ(mem_offset.mem_offset_, 1024); | EXPECT_EQ(mem_offset.mem_offset_, 1024); | ||||
} | } | ||||
} | } | ||||
TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_no_functinon_op) { | |||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | |||||
make_ffts_reuse_graph(graph, kInvalidThreadScopeId, kInvalidThreadScopeId); | |||||
HybridMemAssigner hybridMemAssigner(graph); | |||||
ge::Status ret = hybridMemAssigner.Assign(); | |||||
size_t offset = hybridMemAssigner.GetMemOffset(); | |||||
EXPECT_EQ(offset, 5120); | |||||
EXPECT_EQ(ret, SUCCESS); | |||||
} | |||||
TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_two_functinon_op) { | |||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | |||||
make_ffts_reuse_graph(graph, 0, 1); | |||||
HybridMemAssigner hybridMemAssigner(graph); | |||||
ge::Status ret = hybridMemAssigner.Assign(); | |||||
size_t offset = hybridMemAssigner.GetMemOffset(); | |||||
EXPECT_EQ(offset, 6656); | |||||
EXPECT_EQ(ret, SUCCESS); | |||||
} | |||||
TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_one_functinon_op) { | |||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | |||||
make_ffts_reuse_graph(graph, 0, kInvalidThreadScopeId); | |||||
HybridMemAssigner hybridMemAssigner(graph); | |||||
ge::Status ret = hybridMemAssigner.Assign(); | |||||
size_t offset = hybridMemAssigner.GetMemOffset(); | |||||
EXPECT_EQ(offset, 5632); | |||||
EXPECT_EQ(ret, SUCCESS); | |||||
} |