You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

graph_mem_assigner.cc 77 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/build/memory/graph_mem_assigner.h"
  17. #include <cstring>
  18. #include <set>
  19. #include "common/math/math_util.h"
  20. #include "common/util/error_manager/error_manager.h"
  21. #include "framework/common/debug/ge_log.h"
  22. #include "graph/build/memory/hybrid_mem_assigner.h"
  23. #include "graph/build/memory/var_mem_assign_util.h"
  24. #include "graph/build/memory/block_mem_assigner.h"
  25. #include "graph/common/omg_util.h"
  26. #include "graph/debug/ge_attr_define.h"
  27. #include "graph/ge_attr_value.h"
  28. #include "graph/manager/graph_var_manager.h"
  29. #include "graph/utils/tensor_utils.h"
  30. #include "graph/utils/type_utils.h"
  31. namespace {
  32. const int kDataOutputIndex = 0;
  33. const int kAllInputAddrIsAtomic = -1;
  34. const int kVirtualInputNodeMemoryReuse = 0;
  35. const int kVirtualOutputNodeMemoryReuse = 1;
  36. const size_t kVirtualInputNodeOutputSize = 1;
  37. const size_t kVirtualOutputNodeInputSize = 1;
  38. const size_t kVirtualNodeDataIndex = 0;
  39. const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_";
  40. int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol,
  41. const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors,
  42. const ge::NodePtr &node, const uint32_t i) {
  43. ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut);
  44. auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString());
  45. if (iter1 == anchor_to_symbol.end()) {
  46. return ge::kInvalidOffset;
  47. }
  48. auto out_symbol = iter1->second;
  49. auto iter2 = symbol_to_anchors.find(out_symbol);
  50. if (iter2 == symbol_to_anchors.end()) {
  51. return ge::kInvalidOffset;
  52. }
  53. for (const auto &node_index_io : iter2->second) {
  54. if (node_index_io.value_ == out_symbol) {
  55. vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
  56. vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset();
  57. if (node_index_io.index_ >= symbol_output_list.size()) {
  58. return ge::kInvalidOffset;
  59. }
  60. GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i,
  61. output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_));
  62. return symbol_output_list.at(node_index_io.index_);
  63. }
  64. }
  65. return ge::kInvalidOffset;
  66. }
  67. } // namespace
  68. namespace ge {
  69. Status VariableMemoryAssigner::Assign() {
  70. Status result = ge::VarMemAssignUtil::AssignConstantOpMemory(compute_graph_);
  71. if (result != ge::SUCCESS) {
  72. return result;
  73. }
  74. result = ge::VarMemAssignUtil::AssignVarMemory(compute_graph_);
  75. if (result != ge::SUCCESS) {
  76. return result;
  77. }
  78. return ge::SUCCESS;
  79. }
  80. Status VariableMemoryAssigner::AssignVarAttr2Nodes() {
  81. Status result = ge::VarMemAssignUtil::AssignVarAttr2Nodes(compute_graph_);
  82. if (result != ge::SUCCESS) {
  83. return result;
  84. }
  85. return ge::SUCCESS;
  86. }
  87. Status GraphMemoryAssigner::AssignMemory() {
  88. ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_));
  89. if (mem_assigner->Assign() != ge::SUCCESS) {
  90. GELOGE(ge::FAILED, "Memory assigner failed");
  91. return ge::FAILED;
  92. }
  93. MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
  94. memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
  95. if (mem_assigner->GetP2PMemOffset() > 0) {
  96. MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
  97. memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
  98. }
  99. auto session_id = compute_graph_->GetSessionID();
  100. int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
  101. auto variable_assigner =
  102. std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  103. if (variable_assigner == nullptr) {
  104. GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
  105. return ge::FAILED;
  106. }
  107. if (variable_assigner->Assign() != ge::SUCCESS) {
  108. return ge::FAILED;
  109. }
  110. int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
  111. GELOGI("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);
  112. mem_assigner_ = std::move(mem_assigner);
  113. return ge::SUCCESS;
  114. }
  115. ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
  116. auto variable_assigner =
  117. std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  118. if (variable_assigner == nullptr) {
  119. GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
  120. return ge::FAILED;
  121. }
  122. if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
  123. return ge::FAILED;
  124. }
  125. return ge::SUCCESS;
  126. }
  127. ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
  128. int64_t dim_index, int64_t &output_mem_size,
  129. int64_t &batch_dim_num, int64_t &out_size) {
  130. graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
  131. if (graph_status != GRAPH_SUCCESS) {
  132. GELOGE(FAILED, "Opdesc GetSize failed!");
  133. return FAILED;
  134. }
  135. GeShape output_shape = output_desc->GetShape();
  136. std::vector<int64_t> output_dims = output_shape.GetDims();
  137. if (dim_index >= static_cast<int64_t>(output_dims.size())) {
  138. GELOGE(FAILED, "Invaild value(%ld) of attr _reuse_input_on_dim_index, which is out of data range [0, %zu).",
  139. dim_index, output_dims.size());
  140. return FAILED;
  141. }
  142. for (int64_t index = 0; index < dim_index; index++) {
  143. FMK_INT64_MULCHECK(batch_dim_num, output_dims[index]);
  144. batch_dim_num *= output_dims[index];
  145. output_dims[index] = 1;
  146. }
  147. output_shape = GeShape(output_dims);
  148. Format out_format = output_desc->GetFormat();
  149. DataType data_type = output_desc->GetDataType();
  150. graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
  151. if (graph_status != GRAPH_SUCCESS) {
  152. GELOGE(graph_status, "Opdesc CalcTensorMemSize failed!");
  153. return FAILED;
  154. }
  155. if (output_mem_size < 0) {
  156. GELOGE(FAILED, "After calculating tensor memory size, output_mem_size = %ld, out of data range [0, %ld]",
  157. output_mem_size, INT64_MAX);
  158. return FAILED;
  159. }
  160. return SUCCESS;
  161. }
// Finds the batch label of the largest-shaped node among a group of virtual
// multi-batch nodes that share memory.
// Only the FIRST map entry is examined (see the unconditional break at the end):
// within this map every entry is expected to yield the same max-batch label.
// @param mem_reuse_virtual_nodes_map groups of virtual nodes keyed by reuse id
// @param mem_reuse_model  kVirtualInputNodeMemoryReuse (compare output 0) or
//                         kVirtualOutputNodeMemoryReuse (compare input 0)
// @param max_batch_label  [out] ATTR_NAME_BATCH_LABEL of the max-shape node
// @return SUCCESS, or FAILED on invalid model / mismatched shapes.
Status GraphMemoryAssigner::GetMaxBatchLabel(const map<string, vector<NodePtr>> &mem_reuse_virtual_nodes_map,
                                             int32_t mem_reuse_model, string &max_batch_label) {
  for (auto &i_map : mem_reuse_virtual_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    vector<int64_t> max_shape_dims;
    size_t max_batch_dim = 0;     // index of the (single) dim allowed to differ between batches
    bool max_batch_dim_find = false;
    for (size_t i = 0; i < virtual_nodes_list.size(); ++i) {
      GE_CHECK_NOTNULL(virtual_nodes_list[i]);
      OpDescPtr op_desc = virtual_nodes_list[i]->GetOpDesc();
      GE_CHECK_NOTNULL(op_desc);
      ge::ConstGeTensorDescPtr input_output_desc;
      // The tensor to compare depends on the reuse direction: output 0 for
      // input-reuse nodes, input 0 for output-reuse nodes.
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        input_output_desc = op_desc->GetOutputDescPtr(kVirtualNodeDataIndex);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        input_output_desc = op_desc->GetInputDescPtr(kVirtualNodeDataIndex);
      } else {
        GELOGE(FAILED, "Invalid parameter memory reuse model, which is: %d.", mem_reuse_model);
        return FAILED;
      }
      GE_CHECK_NOTNULL(input_output_desc);
      if (i == 0) {
        // First node seeds the running maximum shape and label.
        // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
        (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
        max_shape_dims = input_output_desc->GetShape().GetDims();
      } else {
        vector<int64_t> current_shape_dims = input_output_desc->GetShape().GetDims();
        if (current_shape_dims.size() != max_shape_dims.size()) {
          GELOGE(FAILED, "The shape size of several nodes between multiple batches does not match.");
          return FAILED;
        }
        for (size_t j = 0; j < current_shape_dims.size(); ++j) {
          if (current_shape_dims[j] == max_shape_dims[j]) {
            continue;
          }
          // Shapes may differ in at most ONE dim across batches; a second
          // differing dim is an inconsistency.
          if (max_batch_dim_find && max_batch_dim != j) {
            GELOGE(FAILED, "The shape of several nodes between multiple batches does not match.");
            return FAILED;
          }
          max_batch_dim_find = true;
          max_batch_dim = j;
          if (current_shape_dims[j] > max_shape_dims[j]) {
            max_shape_dims[j] = current_shape_dims[j];
            // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
            (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
          }
          // Only compare the first different dim in shape.
          break;
        }
      }
    }
    // In every element of virtual_input_nodes_map, the label of the max batch node is the same.
    break;
  }
  return SUCCESS;
}
  218. Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) {
  219. if (memory_offset_.empty()) {
  220. GELOGE(FAILED, "memory_offset_ is empty.");
  221. return ge::FAILED;
  222. }
  223. GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph), "ReAssignContinuousMemory Failed!");
  224. GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousInputMemory(),
  225. "ReAssignReuseAndNoPaddingContinuousInputMemory Failed!");
  226. GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousOutputMemory(),
  227. "ReAssignReuseAndNoPaddingContinuousOutputMemory Failed!");
  228. GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!");
  229. size_t total_mem_offset = 0;
  230. for (auto pair : memory_offset_) {
  231. mem_type_to_offset[pair.first] = pair.second.mem_offset_;
  232. total_mem_offset += pair.second.mem_offset_;
  233. }
  234. auto session_id = compute_graph_->GetSessionID();
  235. if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
  236. GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", total_mem_offset,
  237. VarManager::Instance(session_id)->GetGraphMemoryMaxSize());
  238. for (auto iter : mem_type_to_offset) {
  239. ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"},
  240. {std::to_string(iter.first), std::to_string(iter.second), "featuremap",
  241. std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())});
  242. }
  243. return ge::FAILED;
  244. }
  245. return SUCCESS;
  246. }
  247. Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) {
  248. BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
  249. GE_IF_BOOL_EXEC(priority_assigner == nullptr, GELOGE(FAILED, "Get priority_assigner failed."); return ge::FAILED;);
  250. size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM];
  251. // set offset for zero copy block
  252. for (auto &memory_block : priority_assigner->GetMemoryBlocks()) {
  253. if (memory_block == nullptr || memory_block->deleted_block_ || !memory_block->is_zero_copy_) {
  254. continue;
  255. }
  256. memory_block->Resize();
  257. memory_block->SetHeadOffset(mem_offset[RT_MEMORY_HBM]);
  258. mem_offset[RT_MEMORY_HBM] += memory_block->Size();
  259. memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1);
  260. }
  261. GELOGI("mem_offset_ include zero_copy_memory is %zu.", mem_offset[RT_MEMORY_HBM]);
  262. // set offset for zero copy nodes
  263. priority_assigner->SetOpMemOffset(true);
  264. zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp;
  265. auto iter = memory_offset_.find(RT_MEMORY_HBM);
  266. if (iter == memory_offset_.end()) {
  267. GELOGE(FAILED, "Memory offset don't have memory type[hbm].");
  268. return FAILED;
  269. }
  270. iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM];
  271. GELOGI("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp,
  272. zero_mem_copy_size);
  273. return SUCCESS;
  274. }
  275. Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
  276. GELOGI("Begin to reassign continuous memory");
  277. Status ret;
  278. for (auto &node : compute_graph_->GetAllNodes()) {
  279. // Get the continuous input type of the node, default is false
  280. bool is_input_continuous = false;
  281. GE_CHECK_NOTNULL(node->GetOpDesc());
  282. // If GetBool fail, is_input_continuous is false.
  283. (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
  284. // Assign continuous input memory
  285. if (is_input_continuous) {
  286. int64_t memory_type = RT_MEMORY_HBM;
  287. GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed.");
  288. int64_t mem_clean_start = 0;
  289. int64_t mem_clean_size = 0;
  290. ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type);
  291. if (ret != ge::SUCCESS) {
  292. GELOGE(ret, "Assign continuous input memory failed!");
  293. return ret;
  294. }
  295. // Clean up atomic address, eg, hcom node
  296. vector<int32_t> input_indexes;
  297. // If GetListInt fail, input_indexes is empty.
  298. (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
  299. if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
  300. // check whether there is an atomic conflict between the current node and the peer out node
  301. if (!CheckInputIsSupportAtomic(node)) {
  302. GELOGE(ge::FAILED,
  303. "There is an atomic conflict between the current node and the peer out node, not supported!");
  304. return ge::FAILED;
  305. }
  306. const auto &in_control_anchor = node->GetInControlAnchor();
  307. GE_CHECK_NOTNULL(in_control_anchor);
  308. for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
  309. auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
  310. if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
  311. ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size});
  312. if (ret != SUCCESS) {
  313. GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str());
  314. return ret;
  315. }
  316. }
  317. }
  318. }
  319. }
  320. // Get the reference type of the node, default is false
  321. bool is_ref = false;
  322. // If GetBool fail, is_ref is false.
  323. (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
  324. // Get the continuous output type of the node, default is false
  325. bool is_output_continuous = false;
  326. // If GetBool fail, is_output_continuous is false.
  327. (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous);
  328. // If the output is ref type and refers to the ref of an input, the name of the output
  329. // and the input are the same. Ge encounters ref type, finds matching relationship according
  330. // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast
  331. if (!is_ref && is_output_continuous) { // Assign continuous output memory
  332. ret = AssignContinuousOutputMemory(node);
  333. if (ret != ge::SUCCESS) {
  334. GELOGE(ret, "Assign reference memory failed!");
  335. return ret;
  336. }
  337. }
  338. }
  339. for (auto pair : memory_offset_) {
  340. GELOGI("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
  341. pair.second.mem_offset_);
  342. }
  343. return ge::SUCCESS;
  344. }
// Assigns one contiguous memory region covering all inputs of |node| by
// rewriting the OUTPUT offsets of each peer (producer) node.
// Two modes, selected by attr ATTR_NAME_CONTINUOUS_INPUT_ALLOC:
//  - attr set (and no buffer-fusion offsets): the peers' existing offsets are
//    trusted; this function only derives continuous_mem_start/size from them.
//  - attr not set: peers are re-based onto memory_offset_[memory_type], which
//    is advanced per input (by fusion offset or tensor size) and aligned.
// @param node                 consumer node requiring continuous input
// @param continuous_mem_start [out] start offset of the continuous region
// @param continuous_mem_size  [out] total size of the continuous region
// @param memory_type          key into memory_offset_ (e.g. RT_MEMORY_HBM)
// @return SUCCESS / FAILED / PARAM_INVALID (peer with multi-output continuous
//         output or reference peers are unsupported conflicts).
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
                                                        int64_t &continuous_mem_size, int64_t memory_type) {
  GELOGI("Current node %s needs continuous input.", node->GetName().c_str());
  bool continuous_input_alloc = false;
  (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc);
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    GELOGE(FAILED, "Memory offset don't have memory type[%ld].", memory_type);
    return FAILED;
  }
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
    bool is_peer_output_continuous = false;
    // If GetBool fail, is_peer_output_continuous is false.
    (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);
    // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and
    // continuous output of the previous node is the same, we can support it. If size != 1, there may be
    // conflict between the two, we can not support it.
    auto peer_output_size = peer_op_desc->GetOutputsSize();
    GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
                    GELOGE(PARAM_INVALID,
                           "Current node %s requires continuous input, while the previous node %s requires "
                           "continuous output. There may be conflict between the two. This node is not supported now.",
                           node->GetOpDesc()->GetName().c_str(), peer_op_desc->GetName().c_str());
                    return PARAM_INVALID;);
    bool is_peer_reference = false;
    // If GetBool fail, is_peer_reference is false.
    (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
    GE_IF_BOOL_EXEC(is_peer_reference,
                    GELOGE(PARAM_INVALID,
                           "Current node %s requires continuous input, while the previous node %s requires "
                           "reference. There may be conflict between the two. This node is not supported now.",
                           node->GetOpDesc()->GetName().c_str(), peer_op_desc->GetName().c_str());
                    return PARAM_INVALID;);
    vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
    // Buffer-fusion nodes carry precomputed per-output offsets; when present
    // they drive the offset advance instead of the tensor size.
    std::vector<int64_t> offsets_for_fusion = {};
    bool has_offset_attr =
        AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion);
    if (peer_out_data_anchor->GetIdx() < static_cast<int>(output_list.size())) {
      if (continuous_input_alloc && !has_offset_attr) {
        // Pre-allocated mode: derive the region bounds from the peers'
        // existing offsets; do not rewrite anything.
        if (in_data_anchor->GetIdx() == 0) {
          continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
        }
        // can not use else if, incase only one input
        if (in_data_anchor->GetIdx() == static_cast<int>(node->GetAllInDataAnchors().size()) - 1) {
          int64_t tensor_desc_size = 0;
          Status ret = ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())),
                                                tensor_desc_size);
          GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;);
          // Round the last tensor up to MEM_ALIGN_SIZE, then add one extra
          // alignment unit of padding to the region size.
          tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
          continuous_mem_size =
              output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE;
        }
        GELOGI(
            "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] "
            "real_size[%u].",
            node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
            peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(),
            0, 0);
        continue;
      }
      // Re-base this peer output onto the current running offset.
      output_list.at(peer_out_data_anchor->GetIdx()) = iter->second.mem_offset_;
    } else {
      GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx());
      return FAILED;
    }
    peer_op_desc->SetOutputOffset(output_list);
    size_t pre_mem_offset = iter->second.mem_offset_;
    int64_t tensor_desc_size = 0;
    if (has_offset_attr) {
      if (peer_out_data_anchor->GetIdx() < static_cast<int>(offsets_for_fusion.size())) {
        auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()];
        iter->second.mem_offset_ += offset_for_fusion;
      } else {
        GELOGE(FAILED, "fusion: peer node %s index : %d is out of range.", peer_op_desc->GetName().c_str(),
               peer_out_data_anchor->GetIdx());
        return FAILED;
      }
    } else {
      Status ret =
          TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size);
      GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;);
      iter->second.mem_offset_ += tensor_desc_size;
    }
    // If set tensor_actual_size, Memory alignment is not required.
    int32_t is_tensor_actual_size = 0;
    ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size);
    if (is_tensor_actual_size == 0) {
      AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
    }
    GELOGI(
        "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] "
        "real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
        peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(),
        (iter->second.mem_offset_ - pre_mem_offset), tensor_desc_size);
  }
  // Trailing padding after the whole continuous region.
  iter->second.mem_offset_ += MEM_ALIGN_SIZE;
  if (!continuous_input_alloc) {
    continuous_mem_size = iter->second.mem_offset_ - continuous_mem_start;
  }
  return SUCCESS;
}
// Rewrites the output offsets of |node| so all its outputs occupy one
// contiguous, MEM_ALIGN_SIZE-aligned region starting at the node's current
// first-output offset (output_list[0]).
// @param node node requiring continuous output
// @return ge::SUCCESS, or FAILED on missing offsets / size lookup failure.
Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node) {
  GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
  auto out_op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
  vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
    GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
           out_op_desc->GetOutputsSize(), output_list.size());
    return ge::FAILED;
  }
  // The region grows from the existing offset of output 0.
  size_t mem_offset = output_list[0];
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset;
    int64_t tensor_desc_size = 0;
    if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) !=
        ge::SUCCESS) {
      GELOGE(FAILED, "GetSize failed.");
      return FAILED;
    }
    mem_offset += tensor_desc_size;
    // NOTE(review): mem_offset is size_t (unsigned), so `<= 0` is only true
    // when it wraps to exactly 0 — this looks intended as an overflow /
    // negative-size guard; confirm and consider a signed check upstream.
    if (mem_offset <= 0) {
      return FAILED;
    }
    // Round the running offset up to the next MEM_ALIGN_SIZE boundary.
    mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
    GELOGI(
        "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
        "real_size[%ld].",
        node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
        output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size);
  }
  out_op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
  483. Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  484. OpDescPtr op_desc = node->GetOpDesc();
  485. vector<int64_t> output_list = op_desc->GetOutputOffset();
  486. if (output_list.empty()) {
  487. GELOGE(FAILED, "Outputoffset is empty node name:%s", node->GetName().c_str());
  488. return FAILED;
  489. }
  490. output_list.at(0) = mem_offset_reuse;
  491. op_desc->SetOutputOffset(output_list);
  492. GELOGI("Set virtual input node %s output offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);
  493. int64_t attr_dim_index;
  494. bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  495. if (!get_attr_dim_flag) {
  496. GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
  497. return FAILED;
  498. }
  499. size_t extra_memory_size = 0;
  500. for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
  501. auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
  502. GE_CHECK_NOTNULL(peer_out_data_anchor);
  503. auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  504. GE_CHECK_NOTNULL(peer_op_desc);
  505. vector<int64_t> output_offsets = peer_op_desc->GetOutputOffset();
  506. if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_offsets.size())) {
  507. GELOGE(ge::FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
  508. return ge::FAILED;
  509. }
  510. output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
  511. peer_op_desc->SetOutputOffset(output_offsets);
  512. size_t pre_mem_offset = mem_offset_reuse;
  513. // Calculate tensor real size of each piece of data and out size of complete data
  514. ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx());
  515. GE_CHECK_NOTNULL(output_desc);
  516. int64_t output_mem_size;
  517. int64_t batch_dim_num = 1;
  518. int64_t out_size;
  519. if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
  520. SUCCESS) {
  521. GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].",
  522. peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx());
  523. return FAILED;
  524. }
  525. mem_offset_reuse += output_mem_size;
  526. extra_memory_size = extra_memory_size + out_size - output_mem_size;
  527. GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
  528. "real_size[%ld].",
  529. node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
  530. peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), out_size,
  531. output_mem_size);
  532. }
  533. mem_offset_reuse += extra_memory_size;
  534. size_t after_mem_offset = mem_offset_reuse;
  535. GELOGI("After reassign virtual input node[name: %s, type: %s] memory, memory offset = %zu.",
  536. op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  537. return SUCCESS;
  538. }
  539. Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() {
  540. map<string, vector<NodePtr>> mem_reuse_virtual_input_nodes_map;
  541. int64_t memory_type = RT_MEMORY_HBM;
  542. for (const auto &n : compute_graph_->GetAllNodes()) {
  543. OpDescPtr op_desc = n->GetOpDesc();
  544. GE_CHECK_NOTNULL(op_desc);
  545. bool attr_continuous = false;
  546. bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, attr_continuous);
  547. GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
  548. bool attr_reuse = false;
  549. bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
  550. GE_IF_BOOL_EXEC(!get_reuse_flag, continue);
  551. if (attr_reuse && attr_continuous) {
  552. if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) {
  553. // When current virtual node has several outputs, can't directly determine which input is the tensor for reuse.
  554. GELOGE(FAILED, "Only one output is supported, current virtual node %s has %zu inputs.", n->GetName().c_str(),
  555. op_desc->GetOutputsSize());
  556. return FAILED;
  557. }
  558. GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.");
  559. auto iter = memory_offset_.find(memory_type);
  560. if (iter == memory_offset_.end()) {
  561. GELOGE(FAILED, "Memory offset don't have memory type[%ld].", memory_type);
  562. return FAILED;
  563. }
  564. GELOGD("Start to reassign memory for virtual input node, memory offset = %zu, memory type = %ld.",
  565. iter->second.mem_offset_, memory_type);
  566. string batch_label_string;
  567. // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter
  568. (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
  569. if (batch_label_string.empty()) {
  570. size_t node_mem_offset = iter->second.mem_offset_;
  571. // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
  572. Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset);
  573. if (status != SUCCESS) {
  574. GELOGE(FAILED, "Reassign memory of virtual input node failed, node name: %s.", n->GetName().c_str());
  575. return FAILED;
  576. }
  577. iter->second.mem_offset_ = node_mem_offset;
  578. AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
  579. GELOGD("After reassign memory for virtual input node, align memory = %zu, memory type = %ld.",
  580. iter->second.mem_offset_, memory_type);
  581. } else {
  582. // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory.
  583. string current_node_full_name = op_desc->GetName();
  584. size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
  585. if (pos == string::npos) {
  586. GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.",
  587. kMbatchNodeNameFlag, n->GetName().c_str());
  588. return FAILED;
  589. }
  590. string fixed_name = current_node_full_name.substr(0, pos);
  591. vector<NodePtr> parallel_virtual_input_nodes;
  592. if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) {
  593. parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name];
  594. }
  595. parallel_virtual_input_nodes.emplace_back(n);
  596. mem_reuse_virtual_input_nodes_map[fixed_name] = parallel_virtual_input_nodes;
  597. }
  598. }
  599. }
  600. int32_t mem_reuse_model = 0;
  601. if (ReAssignVirtualNodesMemory(mem_reuse_virtual_input_nodes_map, mem_reuse_model) != SUCCESS) {
  602. GELOGE(FAILED, "Reassign memory of virtual input nodes failed.");
  603. return FAILED;
  604. }
  605. return SUCCESS;
  606. }
  607. Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  608. OpDescPtr op_desc = node->GetOpDesc();
  609. // 1. set memory of to be reused input tensor
  610. auto in_data_anchor_list = node->GetAllInDataAnchors();
  611. auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  612. GE_CHECK_NOTNULL(peer_out_data_anchor);
  613. auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  614. GE_CHECK_NOTNULL(peer_op_desc);
  615. vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  616. if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
  617. GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
  618. return FAILED;
  619. }
  620. in_node_output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
  621. peer_op_desc->SetOutputOffset(in_node_output_offsets);
  622. GELOGI("Set virtual output node %s input data offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);
  623. // 2. set memory of output tensor
  624. vector<int64_t> output_list = op_desc->GetOutputOffset();
  625. if (output_list.empty()) {
  626. GELOGE(FAILED, "Outputoffset is empty, node name: %s", node->GetName().c_str());
  627. return FAILED;
  628. }
  629. if (op_desc->GetOutputsSize() > output_list.size()) {
  630. GELOGE(FAILED, "The size %zu of op_desc is more than output_list's size %zu.", op_desc->GetOutputsSize(),
  631. output_list.size());
  632. return FAILED;
  633. }
  634. int64_t attr_dim_index;
  635. bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  636. if (!get_attr_dim_flag) {
  637. GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
  638. return FAILED;
  639. }
  640. size_t extra_memory_size = 0;
  641. for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
  642. output_list[out_data_anchor->GetIdx()] = mem_offset_reuse;
  643. size_t pre_mem_offset = mem_offset_reuse;
  644. // calculate tensor real size of each piece of data and out size of complete data
  645. ge::ConstGeTensorDescPtr output_desc = op_desc->GetOutputDescPtr(out_data_anchor->GetIdx());
  646. GE_CHECK_NOTNULL(output_desc);
  647. int64_t output_mem_size;
  648. int64_t batch_dim_num = 1;
  649. int64_t out_size;
  650. if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
  651. SUCCESS) {
  652. GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].",
  653. op_desc->GetName().c_str(), out_data_anchor->GetIdx());
  654. return FAILED;
  655. }
  656. mem_offset_reuse += output_mem_size;
  657. extra_memory_size = extra_memory_size + out_size - output_mem_size;
  658. GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu], size[%ld], real_size[%ld].",
  659. node->GetOwnerComputeGraph()->GetName().c_str(), op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
  660. pre_mem_offset, out_size, output_mem_size);
  661. }
  662. op_desc->SetOutputOffset(output_list);
  663. mem_offset_reuse += extra_memory_size;
  664. size_t after_mem_offset = mem_offset_reuse;
  665. GELOGI("After reassign virtual output node[name: %s, type: %s] memory, memory offset = %zu.",
  666. op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  667. return SUCCESS;
  668. }
  669. Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() {
  670. map<string, vector<NodePtr>> mem_reuse_virtual_output_nodes_map;
  671. int64_t memory_type = RT_MEMORY_HBM;
  672. for (const auto &n : compute_graph_->GetAllNodes()) {
  673. OpDescPtr op_desc = n->GetOpDesc();
  674. GE_CHECK_NOTNULL(op_desc);
  675. bool attr_continuous = false;
  676. bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, attr_continuous);
  677. GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
  678. bool attr_reuse = false;
  679. bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
  680. GE_IF_BOOL_EXEC(!get_reuse_flag, continue);
  681. if (attr_reuse && attr_continuous) {
  682. auto in_data_anchor_list = n->GetAllInDataAnchors();
  683. if (in_data_anchor_list.size() != kVirtualOutputNodeInputSize) {
  684. // When current virtual node has several inputs, can't directly determine which input is the tensor for reuse.
  685. GELOGE(FAILED, "Only one input is supported, current virtual node %s has %zu inputs.", n->GetName().c_str(),
  686. in_data_anchor_list.size());
  687. return FAILED;
  688. }
  689. GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed.");
  690. auto iter = memory_offset_.find(memory_type);
  691. if (iter == memory_offset_.end()) {
  692. GELOGE(FAILED, "Memory offset don't have memory type[%ld].", memory_type);
  693. return FAILED;
  694. }
  695. GELOGD("Start to reassign memory for virtual output node, memory offset = %zu, memory type = %ld.",
  696. iter->second.mem_offset_, memory_type);
  697. string batch_label_string;
  698. // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter
  699. (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
  700. if (batch_label_string.empty()) {
  701. size_t node_mem_offset = iter->second.mem_offset_;
  702. // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
  703. Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset);
  704. if (status != SUCCESS) {
  705. GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str());
  706. return FAILED;
  707. }
  708. iter->second.mem_offset_ = node_mem_offset;
  709. AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
  710. GELOGD("After reassign memory for virtual output node, align memory = %zu, memory type = %ld.",
  711. iter->second.mem_offset_, memory_type);
  712. } else {
  713. // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory.
  714. string current_node_full_name = op_desc->GetName();
  715. size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
  716. if (pos == string::npos) {
  717. GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual output node, node name: %s.",
  718. kMbatchNodeNameFlag, n->GetName().c_str());
  719. return FAILED;
  720. }
  721. string fixed_name = current_node_full_name.substr(0, pos);
  722. vector<NodePtr> parallel_virtual_output_nodes;
  723. if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) {
  724. parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name];
  725. }
  726. parallel_virtual_output_nodes.emplace_back(n);
  727. mem_reuse_virtual_output_nodes_map[fixed_name] = parallel_virtual_output_nodes;
  728. }
  729. }
  730. }
  731. int32_t mem_reuse_model = 1;
  732. if (ReAssignVirtualNodesMemory(mem_reuse_virtual_output_nodes_map, mem_reuse_model) != SUCCESS) {
  733. GELOGE(FAILED, "Reassign memory of virtual output nodes failed.");
  734. return FAILED;
  735. }
  736. return SUCCESS;
  737. }
Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePtr>> &mem_reuse_nodes_map,
                                                       int32_t mem_reuse_model) {
  // Assigns memory to groups of parallel multi-batch virtual nodes so the
  // branches of one group share the same block.  mem_reuse_model selects
  // input-style (kVirtualInputNodeMemoryReuse) or output-style
  // (kVirtualOutputNodeMemoryReuse) reuse.
  //
  // Pass 1: for each group, remember the current offset, then assign real
  // memory only for the node carrying the max batch label (the largest
  // branch), which is the only one that advances the global offset.
  // Pass 2: re-run assignment for every node of every group starting again
  // from the group's remembered offset, so the smaller branches overlap the
  // max-batch block instead of consuming new memory.
  // Find max batch label value
  string max_batch_label;
  GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label),
                    "Get max batch label failed.");
  GELOGI("The batch label of max batch virtual nodes is %s.", max_batch_label.c_str());
  PrintMemoryOffset();
  // One entry per group, in map iteration order: the group's start offset.
  vector<size_t> nodes_mem_offset_list;
  for (auto &i_map : mem_reuse_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    int64_t memory_type = RT_MEMORY_HBM;
    GE_CHK_STATUS_RET(GetNodeListMemoryType(virtual_nodes_list, mem_reuse_model, memory_type),
                      "Get node list memory type failed.");
    auto iter = memory_offset_.find(memory_type);
    if (iter == memory_offset_.end()) {
      GELOGE(FAILED, "Memory offset don't have memory type[%ld].", memory_type);
      return FAILED;
    }
    size_t max_batch_node_mem_offset = iter->second.mem_offset_;
    nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset);
    for (auto &i_node : virtual_nodes_list) {
      // Op_desc is not nullptr, it has been checked.
      OpDescPtr op_desc = i_node->GetOpDesc();
      string batch_label_string;
      // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
      (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string == max_batch_label) {
        Status status = SUCCESS;
        if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
          status = ReAssignVirtualInputNodeMemory(i_node, max_batch_node_mem_offset);
        } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
          status = ReAssignVirtualOutputNodeMemory(i_node, max_batch_node_mem_offset);
        } else {
          GELOGE(FAILED, "Invalid parameter memory reuse model, which is: %d.", mem_reuse_model);
          return FAILED;
        }
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
          return FAILED;
        }
        // Commit the advanced offset globally; only the max-batch branch
        // contributes to the overall memory footprint.
        iter->second.mem_offset_ = max_batch_node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
        GELOGD("After reassign memory for virtual node, align memory = %zu, memory type = %ld.",
               iter->second.mem_offset_, memory_type);
        // Only assign memory of max batch nodes.
        break;
      }
    }
  }
  PrintMemoryOffset();
  size_t memory_reuse_index = 0;
  for (auto &i_map : mem_reuse_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    for (auto &i_node : virtual_nodes_list) {
      // Every node of the group restarts from the group's recorded offset so
      // all batch branches overlap the same block (the max-batch node is
      // simply re-assigned to the identical offsets).
      size_t remaining_batch_node_mem_offset = nodes_mem_offset_list[memory_reuse_index];
      Status status = SUCCESS;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        status = ReAssignVirtualInputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        status = ReAssignVirtualOutputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else {
        GELOGE(FAILED, "Invalid parameter memory reuse model, which is: %d.", mem_reuse_model);
        return FAILED;
      }
      if (status != SUCCESS) {
        GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
        return FAILED;
      }
    }
    memory_reuse_index++;
  }
  return SUCCESS;
}
  812. Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
  813. map<NodePtr, vector<NodePtr>> normal_atomic_and_clean_nodes_map;
  814. vector<NodePtr> connecting_output_atomic_nodes;
  815. Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
  816. if (status != SUCCESS) {
  817. GELOGE(status, "Failed to filter atomic nodes for memory assignment.");
  818. return status;
  819. }
  820. auto mem_iter = memory_offset_.find(RT_MEMORY_HBM);
  821. if (mem_iter == memory_offset_.end()) {
  822. GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM);
  823. return FAILED;
  824. }
  825. for (auto &iter : normal_atomic_and_clean_nodes_map) {
  826. int64_t atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
  827. GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start);
  828. for (auto &atomic_node : iter.second) {
  829. vector<int64_t> mem_offset_end;
  830. status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
  831. if (status != SUCCESS) {
  832. GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.",
  833. atomic_node->GetName().c_str());
  834. return status;
  835. }
  836. }
  837. int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start;
  838. if (atomic_mem_size != 0) {
  839. GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}),
  840. "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str());
  841. }
  842. }
  843. if (AssignConnectNetOutputAtomicMemory(connecting_output_atomic_nodes) != SUCCESS) {
  844. GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput.");
  845. return FAILED;
  846. }
  847. return SUCCESS;
  848. }
Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map,
                                                             vector<NodePtr> &connecting_output_atomic_nodes) {
  // Partitions the atomic nodes controlled by each ATOMICADDRCLEAN op into:
  //  - normal_atomic_nodes_map[clean_node]: atomic nodes cleaned as a group
  //    by that clean op, and
  //  - connecting_output_atomic_nodes: atomic nodes whose outputs connect to
  //    the graph output and therefore get independent clean handling.
  // If ANY peer of a clean op connects to the output, the whole group is
  // abandoned for that clean op (tmp list cleared, loop broken) — see below.
  GE_CHECK_NOTNULL(compute_graph_);
  for (const auto &node : compute_graph_->GetAllNodes()) {
    if (node->GetType() == ATOMICADDRCLEAN) {
      vector<NodePtr> tmp_normal_atomic_nodes;
      const auto &out_control_anchor = node->GetOutControlAnchor();
      GE_CHECK_NOTNULL(out_control_anchor);
      // Atomic nodes are attached to their clean op via control edges.
      for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
        if (peer_in_control_anchor != nullptr) {
          auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
          auto peer_in_node_desc = peer_in_node->GetOpDesc();
          if (peer_in_node_desc != nullptr) {
            bool is_atomic_node = false;
            // If GetBool fail, is_atomic_node is false.
            (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
            if (is_atomic_node) {
              bool is_reference = false;
              // If GetBool fail, is_reference is false.
              (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
              if (is_reference) {
                // Reference outputs alias their inputs, so clearing the atomic
                // address would clobber the referenced memory.
                GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and is_reference attribute.",
                       peer_in_node_desc->GetName().c_str());
                return ge::PARAM_INVALID;
              }
              vector<int> is_connecting_output;
              // If GetBool fail, attr is_connecting_output is an empty vector.
              (void) ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
              if (is_connecting_output.empty()) {
                tmp_normal_atomic_nodes.emplace_back(peer_in_node);
                continue;
              }
              // This peer connects to the graph output: route it to the
              // independent list and drop the whole group for this clean op.
              connecting_output_atomic_nodes.emplace_back(peer_in_node);
              tmp_normal_atomic_nodes.clear();
              break;
            }
          }
        }
      }
      if (!tmp_normal_atomic_nodes.empty()) {
        normal_atomic_nodes_map[node] = tmp_normal_atomic_nodes;
      }
    }
  }
  return SUCCESS;
}
  895. Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
  896. vector<int64_t> &mem_offset_end) {
  897. auto node_op_desc = node->GetOpDesc();
  898. // Assign atomic node output memory
  899. Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
  900. if (ret != SUCCESS) {
  901. GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str());
  902. return ret;
  903. }
  904. // Check and assign atomic node workspace memory
  905. map<string, map<int64_t, int64_t>> atomic_workspace_info;
  906. atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
  907. if (!atomic_workspace_info.empty()) {
  908. bool is_fusion_node = false;
  909. // If GetBool fail, is_fusion_node is false.
  910. (void) ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);
  911. if (is_fusion_node) {
  912. // Assign fusion atomic node workspace memory
  913. ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
  914. } else {
  915. // Assign single ordinary atomic node workspace memory, not include fusion node
  916. ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
  917. }
  918. if (ret != SUCCESS) {
  919. GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str());
  920. return ret;
  921. }
  922. } else {
  923. GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str());
  924. }
  925. return SUCCESS;
  926. }
  927. Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
  928. auto iter = memory_offset_.find(RT_MEMORY_HBM);
  929. if (iter == memory_offset_.end()) {
  930. GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM);
  931. return FAILED;
  932. }
  933. for (auto &node : connect_netoutput_nodes) {
  934. GE_CHECK_NOTNULL(node);
  935. if (node->GetOpDesc() == nullptr) {
  936. GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
  937. continue;
  938. }
  939. // Atomic memory start addr
  940. int64_t original_atomic_mem_start = static_cast<int64_t>(iter->second.mem_offset_);
  941. GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
  942. node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
  943. vector<int64_t> mem_offset_end;
  944. if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
  945. GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
  946. return FAILED;
  947. }
  948. // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately.
  949. if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) {
  950. GELOGE(FAILED, "Failed to set atomic attr separately.");
  951. return FAILED;
  952. }
  953. }
  954. return SUCCESS;
  955. }
  956. Status GraphMemoryAssigner::AssignReferenceMemory() {
  957. for (auto &node : compute_graph_->GetDirectNode()) {
  958. // Get the reference type of the node, default is false
  959. bool is_ref = false;
  960. // If GetBool fail, is_ref is false.
  961. (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
  962. if (!is_ref) {
  963. continue;
  964. }
  965. GELOGI("Current node %s needs to support the reference relationship between output and input.",
  966. node->GetName().c_str());
  967. auto out_op_desc = node->GetOpDesc();
  968. GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
  969. vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  970. if (out_op_desc->GetOutputsSize() > output_list.size()) {
  971. GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
  972. out_op_desc->GetOutputsSize(), output_list.size());
  973. return ge::FAILED;
  974. }
  975. map<string, int> input_name_index;
  976. for (const auto &input_name : out_op_desc->GetAllInputNames()) {
  977. int index = out_op_desc->GetInputIndexByName(input_name);
  978. input_name_index.emplace(input_name, index);
  979. }
  980. for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
  981. string out_data_anchor_name = out_op_desc->GetOutputNameByIndex(out_data_anchor->GetIdx());
  982. auto iter = input_name_index.find(out_data_anchor_name);
  983. if (iter != input_name_index.end()) {
  984. int index = iter->second;
  985. GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
  986. iter->first.c_str(), out_data_anchor_name.c_str());
  987. GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
  988. auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
  989. GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
  990. int peer_out_anchor_index = peer_out_anchor->GetIdx();
  991. auto peer_out_node = peer_out_anchor->GetOwnerNode();
  992. auto peer_out_op_desc = peer_out_node->GetOpDesc();
  993. GE_CHECK_NOTNULL(peer_out_op_desc);
  994. output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
  995. GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
  996. node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
  997. out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
  998. } else {
  999. GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
  1000. node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(),
  1001. out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId());
  1002. }
  1003. }
  1004. out_op_desc->SetOutputOffset(output_list);
  1005. }
  1006. return ge::SUCCESS;
  1007. }
  1008. bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
  1009. for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
  1010. auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
  1011. if (peer_out_data_anchor == nullptr) {
  1012. continue;
  1013. }
  1014. auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  1015. if (peer_op_desc == nullptr) {
  1016. continue;
  1017. }
  1018. if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
  1019. (peer_op_desc->GetType() == VARIABLE)) {
  1020. GELOGE(ge::FAILED,
  1021. "The current node is %s, and the peer out node is %s. Currently, this scenario is not supported",
  1022. node->GetName().c_str(), peer_op_desc->GetName().c_str());
  1023. return false;
  1024. }
  1025. }
  1026. return true;
  1027. }
  1028. Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
  1029. auto op_desc = node->GetOpDesc();
  1030. GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
  1031. mem_offset_end.clear();
  1032. GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());
  1033. vector<int64_t> atomic_output_index;
  1034. // If GetListInt fail, atomic_output_index is empty.
  1035. (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
  1036. // Check atomic output
  1037. vector<int64_t> output_list = op_desc->GetOutputOffset();
  1038. if (atomic_output_index.size() > output_list.size()) {
  1039. GELOGE(ge::FAILED, "The size of atomic_output_index is more than the size of output_list");
  1040. return ge::FAILED;
  1041. }
  1042. auto output_list_size = static_cast<int64_t>(output_list.size());
  1043. auto iter = memory_offset_.find(RT_MEMORY_HBM);
  1044. if (iter == memory_offset_.end()) {
  1045. GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM);
  1046. return FAILED;
  1047. }
  1048. for (auto &output_index : atomic_output_index) {
  1049. if (output_index >= output_list_size) {
  1050. GELOGE(ge::PARAM_INVALID, "The output index %ld is more than the size %ld of output_list.", output_index,
  1051. output_list_size);
  1052. return ge::PARAM_INVALID;
  1053. }
  1054. // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here
  1055. bool is_assigned_mem = false;
  1056. if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
  1057. GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str());
  1058. return ge::FAILED;
  1059. }
  1060. // If you have already assigned an atomic address, skip it, and you don't need to reassign it.
  1061. if (is_assigned_mem) {
  1062. GELOGI(
  1063. "Node %s atomic output : we have assigned atomic memory as the input of next node in "
  1064. "ReAssignContinuousMemory function.",
  1065. op_desc->GetName().c_str());
  1066. continue;
  1067. }
  1068. auto output_desc = op_desc->GetAllOutputsDescPtr().at(output_index);
  1069. int64_t size = 0;
  1070. if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) {
  1071. GELOGI("Get size failed");
  1072. }
  1073. output_list[output_index] = iter->second.mem_offset_;
  1074. GELOGI("[IMAS]Atomic output : Set %s name[%s] output[%ld] offset to [%zu] stream_id[%ld] size[%ld] real_size[%ld].",
  1075. compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index,
  1076. iter->second.mem_offset_, op_desc->GetStreamId(), size, size);
  1077. iter->second.mem_offset_ += size;
  1078. AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
  1079. mem_offset_end.emplace_back(iter->second.mem_offset_);
  1080. }
  1081. op_desc->SetOutputOffset(output_list);
  1082. return ge::SUCCESS;
  1083. }
  1084. Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
  1085. bool &is_mem_assigned) {
  1086. if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
  1087. GELOGE(ge::PARAM_INVALID, "Output index %ld is more than the size of node's AllOutDataAnchors.", output_index);
  1088. return ge::PARAM_INVALID;
  1089. }
  1090. auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  1091. GE_CHECK_NOTNULL(out_data_anchor);
  1092. auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
  1093. for (auto &input_anchor : input_anchors) {
  1094. auto output_node = input_anchor->GetOwnerNode();
  1095. /// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address
  1096. /// has been assigned
  1097. vector<int64_t> atomic_input_index;
  1098. (void) ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
  1099. if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
  1100. is_mem_assigned = true;
  1101. break;
  1102. }
  1103. }
  1104. return SUCCESS;
  1105. }
  1106. Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
  1107. map<string, map<int64_t, int64_t>> &workspace_info,
  1108. vector<int64_t> &mem_offset_end) {
  1109. GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
  1110. auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  1111. if (mem_type_iter == memory_offset_.end()) {
  1112. GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM);
  1113. return FAILED;
  1114. }
  1115. vector<int64_t> workspace_vector = op_desc->GetWorkspace();
  1116. for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) {
  1117. if (op_desc->GetName() != iter->first) {
  1118. GELOGE(ge::PARAM_INVALID, "The node name %s and the node name %s in workspace info are inconsistent.",
  1119. op_desc->GetName().c_str(), iter->first.c_str());
  1120. return ge::PARAM_INVALID;
  1121. }
  1122. if (iter->second.empty()) {
  1123. continue;
  1124. }
  1125. for (auto &info_iter : iter->second) {
  1126. auto workspace_index = static_cast<uint64_t>(info_iter.first);
  1127. auto workspace_size = info_iter.second;
  1128. if (workspace_index >= workspace_vector.size()) {
  1129. GELOGE(ge::PARAM_INVALID, "The workspace index %lu is more than the size %zu of workspace vector.",
  1130. workspace_index, workspace_vector.size());
  1131. return ge::PARAM_INVALID;
  1132. }
  1133. workspace_vector[workspace_index] = mem_type_iter->second.mem_offset_;
  1134. GELOGI(
  1135. "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
  1136. "size[%ld] real_size[%ld].",
  1137. compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index,
  1138. mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size);
  1139. mem_type_iter->second.mem_offset_ += workspace_size;
  1140. mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
  1141. }
  1142. }
  1143. op_desc->SetWorkspace(workspace_vector);
  1144. return SUCCESS;
  1145. }
  1146. Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
  1147. map<string, map<int64_t, int64_t>> &workspace_info,
  1148. vector<int64_t> &mem_offset_end) {
  1149. GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
  1150. auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  1151. if (mem_type_iter == memory_offset_.end()) {
  1152. GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM);
  1153. return FAILED;
  1154. }
  1155. map<string, map<int64_t, int64_t>> sub_node_workspace_offset;
  1156. for (auto &iter : workspace_info) {
  1157. if (iter.second.empty()) {
  1158. continue;
  1159. }
  1160. map<int64_t, int64_t> index_offset;
  1161. for (auto &info_iter : iter.second) {
  1162. auto workspace_index = static_cast<uint64_t>(info_iter.first);
  1163. auto workspace_size = info_iter.second;
  1164. size_t workspace_offset = mem_type_iter->second.mem_offset_;
  1165. GELOGI(
  1166. "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] "
  1167. "real_size[%ld].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index,
  1168. mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size);
  1169. mem_type_iter->second.mem_offset_ += workspace_size;
  1170. mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
  1171. index_offset.insert(std::make_pair(workspace_index, workspace_offset));
  1172. }
  1173. sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
  1174. }
  1175. if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
  1176. GELOGE(FAILED, "Set EXT_ATTR_ATOMIC_WORKSPACE_OFFSET failed, op name:%s.", op_desc->GetName().c_str());
  1177. return FAILED;
  1178. }
  1179. return SUCCESS;
  1180. }
  1181. Status GraphMemoryAssigner::CheckOffset() {
  1182. std::map<std::string, std::string> anchor_to_symbol;
  1183. std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
  1184. if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
  1185. GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str());
  1186. return FAILED;
  1187. }
  1188. for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
  1189. GE_CHECK_NOTNULL(node->GetOpDesc());
  1190. vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
  1191. for (auto input : input_list) {
  1192. if (input == ge::kInvalidOffset) {
  1193. GELOGE(FAILED, "Invalid offset in node: %s input: %ld.", node->GetName().c_str(), ge::kInvalidOffset);
  1194. return FAILED;
  1195. }
  1196. }
  1197. bool need_update_output = false;
  1198. vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
  1199. for (uint32_t i = 0; i < output_list.size(); ++i) {
  1200. if (output_list[i] == ge::kInvalidOffset) {
  1201. GELOGE(FAILED, "Invalid offset in node: %s output: %ld.", node->GetName().c_str(), ge::kInvalidOffset);
  1202. return FAILED;
  1203. }
  1204. if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) {
  1205. auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i);
  1206. if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) {
  1207. output_list[i] = symbol_offset;
  1208. need_update_output = true;
  1209. }
  1210. }
  1211. }
  1212. if (need_update_output) {
  1213. node->GetOpDesc()->SetOutputOffset(output_list);
  1214. }
  1215. vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
  1216. for (auto workspace : workspace_list) {
  1217. if (workspace == ge::kInvalidOffset) {
  1218. GELOGE(FAILED, "Invalid offset in node: %s workspace: %ld.", node->GetName().c_str(), ge::kInvalidOffset);
  1219. return FAILED;
  1220. }
  1221. }
  1222. }
  1223. return SUCCESS;
  1224. }
  1225. ge::Status GraphMemoryAssigner::SetInputOffset() {
  1226. if (memory_offset_.empty()) {
  1227. GELOGE(FAILED, "memory_offset_ is empty.");
  1228. return FAILED;
  1229. }
  1230. for (auto pair : memory_offset_) {
  1231. GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memory type[%ld]", compute_graph_->GetName().c_str(),
  1232. pair.second.mem_offset_, pair.first);
  1233. }
  1234. for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
  1235. if (UpdateOpInputOffset(node) != ge::SUCCESS) {
  1236. GELOGE(ge::FAILED, "Update op input offset failed");
  1237. return ge::FAILED;
  1238. }
  1239. }
  1240. return ge::SUCCESS;
  1241. }
  1242. NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const {
  1243. if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
  1244. return node;
  1245. }
  1246. if (NodeUtils::IsDynamicShape(node)) {
  1247. return node;
  1248. }
  1249. return NodeUtils::GetParentInput(node);
  1250. }
  1251. ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  1252. uint32_t parent_index = 0;
  1253. if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
  1254. return SUCCESS;
  1255. }
  1256. // Subgraph Data Node, check for constant input.
  1257. std::string op_type;
  1258. const auto &in_node = NodeUtils::GetParentInput(node);
  1259. if (NodeUtils::GetConstOpType(in_node, op_type)) {
  1260. input_list = in_node->GetOpDesc()->GetOutputOffset();
  1261. node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as const output.
  1262. return SUCCESS; // Constant input.
  1263. }
  1264. // Memory allocated for dynamic shape subgraph Data.
  1265. if (NodeUtils::IsDynamicShape(node)) {
  1266. return SUCCESS;
  1267. }
  1268. const auto &owner = node->GetOwnerComputeGraph();
  1269. const auto &parent_desc = owner->GetParentNode()->GetOpDesc();
  1270. const auto parent_inputs = parent_desc->GetInputOffset();
  1271. if (parent_inputs.size() <= parent_index) {
  1272. GELOGE(FAILED, "Get Parent input offset failed, node: %s, input size: %zu, parent index: %u",
  1273. node->GetName().c_str(), parent_inputs.size(), parent_index);
  1274. return FAILED;
  1275. }
  1276. input_list = {parent_inputs[parent_index]};
  1277. node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as parent input.
  1278. return SUCCESS;
  1279. }
  1280. ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  1281. vector<int64_t> origin_input_list;
  1282. vector<int64_t> memory_type;
  1283. auto tmp_op_desc = node->GetOpDesc();
  1284. origin_input_list = tmp_op_desc->GetInputOffset();
  1285. int64_t valid_input_index = 0;
  1286. bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
  1287. for (const auto &anchor : node->GetAllInDataAnchors()) {
  1288. vector<int64_t> output_list;
  1289. auto peer_out_anchor = anchor->GetPeerOutAnchor();
  1290. if (peer_out_anchor == nullptr) {
  1291. continue;
  1292. }
  1293. // If the current node not broadcast, the OutputOffset of the previous node is used to update the input_list
  1294. auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
  1295. auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
  1296. GE_CHECK_NOTNULL(last_peer_out_op_desc);
  1297. output_list = last_peer_out_op_desc->GetOutputOffset();
  1298. auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx());
  1299. if (output_list.size() > static_cast<size_t>(out_index)) {
  1300. int64_t input_offset = output_list.at(out_index);
  1301. if (has_mem_type_attr && !origin_input_list.empty()) {
  1302. auto input_size = tmp_op_desc->GetInputsSize();
  1303. auto ori_input_offset_list_size = origin_input_list.size();
  1304. auto mem_type_size = memory_type.size();
  1305. if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) {
  1306. GELOGE(ge::FAILED,
  1307. "fusion: node[%s] input_size[%zu] diff from memory_type_size[%zu]"
  1308. " from ori_input_offset_list_size[%lu]",
  1309. tmp_op_desc->GetName().c_str(), input_size, mem_type_size, ori_input_offset_list_size);
  1310. return ge::FAILED;
  1311. }
  1312. // not hbm keep orignal inputoffest
  1313. // hbm inputoffset = original inputoffset + outputoffset
  1314. input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 ? origin_input_list[valid_input_index]
  1315. : origin_input_list[valid_input_index] + output_list.at(out_index));
  1316. }
  1317. const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
  1318. if (in_node->GetType() == CONSTANT) {
  1319. GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx()));
  1320. GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
  1321. }
  1322. GELOGI("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]",
  1323. has_mem_type_attr == true ? "Fusion" : "",
  1324. tmp_op_desc->GetName().c_str(),
  1325. valid_input_index,
  1326. peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(),
  1327. out_index,
  1328. input_offset);
  1329. input_list.emplace_back(input_offset);
  1330. valid_input_index++;
  1331. }
  1332. }
  1333. return ge::SUCCESS;
  1334. }
  1335. ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
  1336. GE_CHECK_NOTNULL(node->GetOpDesc());
  1337. vector<int64_t> input_list;
  1338. if (node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) {
  1339. for (const auto &anchor : node->GetAllInDataAnchors()) {
  1340. vector<int64_t> output_list;
  1341. auto peer_out_anchor = anchor->GetPeerOutAnchor();
  1342. if (peer_out_anchor == nullptr) {
  1343. continue;
  1344. }
  1345. auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
  1346. // If the current node is broadcast and the preceding node is variable, because InputOffset has been set
  1347. // in function:AssignVarAttr2Nodes, then the InputOffset of the broadcast node is taken to update the input_list.
  1348. // Otherwise, the OutputOffset of the previous node is used to update the input_list.
  1349. if (last_peer_out_node->GetType() != VARIABLE) {
  1350. auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
  1351. GE_CHECK_NOTNULL(last_peer_out_op_desc);
  1352. output_list = last_peer_out_op_desc->GetOutputOffset();
  1353. if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
  1354. input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx()));
  1355. }
  1356. } else {
  1357. vector<int64_t> cur_node_input_list;
  1358. auto cur_node_op_desc = node->GetOpDesc();
  1359. GE_CHECK_NOTNULL(cur_node_op_desc);
  1360. cur_node_input_list = cur_node_op_desc->GetInputOffset();
  1361. if (cur_node_input_list.size() > static_cast<size_t>(anchor->GetIdx())) {
  1362. input_list.emplace_back(cur_node_input_list.at(anchor->GetIdx()));
  1363. }
  1364. }
  1365. }
  1366. } else if (node->GetType() == DATA_TYPE) {
  1367. if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
  1368. GELOGE(FAILED, "Update data: %s args offset failed.", node->GetName().c_str());
  1369. return FAILED;
  1370. }
  1371. } else {
  1372. if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
  1373. GELOGE(FAILED, "Update node: %s input offset failed.", node->GetName().c_str());
  1374. return FAILED;
  1375. }
  1376. }
  1377. node->GetOpDesc()->SetInputOffset(input_list);
  1378. return SUCCESS;
  1379. }
  1380. Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
  1381. const vector<int64_t> &mem_offset_end) {
  1382. GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);
  1383. // Parsing offset and size vectors
  1384. vector<int64_t> memory_offset_start;
  1385. vector<int64_t> memory_offset_size;
  1386. memory_offset_start.emplace_back(atomic_mem_start);
  1387. for (size_t i = 0; i < mem_offset_end.size(); ++i) {
  1388. memory_offset_start.emplace_back(mem_offset_end[i]);
  1389. // Number 1 means element index
  1390. auto size = memory_offset_start[i + 1] - memory_offset_start[i];
  1391. memory_offset_size.emplace_back(size);
  1392. }
  1393. memory_offset_start.pop_back();
  1394. const auto &in_control_anchor = node->GetInControlAnchor();
  1395. if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
  1396. for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
  1397. if (peer_out_control_anchor == nullptr) {
  1398. continue;
  1399. }
  1400. auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
  1401. auto peer_out_node_desc = peer_out_node->GetOpDesc();
  1402. if (peer_out_node_desc == nullptr) {
  1403. continue;
  1404. }
  1405. GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.", memory_offset_size.size(),
  1406. peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
  1407. if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
  1408. if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) {
  1409. GELOGE(FAILED, "Set atomic clean attr failed.");
  1410. return FAILED;
  1411. }
  1412. }
  1413. }
  1414. }
  1415. return SUCCESS;
  1416. }
  1417. ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start,
  1418. const vector<int64_t> &atomic_mem_size) {
  1419. auto node_op_desc = node->GetOpDesc();
  1420. if (node_op_desc != nullptr) {
  1421. GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
  1422. vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
  1423. vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
  1424. workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
  1425. workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
  1426. node_op_desc->SetWorkspace(workspace_vector);
  1427. node_op_desc->SetWorkspaceBytes(workspace_byte_vector);
  1428. std::vector<int64_t> mem_start_vector;
  1429. // If GetListInt fail, mem_start_vector is empty.
  1430. (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
  1431. mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
  1432. GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
  1433. GELOGE(FAILED, "SetListInt failed.");
  1434. return FAILED);
  1435. std::vector<int64_t> mem_size_vector;
  1436. // If GetListInt fail, mem_size_vector is empty.
  1437. (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
  1438. mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
  1439. GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
  1440. GELOGE(FAILED, "SetListInt failed.");
  1441. return FAILED);
  1442. std::stringstream ss;
  1443. for (auto iter : atomic_mem_start) {
  1444. ss << iter << " ";
  1445. }
  1446. string atomic_mem_start_str = ss.str();
  1447. ss.clear();
  1448. ss.str("");
  1449. for (auto iter : atomic_mem_size) {
  1450. ss << iter << " ";
  1451. }
  1452. string atomic_mem_size_str = ss.str();
  1453. GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]",
  1454. node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
  1455. atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId());
  1456. }
  1457. return SUCCESS;
  1458. }
  1459. void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type) {
  1460. if (mem_align_size <= 0) {
  1461. return;
  1462. }
  1463. auto iter = memory_offset_.find(memory_type);
  1464. if (iter == memory_offset_.end()) {
  1465. GELOGW("Memory offset don't have memory type[%ld].", memory_type);
  1466. return;
  1467. }
  1468. iter->second.mem_offset_ =
  1469. (iter->second.mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
  1470. }
  1471. ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector<NodePtr> &nodes, int32_t mem_reuse_model,
  1472. int64_t &memory_type) {
  1473. memory_type = RT_MEMORY_HBM;
  1474. // In the dynamic batch scenario, the memory attributes of nodes are the same.
  1475. for (auto &n : nodes) {
  1476. if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
  1477. GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.")
  1478. break;
  1479. }
  1480. if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
  1481. GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed.");
  1482. break;
  1483. }
  1484. }
  1485. return SUCCESS;
  1486. }
  1487. ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output) {
  1488. memory_type = RT_MEMORY_HBM;
  1489. vector<int64_t> mem_type_list;
  1490. if (input_or_output == "input") {
  1491. (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_INPUT_MEM_TYPE_LIST, mem_type_list);
  1492. }
  1493. if (input_or_output == "output") {
  1494. (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_OUTPUT_MEM_TYPE_LIST, mem_type_list);
  1495. }
  1496. if (mem_type_list.empty()) {
  1497. if (memory_offset_.find(memory_type) == memory_offset_.end()) {
  1498. GELOGE(FAILED, "Memory offset map does not have memory type[%ld].", memory_type);
  1499. return FAILED;
  1500. }
  1501. return SUCCESS;
  1502. }
  1503. if (mem_type_list.size() != node->GetAllInDataAnchorsSize()) {
  1504. GELOGE(FAILED, "The size[%zu] of mem type list is not equal to the size of in data anchor[%u].",
  1505. mem_type_list.size(), node->GetAllInDataAnchorsSize());
  1506. return FAILED;
  1507. }
  1508. if (!CheckContinuousMemType(mem_type_list)) {
  1509. GELOGE(FAILED, "Check continuous memory type failed.");
  1510. return FAILED;
  1511. }
  1512. // It is continuous memory and memory type is the same, so use the first memory.
  1513. memory_type = mem_type_list[0];
  1514. return SUCCESS;
  1515. }
  1516. bool GraphMemoryAssigner::CheckContinuousMemType(vector<int64_t> mem_type_list) {
  1517. if (mem_type_list.size() == 0) {
  1518. return true;
  1519. }
  1520. int64_t mem_type_tmp = mem_type_list[0];
  1521. for (auto mem_type : mem_type_list) {
  1522. if (mem_type != mem_type_tmp) {
  1523. GELOGW("The memory is continuous, but the type of the input memory is inconsistent. They are [%ld] and [%ld].",
  1524. mem_type_tmp, mem_type);
  1525. return false;
  1526. }
  1527. }
  1528. if (memory_offset_.find(mem_type_tmp) == memory_offset_.end()) {
  1529. GELOGW("Memory offset map does not have memory type[%ld].", mem_type_tmp);
  1530. return false;
  1531. }
  1532. return true;
  1533. }
  1534. void GraphMemoryAssigner::PrintMemoryOffset() {
  1535. for (auto pair : memory_offset_) {
  1536. // Assign memory of max batch nodes that have the same batch label.
  1537. GELOGD("Reassign memory for max batch virtual nodes, memory type = %ld, memory offset = %zu.",
  1538. pair.first, pair.second.mem_offset_);
  1539. }
  1540. }
  1541. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示