
graph_mem_assigner.cc 84 kB

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "graph/build/memory/graph_mem_assigner.h"
#include <cstring>
#include <set>
#include "common/math/math_util.h"
#include "common/util/error_manager/error_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "graph/build/memory/hybrid_mem_assigner.h"
#include "graph/build/memory/var_mem_assign_util.h"
#include "graph/build/memory/block_mem_assigner.h"
#include "graph/common/omg_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_attr_value.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"

namespace {
const int kAllInputAddrIsAtomic = -1;
const int kVirtualInputNodeMemoryReuse = 0;
const int kVirtualOutputNodeMemoryReuse = 1;
// Each state occupies its own bit, so the flags can be combined without overlap.
enum ContinuousType { kTypeInput = 1, kTypeInputNoPadding = 2, kTypeOutput = 4, kTypeOutputNoPadding = 8 };
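// Illustrative example (not part of the original logic): a node whose inputs must be
// continuous and whose outputs reuse input memory without padding would yield
//   GetContinuousMemoryType(op) == (kTypeInput | kTypeOutputNoPadding)  // 1 | 8 == 9
// so each property can be tested independently with a bitwise AND, e.g.
//   (continuous_type & kTypeInput) != 0.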
int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol,
                              const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors,
                              const ge::NodePtr &node, const uint32_t i) {
  ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut);
  auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString());
  if (iter1 == anchor_to_symbol.end()) {
    return ge::kInvalidOffset;
  }
  auto out_symbol = iter1->second;
  auto iter2 = symbol_to_anchors.find(out_symbol);
  if (iter2 == symbol_to_anchors.end()) {
    return ge::kInvalidOffset;
  }
  for (const auto &node_index_io : iter2->second) {
    if (node_index_io.value_ == out_symbol) {
      vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
      vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset();
      if (node_index_io.index_ >= symbol_output_list.size()) {
        return ge::kInvalidOffset;
      }
      GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i,
             output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_));
      return symbol_output_list.at(node_index_io.index_);
    }
  }
  return ge::kInvalidOffset;
}
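// Usage note (interpretation of the lookup above): anchor_to_symbol maps an output anchor
// string, as built by NodeIndexIO::ToString(), to its memory-reuse symbol, and
// symbol_to_anchors lists every anchor sharing that symbol. The offset returned is the one
// recorded on the anchor whose value_ equals the symbol itself, i.e. the canonical owner of
// the shared memory.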
}  // namespace

namespace ge {
Status VariableMemoryAssigner::Assign() {
  Status result = ge::VarMemAssignUtil::AssignConstantOpMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  result = ge::VarMemAssignUtil::AssignVarMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}

Status VariableMemoryAssigner::AssignVarAttr2Nodes() {
  Status result = ge::VarMemAssignUtil::AssignVarAttr2Nodes(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}

Status VariableMemoryAssigner::AssignMemory2HasRefAttrNode() {
  Status result = ge::VarMemAssignUtil::AssignMemory2HasRefAttrNode(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}
Status GraphMemoryAssigner::AssignMemory() {
  ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_));
  // new(std::nothrow) may return nullptr; check before use.
  GE_CHECK_NOTNULL(mem_assigner);
  if (mem_assigner->Assign() != ge::SUCCESS) {
    GELOGE(ge::FAILED, "[Assign][GraphMem]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
  memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
  if (mem_assigner->GetP2PMemOffset() >= 0) {
    MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
    memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
  }
  auto session_id = compute_graph_->GetSessionID();
  int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  if (variable_assigner->Assign() != ge::SUCCESS) {
    return ge::FAILED;
  }
  int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
  GELOGD("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);
  mem_assigner_ = std::move(mem_assigner);
  return ge::SUCCESS;
}
ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
    return ge::FAILED;
  }
  return ge::SUCCESS;
}
ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() {
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) {
    return ge::FAILED;
  }
  return ge::SUCCESS;
}
ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
                                             int64_t dim_index, int64_t &output_mem_size,
                                             int64_t &batch_dim_num, int64_t &out_size) {
  graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(FAILED, "[Get][TensorSize]");
    REPORT_INNER_ERROR("E19999", "Get tensor size failed when CalculateTensorRealSizeAndOutSize");
    return FAILED;
  }
  GeShape output_shape = output_desc->GetShape();
  std::vector<int64_t> output_dims = output_shape.GetDims();
  if (dim_index >= static_cast<int64_t>(output_dims.size())) {
    REPORT_INNER_ERROR("E19999", "Inner param dim_index value:%ld invalid, bigger than dim size:%lu in shape:%s",
                       dim_index, output_dims.size(), output_shape.ToString().c_str());
    GELOGE(FAILED, "[Check][Param:dim_index]value:%ld invalid, bigger than dim size:%lu in shape:%s",
           dim_index, output_dims.size(), output_shape.ToString().c_str());
    return FAILED;
  }
  for (int64_t index = 0; index < dim_index; index++) {
    FMK_INT64_MULCHECK(batch_dim_num, output_dims[index]);
    batch_dim_num *= output_dims[index];
    output_dims[index] = 1;
  }
  output_shape = GeShape(output_dims);
  Format out_format = output_desc->GetFormat();
  DataType data_type = output_desc->GetDataType();
  graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(graph_status, "[Calc][TensorSize]");
    return FAILED;
  }
  if (output_mem_size < 0) {
    REPORT_INNER_ERROR("E19999", "After calculating, tensor memory size:%ld invalid, less than 0. "
                       "shape:%s, format:%s, dtype:%s, maybe has dynamic shape",
                       output_mem_size, output_shape.ToString().c_str(),
                       TypeUtils::FormatToSerialString(out_format).c_str(),
                       TypeUtils::DataTypeToSerialString(data_type).c_str());
    GELOGE(FAILED, "[Check][TensorSize]value:%ld invalid after calc, less than 0. shape:%s, format:%s, dtype:%s, "
           "maybe has dynamic shape",
           output_mem_size, output_shape.ToString().c_str(),
           TypeUtils::FormatToSerialString(out_format).c_str(),
           TypeUtils::DataTypeToSerialString(data_type).c_str());
    return FAILED;
  }
  return SUCCESS;
}
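// Worked example (illustrative): for shape [4, 2, 16] with dim_index = 1, the loop above
// folds the dims before index 1 into batch_dim_num (batch_dim_num = 4) and rewrites the
// shape as [1, 2, 16]; output_mem_size then holds the size of one batch slice, while
// out_size keeps the size of the complete tensor.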
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) {
  if (memory_offset_.empty()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected when ReAssignMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph), "ReAssignContinuousMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!");
  size_t total_mem_offset = 0;
  for (auto pair : memory_offset_) {
    mem_type_to_offset[pair.first] = pair.second.mem_offset_;
    total_mem_offset += pair.second.mem_offset_;
  }
  auto session_id = compute_graph_->GetSessionID();
  if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
    GELOGE(ge::FAILED, "[Check][TotalMemOffset] %zu is greater than memory manager malloc max size %zu, "
           "graph_id:%u, graph_name:%s, reducing the batch size or scaling down the model may solve the problem",
           total_mem_offset, VarManager::Instance(session_id)->GetGraphMemoryMaxSize(),
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    for (auto iter : mem_type_to_offset) {
      ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"},
          {std::to_string(iter.first), std::to_string(iter.second), "featuremap",
           std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())});
      GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
              iter.second, iter.first);
    }
    return ge::FAILED;
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) {
  BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
  if (priority_assigner == nullptr) {
    REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected when AssignZeroCopyMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData:priority_assigner]nullptr is invalid, "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM];
  // Set offsets for zero-copy blocks.
  for (auto &memory_block : priority_assigner->GetMemoryBlocks()) {
    if (memory_block == nullptr || memory_block->deleted_block_ || !memory_block->is_zero_copy_) {
      continue;
    }
    memory_block->Resize();
    memory_block->SetHeadOffset(mem_offset[RT_MEMORY_HBM]);
    mem_offset[RT_MEMORY_HBM] += memory_block->Size();
    memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1);
  }
  // Set offsets for zero-copy nodes.
  priority_assigner->SetOpMemOffset(true);
  zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp;
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
                       "not expected when AssignZeroCopyMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM];
  GELOGD("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp,
         zero_mem_copy_size);
  return SUCCESS;
}
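// Note (inferred from is_zero_copy_ and SetOpMemOffset(true); see BlockMemAssigner for the
// block semantics): zero-copy blocks are laid out after the ordinary feature-map region so
// that their addresses can later be rebound to user-provided input/output buffers without
// an extra copy.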
uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) {
  if (op_desc == nullptr) {
    return 0;
  }
  bool is_continuous = false;
  uint32_t continuous_type = 0;
  // If GetBool fails, is_continuous stays false.
  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_continuous);
  if (is_continuous) {
    continuous_type |= kTypeInput;
  } else {
    (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_continuous);
    if (is_continuous) {
      bool attr_reuse = false;
      (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
      if (attr_reuse) {
        continuous_type |= kTypeInputNoPadding;
      }
    }
  }
  is_continuous = false;
  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_continuous);
  if (is_continuous) {
    continuous_type |= kTypeOutput;
  } else {
    (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_continuous);
    if (is_continuous) {
      bool attr_reuse = false;
      (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
      if (attr_reuse) {
        continuous_type |= kTypeOutputNoPadding;
      }
    }
  }
  if (continuous_type != 0) {
    GELOGI("[Get][MemType:Continuous]Current node %s, value is %d", op_desc->GetName().c_str(), continuous_type);
  }
  return continuous_type;
}
Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type,
                     int64_t &tensor_size, int64_t &nopadding_size) {
  if ((op_desc == nullptr) || (output_desc == nullptr)) {
    REPORT_INNER_ERROR("E19999", "InnerData param op_desc or output_desc is nullptr, "
                       "not expected when GetMemorySize");
    GELOGE(FAILED, "[Check][Param]op_desc or output_desc is nullptr");
    return FAILED;
  }
  tensor_size = 0;
  nopadding_size = 0;
  bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
  if (is_nopadding) {
    int64_t attr_dim_index;
    bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
    if (!get_attr_dim_flag) {
      REPORT_INNER_ERROR("E19999", "Get Attr:%s failed when GetMemorySize, op_name:%s",
                         ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str());
      GELOGE(FAILED, "[Get][Attr:%s]fail for op_name:%s",
             ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str());
      return FAILED;
    }
    // Calculate the real tensor size of each piece of data and the size of the complete data.
    int64_t batch_dim_num = 1;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) !=
        SUCCESS) {
      REPORT_CALL_ERROR("E19999", "CalculateTensorRealSizeAndOutSize failed, attr_dim_index:%ld, op_name:%s",
                        attr_dim_index, op_desc->GetName().c_str());
      GELOGE(FAILED, "[Calculate][NopaddingSize]failed for node %s, attr_dim_index:%ld",
             op_desc->GetName().c_str(), attr_dim_index);
      return FAILED;
    }
  } else {
    if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) {
      REPORT_INNER_ERROR("E19999", "Get Tensor Size failed, op_name:%s", op_desc->GetName().c_str());
      GELOGE(FAILED, "[Get][TensorSize]failed in padding case, op_name:%s", op_desc->GetName().c_str());
      return FAILED;
    }
  }
  if ((tensor_size < 0) || (nopadding_size < 0)) {
    REPORT_INNER_ERROR("E19999", "GetMemorySize fail, "
                       "tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s",
                       tensor_size, nopadding_size, op_desc->GetName().c_str());
    GELOGE(FAILED, "[Get][MemorySize]tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s",
           tensor_size, nopadding_size, op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
void AlignMemOffset(int64_t &mem_align_size) {
  if (mem_align_size <= 0) {
    return;
  }
  mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
}
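// Worked example (assuming MEM_ALIGN_SIZE is 512, the alignment also used for the hcom
// head/tail padding below): an offset of 600 rounds up to 1024, while 512 stays 512:
//   (600 + 512 - 1) / 512 * 512 == 1024
//   (512 + 512 - 1) / 512 * 512 == 512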
bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op_desc) {
  bool is_peer_output_continuous = false;
  // If GetBool fails, is_peer_output_continuous stays false.
  (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);
  // Get the peer node's output count. If it is 1 (the peer node has only one output), the continuous input of
  // this node and the continuous output of the previous node refer to the same memory, so it is supported.
  // If it is not 1, the two requirements may conflict, which is not supported.
  auto peer_output_size = peer_op_desc->GetOutputsSize();
  GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
                  std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                      " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                      " requires continuous output. There may be a conflict between the two. " +
                      "This node is not supported now.";
                  GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                  return true;);
  bool is_peer_reference = false;
  // If GetBool fails, is_peer_reference stays false.
  (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
  GE_IF_BOOL_EXEC(is_peer_reference,
                  std::string warning = "[Check][Continuous]Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                      " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                      " is ref. There may be a conflict between the two.";
                  GELOGW("%s", warning.c_str());
                  return false;);
  return false;
}
Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
  Status ret;
  // Store nodes whose continuous input memory must be assigned in reverse topological order.
  std::vector<NodePtr> nodes_stack;
  std::map<NodePtr, uint32_t> node_2_continuous_type;
  // Traverse nodes.
  for (auto &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    uint32_t continuous_type;
    auto iter = node_2_continuous_type.find(node);
    if (iter == node_2_continuous_type.end()) {
      continuous_type = GetContinuousMemoryType(node->GetOpDesc());
      node_2_continuous_type.emplace(node, continuous_type);
    } else {
      continuous_type = iter->second;
    }
    // Assign continuous input memory.
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    if (continuous_input) {
      if (AssignContinuousInputMemoryWithAtomicProcessDirectly(node, node_2_continuous_type)) {
        GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, continuous_type),
                          "[Assign][Memory:Continuous:Input]fail for node:%s", node->GetName().c_str())
      } else {
        nodes_stack.push_back(node);
      }
    }
    // Assign continuous output memory.
    int64_t memory_type = RT_MEMORY_HBM;
    bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
    if (continuous_output) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"),
                        "[Get][MemType]fail for node:%s", node->GetName().c_str());
      ret = AssignContinuousOutputMemory(node, memory_type, continuous_type);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "[Assign][Memory:Continuous:Output]fail for node:%s", node->GetName().c_str());
        return ret;
      }
    }
  }
  // Assign continuous input memory for the nodes stored above, in reverse topological order.
  while (!nodes_stack.empty()) {
    auto node = nodes_stack.back();
    nodes_stack.pop_back();
    auto iter = node_2_continuous_type.find(node);
    if (iter == node_2_continuous_type.end()) {
      REPORT_INNER_ERROR("E19999", "Inner data error when process continuous memory alloc for node:%s, "
                         "but has no continuous type", node->GetName().c_str());
      GELOGE(FAILED, "[Get][ContinuousType] find fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
    GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true),
                      "[Assign][Memory:Continuous:Input]fail for node:%s.", node->GetName().c_str())
  }
  for (auto pair : memory_offset_) {
    GELOGD("[Reassign][Memory:Continuous]At last, memory type = %ld, mem offset = %zu", pair.first,
           pair.second.mem_offset_);
  }
  return ge::SUCCESS;
}
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
    int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) {
  GELOGI("[Assign][Memory:Input:Continuous]start for Current node %s", node->GetName().c_str());
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, "
                       "when assign continuous input memory for node:%s", memory_type, node->GetName().c_str());
    GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s",
           memory_type, node->GetName().c_str());
    return FAILED;
  }
  // The head and tail of hcom continuous input need an extra 512 bytes each.
  iter->second.mem_offset_ += MEM_ALIGN_SIZE;
  continuous_mem_start = iter->second.mem_offset_;
  int64_t mem_offset = iter->second.mem_offset_;
  int64_t extra_memory_size = 0;
  bool is_continuous_input_allocated = false;
  auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  vector<int64_t> output_list_this = op_desc->GetOutputOffset();
  if (output_list_this.empty()) {
    REPORT_INNER_ERROR("E19999", "No output offset in node:%s, not expected when assign continuous input memory",
                       node->GetName().c_str());
    GELOGE(FAILED, "[Get][OutputOffset] empty is invalid, node:%s", node->GetName().c_str());
    return FAILED;
  }
  (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated);
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue);
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
    GE_IF_BOOL_EXEC(IsContinuousInputConflict(node, peer_op_desc), return PARAM_INVALID;);
    int64_t tensor_desc_size = 0;
    int64_t nopadding_size = 0;
    int64_t real_size = 0;
    std::vector<int64_t> offsets_of_fusion = {};
    bool lx_fusion = AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_of_fusion);
    lx_fusion = lx_fusion && !offsets_of_fusion.empty();
    if (lx_fusion) {
      if (peer_out_data_anchor->GetIdx() >= static_cast<int>(offsets_of_fusion.size())) {
        std::string error = "fusion: peer node:" + FmtToStr(peer_op_desc->GetName()) +
            " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) +
            " is out of range:" + FmtToStr(offsets_of_fusion.size());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      nopadding_size = offsets_of_fusion[peer_out_data_anchor->GetIdx()];
      tensor_desc_size = nopadding_size;
    } else {
      if (GetMemorySize(node->GetOpDesc(), peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()),
                        continuous_type, tensor_desc_size, nopadding_size) != ge::SUCCESS) {
        return FAILED;
      }
    }
    bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion;
    vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
    if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_list.size())) {
      std::string error = "peer node:" + FmtToStr(peer_op_desc->GetName()) +
          " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) +
          " is out of range:" + FmtToStr(output_list.size());
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    // When continuous input has already been allocated, the first input is the beginning offset.
    bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0);
    if (is_allocated_first_input) {
      std::map<int32_t, int32_t> out2ins;
      GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "[Get][AllRef]fail for node: %s", node->GetName().c_str());
      // The output is the beginning offset; set the offset for the input. Only this case is supported now.
      if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) {
        auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
        output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
        peer_op_desc->SetOutputOffset(output_list);
        GELOGI("[Update][Offset]Node %s out %d ref in %d input node %s, use output offset %ld update %ld",
               node->GetName().c_str(), out2ins.begin()->first, out2ins.begin()->second,
               peer_op_desc->GetName().c_str(), output_list_this.at(out2ins.begin()->first), peer_output_offset);
      } else {
        GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(),
               out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size());
      }
      // The first input is the beginning offset.
      mem_offset = output_list.at(peer_out_data_anchor->GetIdx());
      continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
    } else {
      // Set the offset for the input.
      output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset;
      peer_op_desc->SetOutputOffset(output_list);
    }
    int64_t align_size = tensor_desc_size;
    if (is_nopadding) {
      mem_offset += nopadding_size;
      extra_memory_size += (tensor_desc_size - nopadding_size);
      real_size = nopadding_size;
    } else {
      ge::AlignMemOffset(align_size);
      mem_offset += align_size;
      // The head and tail of hcom continuous input need an extra 512 bytes each.
      extra_memory_size = MEM_ALIGN_SIZE;
      real_size = tensor_desc_size;
    }
    GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
           "size[%zu] realsize[%ld] nopadding size[%d]", node->GetOwnerComputeGraph()->GetName().c_str(),
           peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
           output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
           is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
  }
  mem_offset += extra_memory_size;
  ge::AlignMemOffset(mem_offset);
  continuous_mem_size = mem_offset - continuous_mem_start;
  if (is_continuous_input_allocated) {
    // Memory is not allocated here, so there is no need to add the 512-byte header.
    iter->second.mem_offset_ -= MEM_ALIGN_SIZE;
  } else {
    iter->second.mem_offset_ = mem_offset;
  }
  return SUCCESS;
}
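// Resulting layout when the block is allocated here (illustrative sketch,
// is_continuous_input_allocated == false, padded case):
//   [512-byte head][input0, 512-aligned][input1, 512-aligned]...[512-byte tail]
// The head is reserved before the first offset and the tail via extra_memory_size. In the
// nopadding case the inputs are packed back to back and the accumulated difference between
// tensor_desc_size and nopadding_size is appended once at the end.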
Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) {
  auto in_data_anchor_list = node->GetAllInDataAnchors();
  if (in_data_anchor_list.empty()) {
    REPORT_INNER_ERROR("E19999", "InAnchor list empty in node:%s, not expected when GetFirstInputPeerOutOutputOffset",
                       node->GetName().c_str());
    GELOGE(FAILED, "[Get][InAnchor]empty is invalid, node:%s", node->GetName().c_str());
    return FAILED;
  }
  auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr,
                  REPORT_INNER_ERROR("E19999", "PeerAnchor is null, "
                                     "not expected when GetFirstInputPeerOutOutputOffset for node:%s",
                                     node->GetName().c_str());
                  GELOGE(ge::FAILED, "[Check][PeerAnchor]null is invalid, node:%s", node->GetName().c_str());
                  return ge::FAILED);
  auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  GE_IF_BOOL_EXEC(peer_op_desc == nullptr,
                  REPORT_INNER_ERROR("E19999", "PeerOpDesc is null, "
                                     "not expected when GetFirstInputPeerOutOutputOffset for node:%s",
                                     node->GetName().c_str());
                  GELOGE(ge::FAILED, "[Check][PeerOpDesc]null is invalid, node:%s", node->GetName().c_str());
                  return ge::FAILED);
  vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
    REPORT_INNER_ERROR("E19999", "PeerAnchorIndex:%d bigger than in_offset size:%lu, "
                       "judged invalid when GetFirstInputPeerOutOutputOffset for node:%s",
                       peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str());
    GELOGE(FAILED, "[Check][Index:PeerOutDataAnchor]PeerIndex:%d bigger than in_offset size:%lu, node:%s",
           peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str());
    return FAILED;
  }
  mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx());
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type,
                                                         uint32_t continuous_type) {
  GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
  auto out_op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(out_op_desc == nullptr,
                  REPORT_INNER_ERROR("E19999", "OpDesc is null, "
                                     "not expected when AssignContinuousOutputMemory for node:%s",
                                     node->GetName().c_str());
                  GELOGE(ge::FAILED, "[Check][OpDesc]null is invalid, node:%s", node->GetName().c_str());
                  return ge::FAILED);
  vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
    REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, invalid in node:%s, "
                       "when AssignContinuousOutputMemory",
                       out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
    GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
           out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
    return ge::FAILED;
  }
  int64_t mem_offset = 0;
  bool is_nopadding = ((continuous_type & kTypeOutputNoPadding) != 0);
  if (is_nopadding) {
    // The output tensor memory must reuse the input tensor memory.
    if (GetFirstInputPeerOutOutputOffset(node, mem_offset) != SUCCESS) {
      return ge::FAILED;
    }
  } else {
    // Get the reference type of the node; default is false.
    bool is_ref = false;
    // If GetBool fails, is_ref stays false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    // If the output is ref type and refers to the ref of an input, the output and the input share the same
    // name. When GE encounters a ref type, it finds the matching relationship according to the names of the
    // input and output and allocates the same memory address, e.g. HCOMBroadcast.
    if (is_ref) {
      GELOGI("Current node %s does not need continuous output assignment because it references input by name.",
             node->GetName().c_str());
      return SUCCESS;
    }
    mem_offset = output_list[0];
  }
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset;
    int64_t tensor_desc_size = 0;
    int64_t nopadding_size = 0;
    if (GetMemorySize(out_op_desc, out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()), continuous_type,
                      tensor_desc_size, nopadding_size) != ge::SUCCESS) {
      return FAILED;
    }
    if (is_nopadding) {
      mem_offset += nopadding_size;
    } else {
      mem_offset += tensor_desc_size;
      ge::AlignMemOffset(mem_offset);
    }
    GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]"
           " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
           out_op_desc->GetName().c_str(), node->GetType().c_str(), out_data_anchor->GetIdx(),
           output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL,
           is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding);
  }
  out_op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
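// In the nopadding case the outputs are packed into the memory of the first input's peer
// output (GetFirstInputPeerOutOutputOffset), so no new memory is reserved here; in the
// padded case packing simply starts from the offset already assigned to output 0 and
// realigns after each tensor.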
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
  // key: batch label of a dynamic batch
  map<string, map<NodePtr, vector<NodePtr>>> normal_atomic_and_clean_nodes_map;
  map<string, vector<NodePtr>> connecting_output_atomic_nodes;
  Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
  if (status != SUCCESS) {
    GELOGE(status, "[Filter][AtomicNode]failed in graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return status;
  }
  auto mem_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
                       "not expected when ReAssignAtomicMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  int64_t batch_atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
  int64_t batch_max_mem_offset = batch_atomic_mem_start;
  for (auto &iter_batch : normal_atomic_and_clean_nodes_map) {
    mem_iter->second.mem_offset_ = batch_atomic_mem_start;
    for (auto &iter : iter_batch.second) {
      int64_t atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
      GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start);
      for (auto &atomic_node : iter.second) {
        vector<int64_t> mem_offset_end;
        status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
        if (status != SUCCESS) {
          GELOGE(status, "[Assign][Memory]output atomic mem and workspace mem, fail for node %s.",
                 atomic_node->GetName().c_str());
          return status;
        }
      }
      int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start;
      if (atomic_mem_size != 0) {
        GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM),
                          "[Set][Attr]fail for atomic addr clean node %s.", iter.first->GetName().c_str());
      }
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  }
  mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  batch_atomic_mem_start = batch_max_mem_offset;
  for (auto &iter_batch : connecting_output_atomic_nodes) {
    mem_iter->second.mem_offset_ = batch_atomic_mem_start;
    if (AssignConnectNetOutputAtomicMemory(iter_batch.second) != SUCCESS) {
      GELOGE(FAILED, "[Assign][Memory]for nodes that connect to netoutput failed, "
             "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
      return FAILED;
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  }
  mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  return SUCCESS;
}
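// Note: the branches of a dynamic-batch graph (grouped by batch label) are rewound to the
// same start offset and only the maximum end offset is kept, so their atomic regions may
// overlap; this is safe because only one batch branch is expected to execute per run.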
Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(
    map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
    map<string, vector<NodePtr>> &connecting_output_atomic_nodes) {
  GE_CHECK_NOTNULL(compute_graph_);
  for (const auto &node : compute_graph_->GetAllNodes()) {
    if (node->GetType() == ATOMICADDRCLEAN) {
      map<string, vector<NodePtr>> tmp_normal_atomic_nodes;
      const auto &out_control_anchor = node->GetOutControlAnchor();
      GE_CHECK_NOTNULL(out_control_anchor);
      for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
        if (peer_in_control_anchor != nullptr) {
          auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
          auto peer_in_node_desc = peer_in_node->GetOpDesc();
          if (peer_in_node_desc != nullptr) {
            bool is_atomic_node = false;
            // If GetBool fails, is_atomic_node stays false.
            (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
            if (is_atomic_node) {
              bool is_reference = false;
              // If GetBool fails, is_reference stays false.
              (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
              if (is_reference) {
                REPORT_INNER_ERROR("E19999", "Op:%s cannot have both atomic and is_reference attribute, "
                                   "not supported now", peer_in_node_desc->GetName().c_str());
                GELOGE(FAILED, "[Check][Attr]Op:%s cannot have both atomic and is_reference attribute, "
                       "not supported now", peer_in_node_desc->GetName().c_str());
                return ge::PARAM_INVALID;
              }
              std::string batch_label;
              (void)ge::AttrUtils::GetStr(peer_in_node_desc, ATTR_NAME_BATCH_LABEL, batch_label);
              vector<int> is_connecting_output;
              // If GetListInt fails, is_connecting_output stays an empty vector.
              (void) ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
              if (is_connecting_output.empty()) {
                tmp_normal_atomic_nodes[batch_label].emplace_back(peer_in_node);
                continue;
              }
              connecting_output_atomic_nodes[batch_label].emplace_back(peer_in_node);
              tmp_normal_atomic_nodes[batch_label].clear();
              break;
            }
          }
        }
      }
      for (auto &it_atomic_node : tmp_normal_atomic_nodes) {
        if (!it_atomic_node.second.empty()) {
          normal_atomic_nodes_map[it_atomic_node.first][node] = it_atomic_node.second;
        }
      }
    }
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
                                                                 vector<int64_t> &mem_offset_end) {
  auto node_op_desc = node->GetOpDesc();
  // Assign atomic node output memory.
  Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Assign][Memory:Output:Atomic]Failed for node:%s.", node_op_desc->GetName().c_str());
    return ret;
  }
  // Check and assign atomic node workspace memory.
  map<string, map<int64_t, int64_t>> atomic_workspace_info;
  atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
  if (!atomic_workspace_info.empty()) {
    bool is_fusion_node = false;
    // If GetBool fails, is_fusion_node stays false.
    (void) ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);
    if (is_fusion_node) {
      // Assign fusion atomic node workspace memory.
      ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    } else {
      // Assign a single ordinary atomic node's workspace memory; fusion nodes are excluded.
      ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    }
    if (ret != SUCCESS) {
      GELOGE(ret, "[Assign][Memory:Atomic:Workspace]fail for node:%s.", node_op_desc->GetName().c_str());
      return ret;
    }
  } else {
    GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str());
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
                       "not expected when AssignConnectNetOutputAtomicMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (auto &node : connect_netoutput_nodes) {
    GE_CHECK_NOTNULL(node);
    if (node->GetOpDesc() == nullptr) {
      GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
      continue;
    }
    // Atomic memory start addr.
    int64_t original_atomic_mem_start = static_cast<int64_t>(iter->second.mem_offset_);
    GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
           node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
    vector<int64_t> mem_offset_end;
    if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "[Assign][Memory]output atomic mem and workspace mem, fail for node %s.",
             node->GetName().c_str());
      return FAILED;
    }
    // All these atomic nodes use an independent atomic_addr_clean op, so the attr must be set separately.
    if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) {
      GELOGE(FAILED, "[Set][Attr:IndependentAtomic]fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}
  838. }
  839. Status GraphMemoryAssigner::AssignReferenceMemory() {
  840. for (auto &node : compute_graph_->GetDirectNode()) {
  841. // Get the reference type of the node, default is false
  842. bool is_ref = false;
  843. // If GetBool fail, is_ref is false.
  844. (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
  845. if (!is_ref) {
  846. continue;
  847. }
  848. GELOGI("Current node %s needs to support the reference relationship between output and input.",
  849. node->GetName().c_str());
  850. auto out_op_desc = node->GetOpDesc();
  851. GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
  852. vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  853. if (out_op_desc->GetOutputsSize() > output_list.size()) {
  854. REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, judge invalid in node:%s "
  855. "when AssignReferenceMemory",
  856. out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
  857. GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
  858. out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
  859. return ge::FAILED;
  860. }
  861. map<string, int> input_name_index;
  862. for (const auto &input_name : out_op_desc->GetAllInputNames()) {
  863. int index = out_op_desc->GetInputIndexByName(input_name);
  864. input_name_index.emplace(input_name, index);
  865. }
  866. for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
  867. string out_data_anchor_name = out_op_desc->GetOutputNameByIndex(out_data_anchor->GetIdx());
  868. auto iter = input_name_index.find(out_data_anchor_name);
  869. if (iter != input_name_index.end()) {
  870. int index = iter->second;
  871. GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
  872. iter->first.c_str(), out_data_anchor_name.c_str());
  873. GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
  874. auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
  875. GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
  876. int peer_out_anchor_index = peer_out_anchor->GetIdx();
  877. auto peer_out_node = peer_out_anchor->GetOwnerNode();
  878. auto peer_out_op_desc = peer_out_node->GetOpDesc();
  879. GE_CHECK_NOTNULL(peer_out_op_desc);
  880. output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
  881. GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
  882. node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
  883. out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
  884. } else {
  885. GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
  886. node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(),
  887. out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId());
  888. }
  889. }
  890. out_op_desc->SetOutputOffset(output_list);
  891. }
  892. return ge::SUCCESS;
  893. }
bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    if (peer_out_data_anchor == nullptr) {
      continue;
    }
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    if (peer_op_desc == nullptr) {
      continue;
    }
    if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
        (peer_op_desc->GetType() == VARIABLE)) {
      REPORT_INNER_ERROR("E19999", "node(type:%s, name:%s) links to atomic node(name:%s), "
                         "this situation is not supported now",
                         peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str());
      GELOGE(ge::FAILED, "[Check][Link]node(type:%s, name:%s) links to atomic node(name:%s), "
             "this situation is not supported now",
             peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str());
      return false;
    }
  }
  return true;
}
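
// Assigns HBM offsets for the outputs listed in ATOMIC_ATTR_OUTPUT_INDEX and
// records the aligned end offset of each assignment in mem_offset_end. As a
// worked sketch with illustrative numbers only (assuming MEM_ALIGN_SIZE is 512):
// starting at mem_offset_ = 1024 with two atomic outputs of size 512 and 96,
// the outputs get offsets 1024 and 1536, and mem_offset_end becomes {1536, 2048}.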
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
  auto op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
  mem_offset_end.clear();
  GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());

  vector<int64_t> atomic_output_index;
  // If GetListInt fails, atomic_output_index stays empty.
  (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);

  // Check atomic output
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (atomic_output_index.size() > output_list.size()) {
    std::string error =
        "Op:" + FmtToStr(node->GetName()) + "'s size:" + FmtToStr(atomic_output_index.size()) +
        " of atomic_output_index is more than the size:" + FmtToStr(output_list.size()) + " of output_list";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return ge::FAILED;
  }
  auto output_list_size = static_cast<int64_t>(output_list.size());
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
                       "not expected when AssignAtomicOutputMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }

  for (auto &output_index : atomic_output_index) {
    if (output_index >= output_list_size) {
      std::string error =
          "Op:" + FmtToStr(node->GetName()) + "'s atomic_output index:" + FmtToStr(output_index) +
          " is more than the size:" + FmtToStr(output_list_size) + " of output_list.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }

    // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here
    bool is_assigned_mem = false;
    if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
      GELOGE(ge::FAILED, "[Get][MemoryAssignmentStatus]fail for node %s, out_index:%ld",
             node->GetName().c_str(), output_index);
      return ge::FAILED;
    }

    // If an atomic address has already been assigned, skip it; no reassignment is needed.
    if (is_assigned_mem) {
      GELOGI(
          "Node %s atomic output : we have assigned atomic memory as the input of next node in "
          "ReAssignContinuousMemory function.",
          op_desc->GetName().c_str());
      continue;
    }

    auto output_desc = op_desc->GetAllOutputsDescPtr().at(output_index);
    int64_t size = 0;
    if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) {
      GELOGI("Get size failed");
    }

    output_list[output_index] = iter->second.mem_offset_;
    std::string batch_label;
    (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
    GELOGI("[IMAS]Atomic output : Set %s name[%s] optype[%s] output[%ld] offset to [%zu] stream_id[%ld] memtype[%u] "
           "size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(),
           node->GetType().c_str(), output_index, iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM,
           size, size, batch_label.c_str());

    iter->second.mem_offset_ += size;
    AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
    mem_offset_end.emplace_back(iter->second.mem_offset_);
  }

  op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
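
// Reports whether atomic memory for output_index was already assigned while
// handling a downstream continuous-input node: a consumer whose
// ATOMIC_ATTR_INPUT_INDEX starts with kAllInputAddrIsAtomic (-1) marks all of
// its input addresses as atomic, so this output needs no separate assignment.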
Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
                                                      bool &is_mem_assigned) {
  if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
    std::string error =
        "Op:" + FmtToStr(node->GetName()) + "'s output index:" + FmtToStr(output_index) +
        " is more than the size:" + FmtToStr(node->GetAllOutDataAnchors().size()) + " of node's AllOutDataAnchors.";
    GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
    return ge::PARAM_INVALID;
  }
  auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  GE_CHECK_NOTNULL(out_data_anchor);
  auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
  for (auto &input_anchor : input_anchors) {
    auto output_node = input_anchor->GetOwnerNode();
    /// Get the input atomic attr of the peer node; if atomic_input_index[0] == -1, the atomic address
    /// has already been assigned.
    vector<int64_t> atomic_input_index;
    (void) ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
    if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
      is_mem_assigned = true;
      break;
    }
  }
  return SUCCESS;
}
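
// workspace_info maps node name -> (workspace index -> workspace byte size).
// In the ordinary (non-fusion) case the offsets are written straight into the
// op's own workspace vector. A hedged sketch of the expected input shape:
//   map<string, map<int64_t, int64_t>> info = {{op_desc->GetName(), {{0, 4096}}}};
//   GE_CHK_STATUS_RET(AssignOrdinaryAtomicWorkspaceMemory(op_desc, info, mem_offset_end));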
Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                                map<string, map<int64_t, int64_t>> &workspace_info,
                                                                vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
                       "not expected when AssignOrdinaryAtomicWorkspaceMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  vector<int64_t> workspace_vector = op_desc->GetWorkspace();

  for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) {
    if (op_desc->GetName() != iter->first) {
      std::string error = "The node name" + FmtToStr(op_desc->GetName()) +
          " and the node name" + FmtToStr(iter->first) + " in workspace info are inconsistent.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }

    if (iter->second.empty()) {
      continue;
    }

    for (auto &info_iter : iter->second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      if (workspace_index >= workspace_vector.size()) {
        std::string error = "The workspace index:" + FmtToStr(workspace_index) +
            " is more than the size:" + FmtToStr(workspace_vector.size()) + " of workspace vector in op:" +
            op_desc->GetName().c_str();
        GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
        return ge::PARAM_INVALID;
      }

      workspace_vector[workspace_index] = mem_type_iter->second.mem_offset_;
      std::string batch_label;
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
      GELOGI(
          "[IMAS]Atomic ordinary workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "memtype[%u] size[%ld] real_size[%ld] batch[%s].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index,
          mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size,
          batch_label.c_str());

      mem_type_iter->second.mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
    }
  }
  op_desc->SetWorkspace(workspace_vector);
  return SUCCESS;
}
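
// Fusion variant: the workspace offsets of each fused sub-node cannot be kept
// in the fused op's workspace vector, so they are collected per sub-node and
// attached as the EXT_ATTR_ATOMIC_WORKSPACE_OFFSET external attribute instead.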
Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                              map<string, map<int64_t, int64_t>> &workspace_info,
                                                              vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
                       "not expected when AssignFusionAtomicWorkspaceMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  map<string, map<int64_t, int64_t>> sub_node_workspace_offset;

  for (auto &iter : workspace_info) {
    if (iter.second.empty()) {
      continue;
    }

    map<int64_t, int64_t> index_offset;
    for (auto &info_iter : iter.second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;

      size_t workspace_offset = mem_type_iter->second.mem_offset_;
      std::string batch_label;
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
      GELOGI(
          "[IMAS]Atomic fusion workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "memtype[%u] size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(),
          op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, mem_type_iter->second.mem_offset_,
          op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str());

      mem_type_iter->second.mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
      index_offset.insert(std::make_pair(workspace_index, workspace_offset));
    }
    sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
  }
  if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
    REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for node:%s when AssignFusionAtomicWorkspaceMemory",
                       EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str());
    GELOGE(FAILED, "[Set][Attr:%s]fail for node:%s.",
           EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str());
    return FAILED;
  }

  return SUCCESS;
}
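
// Validates that no input/output/workspace offset is still ge::kInvalidOffset
// after assignment. For IDENTITY/READVARIABLEOP nodes the output offset is also
// re-synchronized with the offset of the ref symbol shared with other anchors,
// since those ops reference rather than own their memory.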
Status GraphMemoryAssigner::CheckOffset() {
  std::map<std::string, std::string> anchor_to_symbol;
  std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
  if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Get ref-mapping for graph %s failed", compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Get][RefMapping]fail for graph %s", compute_graph_->GetName().c_str());
    return FAILED;
  }

  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node->GetOpDesc());
    vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
    for (auto input : input_list) {
      if (input == ge::kInvalidOffset) {
        std::string error = "Invalid input offset" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    }

    bool need_update_output = false;
    vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
    for (uint32_t i = 0; i < output_list.size(); ++i) {
      if (output_list[i] == ge::kInvalidOffset) {
        std::string error = "Invalid output offset" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) {
        auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i);
        if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) {
          output_list[i] = symbol_offset;
          need_update_output = true;
        }
      }
    }
    if (need_update_output) {
      node->GetOpDesc()->SetOutputOffset(output_list);
    }

    vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
    for (auto workspace : workspace_list) {
      if (workspace == ge::kInvalidOffset) {
        std::string error = "Invalid workspace" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    }
  }
  return SUCCESS;
}
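
// Logs the final offset of every memory type and refreshes the input offsets of
// all nodes from their producers' output offsets. Note that an empty
// memory_offset_ is reported as E19999 but does not abort the refresh loop below.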
ge::Status GraphMemoryAssigner::SetInputOffset() {
  if (memory_offset_.empty()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected when SetInputOffset, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  }
  for (auto pair : memory_offset_) {
    GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
            pair.second.mem_offset_, pair.first);
  }

  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    if (UpdateOpInputOffset(node) != ge::SUCCESS) {
      GELOGE(ge::FAILED, "[Update][Offset:Input]fail for op:%s", node->GetName().c_str());
      return ge::FAILED;
    }
  }
  return ge::SUCCESS;
}
NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const {
  if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
    return node;
  }

  if (NodeUtils::IsDynamicShape(node)) {
    return node;
  }

  return NodeUtils::GetParentInput(node);
}
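
// For a subgraph Data node carrying ATTR_NAME_PARENT_NODE_INDEX, the offset
// comes either from a constant parent input (the const's output offsets are
// copied) or, for static-shape graphs, from the parent node's input offset at
// parent_index; dynamic-shape Data nodes keep their own allocation.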
ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  uint32_t parent_index = 0;
  if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
    return SUCCESS;
  }

  // Subgraph Data Node, check for constant input.
  std::string op_type;
  const auto &in_node = NodeUtils::GetParentInput(node);
  if (NodeUtils::GetConstOpType(in_node, op_type)) {
    input_list = in_node->GetOpDesc()->GetOutputOffset();
    node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as const output.
    return SUCCESS;  // Constant input.
  }

  // Memory allocated for dynamic shape subgraph Data.
  if (NodeUtils::IsDynamicShape(node)) {
    return SUCCESS;
  }

  const auto &owner = node->GetOwnerComputeGraph();
  const auto &parent_desc = owner->GetParentNode()->GetOpDesc();
  const auto parent_inputs = parent_desc->GetInputOffset();
  if (parent_inputs.size() <= parent_index) {
    std::string error = "Get Parent input offset failed, node is " + FmtToStr(node->GetName()) +
        ", input_size is " + FmtToStr(parent_inputs.size()) + ", parent index is " +
        FmtToStr(parent_index);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }

  input_list = {parent_inputs[parent_index]};
  node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as parent input.
  return SUCCESS;
}
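
// Derives each input offset from the peer output offset. When
// ATTR_NAME_INPUT_MEM_TYPE_LIST is present (fusion case), the rule below is:
//   L1 input  : input_offset = origin_input_list[i]                      (keep original)
//   HBM input : input_offset = origin_input_list[i] + peer output offset
// e.g. an origin offset of 32 on an HBM input whose peer output sits at 4096
// yields 4128 (illustrative numbers only).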
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  vector<int64_t> origin_input_list;
  vector<int64_t> memory_type;
  auto tmp_op_desc = node->GetOpDesc();
  origin_input_list = tmp_op_desc->GetInputOffset();
  int64_t valid_input_index = 0;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
  for (const auto &anchor : node->GetAllInDataAnchors()) {
    vector<int64_t> output_list;
    auto peer_out_anchor = anchor->GetPeerOutAnchor();
    if (peer_out_anchor == nullptr) {
      continue;
    }

    // If the current node is not broadcast, the OutputOffset of the previous node is used to update the input_list
    auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
    auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
    GE_CHECK_NOTNULL(last_peer_out_op_desc);
    output_list = last_peer_out_op_desc->GetOutputOffset();
    auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx());
    if (output_list.size() > static_cast<size_t>(out_index)) {
      int64_t input_offset = output_list.at(out_index);
      if (has_mem_type_attr && !origin_input_list.empty()) {
        auto input_size = tmp_op_desc->GetInputsSize();
        auto ori_input_offset_list_size = origin_input_list.size();
        auto mem_type_size = memory_type.size();
        if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) {
          std::string error = "fusion: node" + FmtToStr(tmp_op_desc->GetName()) +
              " input_size" + FmtToStr(input_size) + " diff from memory_type_size" +
              FmtToStr(mem_type_size) + " from ori_input_offset_list_size" +
              FmtToStr(ori_input_offset_list_size);
          GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
          return ge::FAILED;
        }
        // Non-HBM inputs keep the original input offset;
        // HBM: input offset = original input offset + output offset.
        input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 ? origin_input_list[valid_input_index]
                        : origin_input_list[valid_input_index] + output_list.at(out_index));
      }
      const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
      if (in_node->GetType() == CONSTANT) {
        GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx()));
        GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
      }

      GELOGD("%s node[%s] input[%ld] is set from node[%s] out index[%lu] offset[%ld]",
             has_mem_type_attr ? "Fusion" : "",
             tmp_op_desc->GetName().c_str(),
             valid_input_index,
             peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(),
             out_index,
             input_offset);
      input_list.emplace_back(input_offset);
      valid_input_index++;
    }
  }
  return ge::SUCCESS;
}
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
  GE_CHECK_NOTNULL(node->GetOpDesc());
  vector<int64_t> input_list;
  if (node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) {
    for (const auto &anchor : node->GetAllInDataAnchors()) {
      vector<int64_t> output_list;
      auto peer_out_anchor = anchor->GetPeerOutAnchor();
      if (peer_out_anchor == nullptr) {
        continue;
      }

      auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
      // If the current node is broadcast and the preceding node is a variable, the InputOffset has already been
      // set in AssignVarAttr2Nodes, so the broadcast node's own InputOffset is used to update input_list.
      // Otherwise, the OutputOffset of the preceding node is used to update input_list.
      if (last_peer_out_node->GetType() != VARIABLE) {
        auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(last_peer_out_op_desc);
        output_list = last_peer_out_op_desc->GetOutputOffset();
        if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
          input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx()));
        }
      } else {
        vector<int64_t> cur_node_input_list;
        auto cur_node_op_desc = node->GetOpDesc();
        GE_CHECK_NOTNULL(cur_node_op_desc);
        cur_node_input_list = cur_node_op_desc->GetInputOffset();
        if (cur_node_input_list.size() > static_cast<size_t>(anchor->GetIdx())) {
          input_list.emplace_back(cur_node_input_list.at(anchor->GetIdx()));
        }
      }
    }
  } else if (node->GetType() == DATA_TYPE) {
    if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "[Update][Offset:Input:Const]fail for node:%s ", node->GetName().c_str());
      return FAILED;
    }
  } else {
    if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "[Update][Offset:Input]fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
  }
  node->GetOpDesc()->SetInputOffset(input_list);
  return SUCCESS;
}
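
// Converts the [atomic_mem_start, mem_offset_end...] fence posts into
// (start, size) pairs for the ATOMICADDRCLEAN node that control-links into this
// node. With atomic_mem_start = 1024 and mem_offset_end = {1536, 2048}
// (illustrative numbers), the starts become {1024, 1536} and the sizes {512, 512}.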
Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
                                                     const vector<int64_t> &mem_offset_end, int64_t memory_type) {
  GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);

  // Parse the offset and size vectors
  vector<int64_t> memory_offset_start;
  vector<int64_t> memory_offset_size;
  memory_offset_start.emplace_back(atomic_mem_start);
  for (size_t i = 0; i < mem_offset_end.size(); ++i) {
    memory_offset_start.emplace_back(mem_offset_end[i]);
    // memory_offset_start[i + 1] is the end offset of element i, so the difference is its size
    auto size = memory_offset_start[i + 1] - memory_offset_start[i];
    memory_offset_size.emplace_back(size);
  }
  memory_offset_start.pop_back();
  const auto &in_control_anchor = node->GetInControlAnchor();
  if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
    for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      if (peer_out_control_anchor == nullptr) {
        continue;
      }
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      auto peer_out_node_desc = peer_out_node->GetOpDesc();
      if (peer_out_node_desc == nullptr) {
        continue;
      }

      GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.",
             memory_offset_size.size(), peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
      if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
        if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) {
          GELOGE(FAILED, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str());
          return FAILED;
        }
      }
    }
  }
  return SUCCESS;
}
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start,
                                                   const vector<int64_t> &atomic_mem_size, int64_t memory_type) {
  auto node_op_desc = node->GetOpDesc();
  if (node_op_desc != nullptr) {
    GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
    vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
    vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
    workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    node_op_desc->SetWorkspace(workspace_vector);
    node_op_desc->SetWorkspaceBytes(workspace_byte_vector);

    std::vector<int64_t> mem_start_vector;
    // If GetListInt fails, mem_start_vector stays empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
    mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
                     REPORT_INNER_ERROR("E19999", "Set Attr:%s failed when SetAtomicCleanAttr, op_name:%s",
                                        ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str());
                     GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s",
                            ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str());
                     return FAILED);

    std::vector<int64_t> mem_size_vector;
    // If GetListInt fails, mem_size_vector stays empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
    mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
                     REPORT_INNER_ERROR("E19999", "Set Attr:%s failed when SetAtomicCleanAttr, op_name:%s",
                                        ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str());
                     GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s",
                            ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str());
                     return FAILED);

    std::stringstream ss;
    for (auto iter : atomic_mem_start) {
      ss << iter << " ";
    }
    string atomic_mem_start_str = ss.str();
    ss.clear();
    ss.str("");
    for (auto iter : atomic_mem_size) {
      ss << iter << " ";
    }
    string atomic_mem_size_str = ss.str();

    GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] optype[%s] output[0] offset to [%s] stream_id[%ld]"
           " memtype[%ld] size[%s]", node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
           node->GetType().c_str(), atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), memory_type,
           atomic_mem_size_str.c_str());
  }
  return SUCCESS;
}
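
// Rounds the current offset of memory_type up to the next multiple of
// mem_align_size using the usual integer trick:
//   aligned = (offset + align - 1) / align * align
// e.g. an offset of 1632 with an alignment of 512 becomes 2048.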
void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type) {
  if (mem_align_size <= 0) {
    return;
  }
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    GELOGW("Memory offset does not have memory type[%ld].", memory_type);
    return;
  }
  iter->second.mem_offset_ =
      (iter->second.mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
}
ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector<NodePtr> &nodes, int32_t mem_reuse_model,
                                                      int64_t &memory_type) {
  memory_type = RT_MEMORY_HBM;
  // In the dynamic batch scenario, the memory attributes of all nodes are the same.
  for (auto &n : nodes) {
    if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"),
                        "[Get][MemType:input]fail for node:%s", n->GetName().c_str())
      break;
    }

    if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"),
                        "[Get][MemType:output]fail for node:%s", n->GetName().c_str())
      break;
    }
  }
  return SUCCESS;
}
ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output) {
  memory_type = RT_MEMORY_HBM;
  vector<int64_t> mem_type_list;
  if (input_or_output == "input") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_INPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (input_or_output == "output") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_OUTPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (mem_type_list.empty()) {
    if (memory_offset_.find(memory_type) == memory_offset_.end()) {
      std::string error = "Memory offset map does not have memory type" + FmtToStr(memory_type) +
          ", opname is " + FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    return SUCCESS;
  }

  if (mem_type_list.size() != node->GetAllInDataAnchorsSize()) {
    std::string error = "The size" + FmtToStr(mem_type_list.size()) +
        " of mem type list is not equal to the size of in data anchor" +
        FmtToStr(node->GetAllInDataAnchorsSize()) + ", opname is " +
        FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }

  if (!CheckContinuousMemType(mem_type_list)) {
    GELOGE(FAILED, "[Check][MemType:Continuous]fail for node:%s", node->GetName().c_str());
    return FAILED;
  }
  // It is continuous memory and the memory type is the same, so use the first memory type.
  memory_type = mem_type_list[0];
  return SUCCESS;
}
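
// All members of a continuous-memory group must share one memory type that is
// known to memory_offset_; otherwise an E10043 ATC message is reported and the
// check fails, which the caller above turns into FAILED.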
bool GraphMemoryAssigner::CheckContinuousMemType(vector<int64_t> mem_type_list) {
  if (mem_type_list.size() == 0) {
    return true;
  }
  int64_t mem_type_tmp = mem_type_list[0];
  for (auto mem_type : mem_type_list) {
    if (mem_type != mem_type_tmp) {
      std::string error = "The memory is continuous, but the type of the input memory is inconsistent. They are " +
          FmtToStr(mem_type_tmp) + " and " + FmtToStr(mem_type);
      ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
      GELOGW("The memory is continuous, but the type of the input memory is inconsistent. They are [%ld] and [%ld].",
             mem_type_tmp, mem_type);
      return false;
    }
  }
  if (memory_offset_.find(mem_type_tmp) == memory_offset_.end()) {
    std::string error = "Memory offset map does not have memory type" + FmtToStr(mem_type_tmp);
    ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
    GELOGW("Memory offset map does not have memory type[%ld].", mem_type_tmp);
    return false;
  }
  return true;
}
void GraphMemoryAssigner::PrintMemoryOffset() {
  for (auto pair : memory_offset_) {
    // Assign memory of max batch nodes that have the same batch label.
    GELOGD("Reassign memory for max batch virtual nodes, memory type = %ld, memory offset = %zu.",
           pair.first, pair.second.mem_offset_);
  }
}
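
// Collects every output anchor that reuses an input (reuse_input attr) into
// out2ins as output index -> input index. A hedged usage sketch:
//   map<int32_t, int32_t> out2ins;
//   GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "[Get][AllRef]fail for node:%s", node->GetName().c_str());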
ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map<int32_t, int32_t> &out2ins) {
  for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    int32_t reuse_in_index = -1;
    bool reuse_input_flag = GraphUtils::IsRefFromInput(out_data_anchor, reuse_in_index);
    if (reuse_input_flag) {
      if (node->GetInDataAnchor(reuse_in_index) != nullptr) {
        out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index);
      } else {
        REPORT_INNER_ERROR("E19999", "Invalid reuse_input value %d on output %d of node %s, "
                           "please check attr reuse_input",
                           reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
        GELOGE(FAILED, "[Check][Attr]Invalid reuse_input value %d on output %d of node %s, "
               "please check attr reuse_input",
               reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
        return FAILED;
      }
    }
  }
  return ge::SUCCESS;
}
bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
    const NodePtr &input_continuous_node, map<NodePtr, uint32_t> &node_2_continuous_type) {
  for (const auto &in_node : input_continuous_node->GetInDataNodes()) {
    if (in_node->GetType() == VARIABLE) {
      GELOGI("Node %s's precursor node %s is a variable, do not store.", input_continuous_node->GetName().c_str(),
             in_node->GetName().c_str());
      return true;
    }
    auto iter = node_2_continuous_type.find(in_node);
    // Input nodes come earlier in topological order, so the lookup cannot fail here.
    auto continuous_type = iter->second;
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    if (continuous_input) {
      GELOGI("[Store][Node] %s because its precursor node %s needs continuous input memory assignment",
             input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
      return false;
    }
  }
  for (const auto &out_node : input_continuous_node->GetOutDataNodes()) {
    auto continuous_type = GetContinuousMemoryType(out_node->GetOpDesc());
    node_2_continuous_type.emplace(out_node, continuous_type);
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    if (continuous_input) {
      GELOGI("[Store][Node] %s because its successor node %s needs continuous input memory assignment",
             input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
      return false;
    }
  }

  return true;
}
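
// Assigns continuous input memory and, when ATOMIC_ATTR_INPUT_INDEX marks all
// inputs as atomic (first element == kAllInputAddrIsAtomic), pushes the cleaned
// [mem_clean_start, mem_clean_start + mem_clean_size) range to the
// ATOMICADDRCLEAN node reached through the in-control anchor.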
ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
                                                                             uint32_t continuous_type,
                                                                             bool reverse_refresh) {
  int64_t mem_clean_start = 0;
  int64_t mem_clean_size = 0;
  int64_t memory_type = RT_MEMORY_HBM;

  GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"),
                    "[Get][MemType]fail for node:%s", input_continuous_node->GetName().c_str());
  auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type,
                                         continuous_type, reverse_refresh);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "[Assign][Memory:Input:continuous]fail for node:%s", input_continuous_node->GetName().c_str());
    return ret;
  }

  // Clean up the atomic address, e.g. for an hcom node
  vector<int32_t> input_indexes;
  // If GetListInt fails, input_indexes stays empty.
  (void)ge::AttrUtils::GetListInt(input_continuous_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
  if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
    // Check whether there is an atomic conflict between the current node and the peer out node
    if (!CheckInputIsSupportAtomic(input_continuous_node)) {
      return ge::FAILED;
    }

    const auto &in_control_anchor = input_continuous_node->GetInControlAnchor();
    GE_CHECK_NOTNULL(in_control_anchor);
    for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      GE_CHECK_NOTNULL(peer_out_control_anchor);
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
        ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type);
        if (ret != SUCCESS) {
          GELOGE(ret, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str());
          return ret;
        }
      }
    }
  }

  return ge::SUCCESS;
}
}  // namespace ge

The Graph Engine (GE) module is a submodule of MindSpore, implemented in C++. It sits between the front-end module (ME) and the underlying hardware, acting as the bridge between them. GE takes the graph delivered by ME as input, performs a series of deep graph-optimization passes, and finally outputs a graph that runs efficiently on the underlying hardware. GE applies optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its computing power. During model training and inference, GE is invoked automatically and is not visible to the user. GE consists mainly of two parts: GE API and GE Core.